In [None]:
import pandas as pd
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, classification_report
from sklearn.naive_bayes import MultinomialNB
import nltk
# Fetch the NLTK data used by preprocess_text.
# - 'punkt' / 'punkt_tab': tokenizer models for word_tokenize. Newer NLTK releases
#   load 'punkt_tab' instead of the pickled 'punkt' model, so request both; an
#   unknown resource id on an older NLTK is reported by the downloader, not raised.
# - 'stopwords': the English stop-word list.
# - 'wordnet' (+ 'omw-1.4', which some NLTK releases require alongside it): data
#   for WordNetLemmatizer.
# quiet=True keeps the download log out of the notebook output.
for nltk_resource in ('punkt', 'punkt_tab', 'stopwords', 'wordnet', 'omw-1.4'):
    nltk.download(nltk_resource, quiet=True)

In [None]:
# Load the dataset from disk.
df = pd.read_csv('news_dataset.csv')

# Encode labels as integers: 0 = fake, 1 = real.
# Only non-numeric labels are mapped; a column that is already numeric is left
# untouched, because pushing it through the dict would turn every value into NaN.
label_map = {'fake': 0, 'real': 1}  # Adjust based on your dataset's labeling
if not pd.api.types.is_numeric_dtype(df['label']):
    # Tolerate casing / surrounding whitespace differences such as 'Fake' or ' real'.
    label_text = df['label'].astype(str).str.strip().str.lower()
    label_codes = label_text.map(label_map)
    unknown = label_codes.isna()
    if unknown.any():
        # Fail here, naming the offending values, instead of letting NaN labels
        # flow silently into the train/test split and the classifier.
        raise ValueError(
            f"Unrecognised label values: {sorted(label_text[unknown].unique())}"
        )
    df['label'] = label_codes.astype(int)

# Take a look at the first few rows (bare expression -> rich notebook display)
df.head()

In [None]:
# Lazily-filled cache for the NLTK resources used by preprocess_text.
_TEXT_RESOURCES = {}


def _text_resources():
    """Return the cached (stop-word set, lemmatizer) pair, building it on first use."""
    if not _TEXT_RESOURCES:
        _TEXT_RESOURCES['stop_words'] = frozenset(stopwords.words('english'))
        _TEXT_RESOURCES['lemmatizer'] = WordNetLemmatizer()
    return _TEXT_RESOURCES['stop_words'], _TEXT_RESOURCES['lemmatizer']


def preprocess_text(text):
    """Normalise one document for vectorisation.

    The text is lower-cased and tokenised with ``word_tokenize``, English
    stop words are dropped, the remaining tokens are lemmatised, and the
    result is re-joined with single spaces.

    Args:
        text: The raw document. Missing scalars (``None``, NaN, ``pd.NA``)
            are treated as an empty document; any other non-string value is
            converted with ``str()`` first.

    Returns:
        The cleaned document as a single space-separated string ('' for
        missing input).
    """
    if not isinstance(text, str):
        # pandas hands missing cells to .apply() as None/NaN; calling .lower()
        # on those would raise, so map them to an empty document instead.
        if pd.api.types.is_scalar(text) and pd.isna(text):
            return ''
        text = str(text)

    # The stop-word list and lemmatizer do not depend on the input, so they are
    # built once and reused rather than re-created for every row.
    stop_words, lemmatizer = _text_resources()

    tokens = word_tokenize(text.lower())

    # Same order as before: filter stop words first, then lemmatise survivors.
    return ' '.join(
        lemmatizer.lemmatize(token) for token in tokens if token not in stop_words
    )

# Run every article through preprocess_text and store the cleaned text back
# in the same column.
cleaned_text = df['text'].map(preprocess_text)
df['text'] = cleaned_text

In [None]:
X = df['text']
y = df['label']

# Hold out 20% of the rows for evaluation with a fixed seed for reproducibility.
# stratify=y keeps the class proportions the same in both splits, so the
# reported metrics are not skewed by an unlucky draw on an imbalanced dataset.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [None]:
# Build TF-IDF features. The vectorizer is fitted on the training split only and
# the already-fitted vectorizer is then reused to transform the test split.
vectorizer = TfidfVectorizer()
X_train_vectorized = vectorizer.fit_transform(X_train)
X_test_vectorized = vectorizer.transform(X_test)

In [None]:
# Using Naive Bayes for simplicity
# Fit a multinomial Naive Bayes classifier on the TF-IDF training features
# and the training labels.
model = MultinomialNB()
model.fit(X_train_vectorized, y_train)

In [None]:
# Predict labels for the held-out split
y_pred = model.predict(X_test_vectorized)

# Score the predictions against the true test labels, then report
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:\n", report)

In [None]:
def detect_fake_news(text):
    """Classify a single article as "Real" or "Fake".

    The text goes through preprocess_text, is transformed by the fitted
    ``vectorizer``, and is scored by the trained ``model``.

    Args:
        text: The article text to classify.

    Returns:
        "Real" when the model predicts label 1, otherwise "Fake".
    """
    cleaned = preprocess_text(text)
    features = vectorizer.transform([cleaned])
    predicted_label = model.predict(features)[0]
    if predicted_label == 1:
        return "Real"
    return "Fake"

# Try the classifier on a placeholder article and show its verdict
news_article = "Your news article text here..."
verdict = detect_fake_news(news_article)
print(verdict)