In [5]:
# 1. Import required libraries
import io

import pandas as pd
import numpy as np
import nltk
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from google.colab import files

In [6]:
# 2. Download NLTK stopwords
# Fetch the 'stopwords' corpus through NLTK's downloader, then import the
# corpus reader. `stopwords.words('english')` is what the TF-IDF vectorizer
# cell passes as its stop-word list.
nltk.download('stopwords')
from nltk.corpus import stopwords

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [7]:
# 3. Upload both CSV files
# `files.upload()` comes from `google.colab`, so this cell is interactive and
# Colab-specific; its return value is kept in `uploaded`.
# NOTE: Colab may store an upload under a different name than the one chosen
# in the picker -- this cell's recorded output shows
# "Saving True.csv to True (2).csv" -- so the file on disk is not guaranteed
# to be called "True.csv" / "Fake.csv".
print("📁 Please upload both Fake.csv and True.csv")
uploaded = files.upload()

📁 Please upload both Fake.csv and True.csv


Saving True.csv to True (2).csv
Saving Fake.csv to Fake (2).csv


In [None]:
# 4. Load the files
def _load_uploaded_csv(uploaded_files, stem):
    """Load the uploaded CSV whose name starts with ``stem`` into a DataFrame.

    Colab renames an upload when a file with the same name already exists
    (e.g. "Fake.csv" is saved as "Fake (2).csv"), so reading the hard-coded
    path "Fake.csv" can silently pick up an older copy or fail outright.
    Parsing the bytes returned by ``files.upload()`` always uses the file the
    user just selected, whatever name it was saved under.

    Args:
        uploaded_files: Mapping of file name -> raw bytes, as returned by
            ``google.colab.files.upload()``.
        stem: Expected start of the file name, e.g. ``"Fake"`` or ``"True"``
            (matched case-insensitively).

    Returns:
        pandas.DataFrame parsed from the matching upload. If the mapping has
        no matching ``.csv`` entry, falls back to reading ``"<stem>.csv"``
        from the working directory (the original behaviour).
    """
    wanted = stem.lower()
    for name, payload in uploaded_files.items():
        lowered = name.lower()
        # Prefix match so both "Fake.csv" and "Fake (2).csv" are accepted.
        if lowered.startswith(wanted) and lowered.endswith(".csv"):
            return pd.read_csv(io.BytesIO(payload))
    # Nothing suitable in this upload: use a file already present on disk.
    return pd.read_csv(f"{stem}.csv")


fake = _load_uploaded_csv(uploaded, "Fake")
true = _load_uploaded_csv(uploaded, "True")

In [None]:
# 5. Tag every row with its class: fake articles -> 0, real articles -> 1
fake = fake.assign(label=0)
true = true.assign(label=1)

In [None]:
# 6. Stack the two labelled frames row-wise into one dataset, renumbering
#    the index from zero instead of keeping each file's own row numbers
data = pd.concat(objs=[fake, true], axis=0, ignore_index=True)

In [None]:
# 7. Keep only the rows where both 'text' and 'label' are present
data = data.dropna(subset=['text', 'label'])

In [None]:
# 8. Separate the model input (article text) from the target (0/1 label)
X, y = data['text'], data['label']

In [None]:
# 9. Hold out 20% of the rows for testing; the fixed seed makes the split
#    repeatable across runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [None]:
# 10. TF-IDF vectorization
# The vectorizer is fitted on the training split only; the test split is
# merely transformed with the already-fitted vectorizer.
tfidf = TfidfVectorizer(
    max_features=5000,
    stop_words=stopwords.words('english'),
)
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)

In [None]:
# 11. Train a Naive Bayes model
# Build a MultinomialNB classifier with its default settings and fit it on
# the TF-IDF training matrix and the matching 0/1 labels.
model = MultinomialNB()
# Left as the cell's final expression, so the notebook also displays the
# value returned by fit().
model.fit(X_train_tfidf, y_train)

In [None]:
# 12. Evaluate the model
# Predict on the held-out split, then print accuracy, the classification
# report and the confusion matrix, each under its own heading.
y_pred = model.predict(X_test_tfidf)
accuracy = accuracy_score(y_test, y_pred)
print(f"\n✅ Model Accuracy: {accuracy * 100:.2f}%")

print("\n📊 Classification Report:", classification_report(y_test, y_pred), sep="\n")

print("\n🧮 Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")

In [None]:
# 13. Classify one hand-written headline
# The headline goes through the already-fitted TF-IDF vectorizer (transform
# only, no refit) before being handed to the trained model.
sample = ["Breaking: NASA discovers signs of water on Mars"]
sample_tfidf = tfidf.transform(sample)
prediction = model.predict(sample_tfidf)

# Label 1 is reported as REAL; any other label is reported as FAKE.
print(
    "\n📰 Sample News Prediction:",
    "REAL" if prediction[0] == 1 else "FAKE",
    sep="\n",
)