In [1]:
# Step 1: Import libraries
import pandas as pd

# Seed for the shuffle below. Without it the row order changes on every run,
# which in turn changes the train/test split and every metric computed later.
SHUFFLE_SEED = 42

# Step 2: Load datasets and attach labels (0 = Fake, 1 = Real)
fake = pd.read_csv('dataset/Fake.csv').assign(label=0)
true = pd.read_csv('dataset/True.csv').assign(label=1)

# Step 3: Combine datasets and shuffle
# sample(frac=1) returns every row in a random (here: seeded) order;
# reset_index(drop=True) discards the per-file indices left over from concat.
data = (
    pd.concat([fake, true], axis=0)
    .sample(frac=1, random_state=SHUFFLE_SEED)
    .reset_index(drop=True)
)

# Step 4: Display the first 5 rows
data.head()


Unnamed: 0,title,text,subject,date,label
0,Obama’s Legacy Before American Safety: Feds Lo...,The Feds are looking at several states to tran...,Government News,"Oct 4, 2015",0
1,Notorious RBG Throws AMAZING Shade At Trump F...,We love Ruth Bader Ginsburg. She s the sweet ...,News,"November 10, 2016",0
2,“PROUD TRANSGENDER” Democrat Candidate For Con...,"WQOW -A Lacrosse, WI transgender person, who r...",politics,15-Feb-18,0
3,THIS IS GREAT! You’ll Love What Joy Villa Want...,,politics,"Feb 15, 2017",0
4,MAYORAL CANDIDATE In DIE-HARD DEMOCRAT City Ma...,If John Persinger wins the mayoral race in Eri...,politics,"Oct 8, 2017",0


In [3]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split

# Features are the raw article bodies; targets are the 0 = Fake / 1 = Real labels.
X, y = data['text'], data['label']

# Hold out 20% of the rows for testing; random_state fixes the split for a given row order.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build the TF-IDF vocabulary from the training texts only, then project the
# test texts onto that same vocabulary so no test information leaks into fitting.
# max_df=0.7 drops terms that occur in more than 70% of the training documents.
vectorizer = TfidfVectorizer(stop_words='english', max_df=0.7)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# (documents, vocabulary size) for the train and test matrices
X_train_vec.shape, X_test_vec.shape


((35918, 111212), (8980, 111212))

In [5]:
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

# Step 1: Initialize model
# The classifier shuffles the training data between epochs by default, so a
# fixed random_state is required for the reported metrics to be repeatable.
model = PassiveAggressiveClassifier(max_iter=50, random_state=42)

# Step 2: Train the model
model.fit(X_train_vec, y_train)

# Step 3: Predict on test data
y_pred = model.predict(X_test_vec)

# Step 4: Evaluate
accuracy = accuracy_score(y_test, y_pred)
# Pin the label order explicitly: rows are true labels, columns are predictions,
# both ordered 0 (Fake) then 1 (Real), even if a class were absent from y_test.
conf_matrix = confusion_matrix(y_test, y_pred, labels=[0, 1])

print("✅ Model Trained Successfully!")
print(f"🔍 Accuracy: {accuracy * 100:.2f}%")
print("🧾 Confusion Matrix:\n", conf_matrix)


✅ Model Trained Successfully!
🔍 Accuracy: 99.45%
🧾 Confusion Matrix:
 [[4710   31]
 [  18 4221]]


In [7]:
import os

import joblib

# joblib.dump does not create missing parent directories, so make sure the
# output folder exists; exist_ok=True keeps re-runs of this cell harmless.
os.makedirs('models', exist_ok=True)

# Save trained model
joblib.dump(model, 'models/fake_news_model.pkl')

# Save vectorizer (needed at inference time to turn raw text into the same
# feature space the model was trained on)
joblib.dump(vectorizer, 'models/vectorizer.pkl')

print("✅ Model and vectorizer saved successfully!")


✅ Model and vectorizer saved successfully!
