In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression, PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [2]:
# Load the pre-cleaned news dataset. The path is relative, so the CSV has to sit
# in the notebook's working directory.
news_csv = "cleaned_news.csv"
data = pd.read_csv(news_csv)

# Quick sanity check: dimensions first, then the first five rows.
print("Dataset shape:", data.shape)
print(data.head())


Dataset shape: (38646, 2)
                                                text  label
0  WASHINGTON (Reuters) - The head of a conservat...      1
1  WASHINGTON (Reuters) - Transgender people will...      1
2  WASHINGTON (Reuters) - The special counsel inv...      1
3  WASHINGTON (Reuters) - Trump campaign adviser ...      1
4  SEATTLE/WASHINGTON (Reuters) - President Donal...      1


In [3]:
# Features are the raw article text; the target is the label column.
X, y = data["text"], data["label"]

# Hold out 20% of the rows for evaluation. Stratifying on y keeps the class
# proportions the same in both partitions, and the fixed seed makes the split
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [4]:
# TF-IDF features: drop English stop words and ignore any term that occurs in
# more than 70% of the documents it is fitted on.
vectorizer = TfidfVectorizer(
    stop_words="english",
    max_df=0.7,
)

# Learn the vocabulary and IDF weights from the training split only, then apply
# that same fitted transform to the test split so no test information leaks in.
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

In [5]:
# Logistic-regression baseline on the TF-IDF training matrix; the solver is
# allowed up to 1000 iterations. fit() returns the estimator, so it is chained.
log_model = LogisticRegression(max_iter=1000).fit(X_train_tfidf, y_train)

# Predicted labels for the held-out test split.
y_pred_log = log_model.predict(X_test_tfidf)


In [6]:
# Passive-Aggressive linear classifier trained on the same TF-IDF features,
# limited to 50 passes over the training data.
# The estimator shuffles the training rows between epochs by default, so an
# unseeded run gives slightly different weights (and metrics) every time.
# random_state pins that shuffle, using the same seed as the train/test split,
# so a fresh Restart & Run All reproduces the reported numbers.
pac_model = PassiveAggressiveClassifier(max_iter=50, random_state=42)
pac_model.fit(X_train_tfidf, y_train)

# Predicted labels for the held-out test split.
y_pred_pac = pac_model.predict(X_test_tfidf)

In [9]:
# Print the same three-part report (accuracy, per-class precision/recall/F1,
# confusion matrix) for each model, scored against the shared held-out labels.
for title, preds in (
    ("\n=== Logistic Regression Performance ===", y_pred_log),
    ("\n=== Passive Aggressive Classifier Performance ===", y_pred_pac),
):
    print(title)
    print("Accuracy:", accuracy_score(y_test, preds))
    print(classification_report(y_test, preds))
    # Rows are the true labels, columns the predicted labels.
    print("Confusion Matrix:\n", confusion_matrix(y_test, preds))


=== Logistic Regression Performance ===
Accuracy: 0.9817593790426908
              precision    recall  f1-score   support

           0       0.99      0.97      0.98      3491
           1       0.97      0.99      0.98      4239

    accuracy                           0.98      7730
   macro avg       0.98      0.98      0.98      7730
weighted avg       0.98      0.98      0.98      7730

Confusion Matrix:
 [[3382  109]
 [  32 4207]]

=== Passive Aggressive Classifier Performance ===
Accuracy: 0.9899094437257439
              precision    recall  f1-score   support

           0       0.99      0.98      0.99      3491
           1       0.99      1.00      0.99      4239

    accuracy                           0.99      7730
   macro avg       0.99      0.99      0.99      7730
weighted avg       0.99      0.99      0.99      7730

Confusion Matrix:
 [[3431   60]
 [  18 4221]]


In [8]:
# Two hand-written headlines used as a quick smoke test of the trained model.
sample_news = [
    "The government has announced a new policy to boost economy.",
    "Aliens landed in my backyard yesterday night!",
]

# Reuse the already-fitted vectorizer (transform only, no refitting) so the
# samples are encoded with the training vocabulary.
sample_tfidf = vectorizer.transform(sample_news)
sample_predictions = pac_model.predict(sample_tfidf)

print("\nSample Predictions (0=Fake, 1=True):")
print(sample_predictions)


Sample Predictions (0=Fake, 1=True):
[0 0]
