In [10]:
                                    """
                                    [***       ---JUNAID---
                                    =====================================
                                     FAKE NEWS DETECTION - ML PROJECT
                                    =====================================
                                    Dataset Used:
                                    - /home/junaid/Downloads/FakeNewsNet.csv
                                    
                                    Columns Used:
                                    - title (text)
                                    - real (label: 1 = real news, 0 = fake news)
                                    
                                    ML Models:
                                    - Logistic Regression
                                    - Multinomial Naive Bayes
                                    - Passive Aggressive Classifier
                                    - Linear SVC
                                    
                                    Output:
                                    - Best model accuracy
                                    - Classification report
                                    - confusion_matrix.png                     ****]
                                    """



In [12]:
# ============================
# 1. IMPORTING LIBRARIES
# ============================
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression, PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import LinearSVC

In [13]:
# ============================
# 2. LOAD DATASET
# ============================

# Location of the FakeNewsNet CSV. The original absolute path remains the
# default; set the FAKENEWS_CSV environment variable to run the notebook on
# another machine without editing this cell.
DATA_PATH = os.environ.get("FAKENEWS_CSV", "/home/junaid/Downloads/FakeNewsNet.csv")

df = pd.read_csv(DATA_PATH)

# Fail fast with a clear message if the two columns this notebook reads
# ("title" for the text, "real" for the label) are not in the file.
missing_columns = {"title", "real"} - set(df.columns)
if missing_columns:
    raise KeyError(f"Dataset is missing required column(s): {sorted(missing_columns)}")

print("Dataset Loaded Successfully")
print(df.head())
print("\nColumns Found:", df.columns)

Dataset Loaded Successfully
                                               title  \
0  Kandi Burruss Explodes Over Rape Accusation on...   
1  People's Choice Awards 2018: The best red carp...   
2  Sophia Bush Sends Sweet Birthday Message to 'O...   
3  Colombian singer Maluma sparks rumours of inap...   
4  Gossip Girl 10 Years Later: How Upper East Sid...   

                                            news_url        source_domain  \
0  http://toofab.com/2017/05/08/real-housewives-a...           toofab.com   
1  https://www.today.com/style/see-people-s-choic...        www.today.com   
2  https://www.etonline.com/news/220806_sophia_bu...     www.etonline.com   
3  https://www.dailymail.co.uk/news/article-33655...  www.dailymail.co.uk   
4  https://www.zerchoo.com/entertainment/gossip-g...      www.zerchoo.com   

   tweet_num  real  
0         42     1  
1          0     1  
2         63     1  
3         20     1  
4         38     1  

Columns Found: Index(['title', 'news_url', 's

In [21]:
# ============================
# 3. CLEAN & PREPARE DATA
# ============================

# Replace missing headlines (NaN) with empty strings.
df["title"] = df["title"].fillna("")

# Features are the news headlines; targets are the labels (1 = real, 0 = fake).
X, y = df["title"], df["real"]

In [22]:
# ============================
# 4. TRAIN-TEST SPLIT
# ============================

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)


In [23]:
# ============================
# 5. TF-IDF VECTORIZATION
# ============================

# Learn the TF-IDF vocabulary from the training headlines only, then reuse the
# fitted vectorizer (no refitting) to encode the test headlines.
vectorizer = TfidfVectorizer(
    max_df=0.7,            # drop terms found in more than 70% of the documents
    stop_words="english",  # use scikit-learn's built-in English stop-word list
)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

In [24]:
# ============================
# 6. MACHINE LEARNING MODELS
# ============================

# Seed passed to every estimator that accepts one. Without it the
# PassiveAggressive classifier (which shuffles the training data) can score
# differently on each run, which in turn can change which model is "best".
RANDOM_STATE = 42

# Candidate classifiers, keyed by the display name used in the printed log.
models = {
    "LogisticRegression": LogisticRegression(max_iter=300, random_state=RANDOM_STATE),
    "MultinomialNB": MultinomialNB(),  # has no random_state parameter
    "PassiveAggressive": PassiveAggressiveClassifier(max_iter=200, random_state=RANDOM_STATE),
    "LinearSVC": LinearSVC(random_state=RANDOM_STATE),
}

# Maps model name -> accuracy on the held-out test split.
results = {}

print("\n============================")
print(" TRAINING MODELS ")
print("============================")

for name, model in models.items():
    print(f"\nTraining {name} ...")
    # Fit on the training TF-IDF matrix, then score on the test matrix.
    model.fit(X_train_tfidf, y_train)
    predictions = model.predict(X_test_tfidf)
    acc = accuracy_score(y_test, predictions)
    results[name] = acc
    print(f"{name} Accuracy: {acc:.4f}")


 TRAINING MODELS 

Training LogisticRegression ...
LogisticRegression Accuracy: 0.8297

Training MultinomialNB ...
MultinomialNB Accuracy: 0.8216

Training PassiveAggressive ...
PassiveAggressive Accuracy: 0.7828

Training LinearSVC ...
LinearSVC Accuracy: 0.8304


In [18]:
# ============================
# 7. BEST MODEL SELECTION
# ============================

# Take the (name, accuracy) pair with the highest accuracy; on a tie the
# earliest entry in `results` wins. `results` holds test-split accuracies.
best_model_name, best_accuracy = max(results.items(), key=lambda item: item[1])
best_model = models[best_model_name]

for banner_line in (
    "\n============================",
    " BEST MODEL SELECTED ",
    "============================",
):
    print(banner_line)
print(f"Best Model: {best_model_name} with accuracy {best_accuracy:.4f}")



 BEST MODEL SELECTED 
Best Model: LinearSVC with accuracy 0.8304


In [19]:
# ============================
# 8. FINAL EVALUATION
# ============================

# Predictions of the selected model on the held-out test features.
final_preds = best_model.predict(X_test_tfidf)

# Heading, a blank line, then the per-class precision / recall / F1 table.
print("\nClassification Report:\n", classification_report(y_test, final_preds), sep="\n")


Classification Report:

              precision    recall  f1-score   support

           0       0.69      0.55      0.61      1131
           1       0.86      0.92      0.89      3509

    accuracy                           0.83      4640
   macro avg       0.78      0.74      0.75      4640
weighted avg       0.82      0.83      0.82      4640



In [20]:
# ============================
# 9. CONFUSION MATRIX IMAGE
# ============================

# Pin the label order so row/column 0 is always the "fake" class and 1 the
# "real" class, and the matrix stays 2x2 even if one class is missing from
# the predictions. Rows are true labels, columns are predicted labels.
class_labels = [0, 1]
class_names = ["Fake (0)", "Real (1)"]
cm = confusion_matrix(y_test, final_preds, labels=class_labels)

# Explicit figure/axes objects instead of the implicit pyplot state machine.
fig, ax = plt.subplots(figsize=(5, 4))
image = ax.imshow(cm)
fig.colorbar(image, ax=ax)

# Title, axis labels and class tick labels so the saved image is readable
# without the notebook next to it.
ax.set_title("Confusion Matrix")
ax.set_xlabel("Predicted label")
ax.set_ylabel("True label")
ax.set_xticks(range(len(class_names)))
ax.set_xticklabels(class_names)
ax.set_yticks(range(len(class_names)))
ax.set_yticklabels(class_names)

# Write each count into its cell. Text is white on low-count cells and black
# on high-count cells (assumes the default colormap, where low values are
# dark -- adjust if a different colormap is configured).
threshold = cm.max() / 2
for (row, col), count in np.ndenumerate(cm):
    ax.text(
        col,
        row,
        str(count),
        ha="center",
        va="center",
        color="white" if count < threshold else "black",
    )

fig.tight_layout()
fig.savefig("confusion_matrix.png", dpi=150)
plt.close(fig)  # release the figure; only the PNG on disk is needed

print("\nSaved: confusion_matrix.png")
print("\nProject Finished Successfully!")


Saved: confusion_matrix.png

Project Finished Successfully!


In [7]:
# =====================================
# 10. TEST THE MODEL ON CUSTOM INPUT
# =====================================

def predict_news(text):
    """
    Predict whether a news headline is REAL or FAKE.

    Parameters
    ----------
    text : str or None
        Headline to classify. ``None`` is treated as an empty headline, in
        line with the ``fillna("")`` applied to the training titles; any
        other non-string value is converted with ``str()``.

    Returns
    -------
    str
        ``"🟢 REAL NEWS"`` when the model predicts label 1, otherwise
        ``"🔴 FAKE NEWS"``.

    Notes
    -----
    Uses the notebook-level ``vectorizer`` and ``best_model`` objects, so the
    cells that create them must have been run first.
    """
    headline = "" if text is None else str(text)

    # The vectorizer expects an iterable of documents, hence the 1-item list.
    features = vectorizer.transform([headline])
    label = best_model.predict(features)[0]

    # Green/red circle emoji; the previous literals were mis-encoded bytes of
    # these same two characters.
    return "🟢 REAL NEWS" if label == 1 else "🔴 FAKE NEWS"


# ---- TEST EXAMPLES ----

# Section banner for the custom-input demo.
for banner_line in (
    "\n============================",
    " CUSTOM NEWS PREDICTION ",
    "============================",
):
    print(banner_line)

# Classify one hand-written headline and show it next to the verdict.
test_headline = "Government announces new plan for economic growth"
result = predict_news(test_headline)

print(f"Input: {test_headline}")
print("Prediction:", result)



 CUSTOM NEWS PREDICTION 
Input: Government announces new plan for economic growth
Prediction: 🟢 REAL NEWS


In [8]:
# A few more hand-written headlines to spot-check the classifier.
samples = [
    "Breaking: Celebrity involved in secret alien scandal",
    "Prime Minister launches new digital education programme",
    "Scientists discover that chocolate cures all diseases",
]

# map() is lazy, so each headline is classified just before it is printed.
for headline, verdict in zip(samples, map(predict_news, samples)):
    print("\nHeadline:", headline)
    print("Prediction:", verdict)



Headline: Breaking: Celebrity involved in secret alien scandal
Prediction: 🔴 FAKE NEWS

Headline: Prime Minister launches new digital education programme
Prediction: 🟢 REAL NEWS

Headline: Scientists discover that chocolate cures all diseases
Prediction: 🟢 REAL NEWS
