In [2]:
import pandas as pd
import numpy as np
import re
import string
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import joblib

In [3]:
# Read the two source CSVs and tag every row with its class label.
# NOTE(review): these are absolute, machine-specific paths; the notebook will
# not run on another machine without editing them.
true_df = pd.read_csv(r"C:\Users\subha\Downloads\archive (13)\True.csv").assign(label='REAL')
fake_df = pd.read_csv(r"C:\Users\subha\Downloads\archive (13)\Fake.csv").assign(label='FAKE')

# Stack both sets, then shuffle with a fixed seed so the row order is repeatable.
df = (
    pd.concat([true_df, fake_df], ignore_index=True)
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

df.head()


Unnamed: 0,title,text,subject,date,label
0,BREAKING: GOP Chairman Grassley Has Had Enoug...,"Donald Trump s White House is in chaos, and th...",News,"July 21, 2017",FAKE
1,Failed GOP Candidates Remembered In Hilarious...,Now that Donald Trump is the presumptive GOP n...,News,"May 7, 2016",FAKE
2,Mike Pence’s New DC Neighbors Are HILARIOUSLY...,Mike Pence is a huge homophobe. He supports ex...,News,"December 3, 2016",FAKE
3,California AG pledges to defend birth control ...,SAN FRANCISCO (Reuters) - California Attorney ...,politicsNews,"October 6, 2017",REAL
4,AZ RANCHERS Living On US-Mexico Border Destroy...,Twisted reasoning is all that comes from Pelos...,politics,"Apr 25, 2017",FAKE


In [4]:
def clean_text(text):
    """Normalise raw article text before vectorisation.

    Steps, in order: lowercase, delete ASCII punctuation
    (``string.punctuation`` only, so typographic quotes such as ``’`` are
    kept), delete runs of digits, collapse whitespace runs to one space and
    strip both ends.

    Parameters
    ----------
    text : str or any
        Raw text. Any non-string value (for example ``None`` or the float
        NaN that pandas produces for a missing CSV field) is treated as
        empty text instead of raising ``AttributeError``.

    Returns
    -------
    str
        The normalised text; ``""`` for non-string input.
    """
    # This function is applied element-wise to a DataFrame column, where a
    # missing title or body turns the concatenated value into NaN (a float).
    if not isinstance(text, str):
        return ""
    text = text.lower()
    text = text.translate(str.maketrans('', '', string.punctuation))
    text = re.sub(r'\d+', '', text)
    # Collapse last: removing punctuation and digits can leave double spaces.
    text = re.sub(r'\s+', ' ', text).strip()
    return text

# Model input is the headline followed by the article body, separated by one
# space, then normalised element-wise with clean_text.
df['combined'] = df['title'] + " " + df['text']
df['clean_text'] = df['combined'].map(clean_text)


In [5]:
# Features are the cleaned text column; targets are the REAL/FAKE labels.
X, y = df['clean_text'], df['label']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [6]:
# Two-stage pipeline: TF-IDF features (English stop words removed, max_df=0.7)
# feeding a logistic-regression classifier with a raised iteration cap.
model = Pipeline(steps=[
    ('tfidf', TfidfVectorizer(max_df=0.7, stop_words='english')),
    ('clf', LogisticRegression(max_iter=1000)),
])

# Fit on the training split only; the fitted pipeline is the cell's displayed value.
model.fit(X_train, y_train)


0,1,2
,steps,"[('tfidf', ...), ('clf', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,input,'content'
,encoding,'utf-8'
,decode_error,'strict'
,strip_accents,
,lowercase,True
,preprocessor,
,tokenizer,
,analyzer,'word'
,stop_words,'english'
,token_pattern,'(?u)\\b\\w\\w+\\b'

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,1000


In [7]:
# Score the held-out split and print accuracy, per-class report and confusion matrix.
# NOTE(review): the one REAL row in the df.head() preview starts with a
# "(Reuters)" dateline. If that holds across the REAL set, the model may be
# separating sources rather than truthfulness. Confirm before trusting this score.
y_pred = model.predict(X_test)

for heading, metric in (
    ("Accuracy:", accuracy_score),
    ("\nClassification Report:\n", classification_report),
    ("\nConfusion Matrix:\n", confusion_matrix),
):
    print(heading, metric(y_test, y_pred))


Accuracy: 0.9856347438752784

Classification Report:
               precision    recall  f1-score   support

        FAKE       0.99      0.98      0.99      4669
        REAL       0.98      0.99      0.99      4311

    accuracy                           0.99      8980
   macro avg       0.99      0.99      0.99      8980
weighted avg       0.99      0.99      0.99      8980


Confusion Matrix:
 [[4593   76]
 [  53 4258]]


In [8]:
# Persist the fitted pipeline; the returned list of written filenames is the cell output.
joblib.dump(value=model, filename="fake_news_model_v2.pkl")


['fake_news_model_v2.pkl']

In [9]:
def trim_text(text, max_len=500):
    """Return the leading slice ``text[:max_len]`` (first 500 characters by default)."""
    head = slice(None, max_len)
    return text[head]

def predict_news(text, model):
    """Classify one piece of text and report the model's confidence.

    The text is first cut with ``trim_text`` (default length), then
    normalised with ``clean_text``, and passed to the model as a
    one-element batch.

    Returns
    -------
    tuple
        ``(label, confidence)``: the first element of ``model.predict`` and
        the largest value in the ``model.predict_proba`` row, scaled to a
        percentage and rounded to two decimals.
    """
    # NOTE(review): only the trimmed prefix is scored here, while training
    # used the full title+text. Confirm this mismatch is intended.
    batch = [clean_text(trim_text(text))]
    label = model.predict(batch)[0]
    class_probs = model.predict_proba(batch)[0]
    return label, round(max(class_probs) * 100, 2)


In [10]:
# Hand-written headlines for a quick sanity check of the fitted model.
samples = [
    "NASA confirms new satellite launch to monitor climate.",
    "Obama caught on camera meeting aliens!",
    "Bill Gates to implant chips in everyone by 2026.",
    "Government introduces AI education in schools.",
]

# Print each headline followed by its predicted label and confidence.
for s in samples:
    label, confidence = predict_news(s, model)
    print(f"Text: {s}", f"→ Prediction: {label} ({confidence}%)\n", sep="\n")


Text: NASA confirms new satellite launch to monitor climate.
→ Prediction: FAKE (71.99%)

Text: Obama caught on camera meeting aliens!
→ Prediction: FAKE (96.65%)

Text: Bill Gates to implant chips in everyone by 2026.
→ Prediction: FAKE (72.7%)

Text: Government introduces AI education in schools.
→ Prediction: FAKE (62.01%)



In [11]:
import re
import string

def clean_text(text):
    """Lowercase ``text``, drop ASCII punctuation and digit runs, and squeeze whitespace.

    Returns the normalised string with leading and trailing whitespace removed.
    """
    lowered = text.lower()
    without_punct = lowered.translate(str.maketrans('', '', string.punctuation))
    without_digits = re.sub(r'\d+', '', without_punct)
    # Squeeze after the deletions so the gaps they leave collapse to single spaces.
    return re.sub(r'\s+', ' ', without_digits).strip()


In [12]:
def predict_news(text, model):
    """Predict a label for ``text`` and return it with a confidence percentage.

    The whole text is normalised with ``clean_text`` (no trimming in this
    version) and scored as a one-element batch. The confidence is the
    largest ``predict_proba`` value times 100, rounded to two decimals.
    """
    features = [clean_text(text)]
    label = model.predict(features)[0]
    top_probability = max(model.predict_proba(features)[0])

    return label, round(top_probability * 100, 2)


In [13]:
# Second batch of hand-written headlines, scored without trimming.
sample_inputs = [
    "NASA successfully launches a satellite to track climate change.",
    "BREAKING: Biden caught in scandal involving Mars aliens!",
    "Government announces new economic stimulus package.",
    "Elon Musk says Tesla will move headquarters to Mars."
]

# One text line and one prediction line per headline, then a blank line.
for input_text in sample_inputs:
    label, conf = predict_news(input_text, model)
    print(
        *(f"Text: {input_text}", f"Prediction: {label} (Confidence: {conf}%)\n"),
        sep="\n",
    )


Text: NASA successfully launches a satellite to track climate change.
Prediction: FAKE (Confidence: 75.57%)

Text: BREAKING: Biden caught in scandal involving Mars aliens!
Prediction: FAKE (Confidence: 96.96%)

Text: Government announces new economic stimulus package.
Prediction: FAKE (Confidence: 50.9%)

Text: Elon Musk says Tesla will move headquarters to Mars.
Prediction: FAKE (Confidence: 68.09%)



In [14]:
# Third batch of hand-written headlines for spot-checking predictions.
test_samples = [
    "Scientists discover a new species in the Amazon rainforest.",
    "Obama found running secret government from a UFO!",
    "Google launches new AI chip to power Gemini models.",
    "Bill Gates admits plan to control the world through vaccines!"
]

# Show each headline with the predicted label and confidence beneath it.
for text in test_samples:
    verdict = predict_news(text, model)
    label, conf = verdict
    print(f"{text}\n→ {label} ({conf}%)\n")


Scientists discover a new species in the Amazon rainforest.
→ FAKE (75.61%)

Obama found running secret government from a UFO!
→ FAKE (89.1%)

Google launches new AI chip to power Gemini models.
→ FAKE (79.12%)

Bill Gates admits plan to control the world through vaccines!
→ FAKE (77.95%)



In [15]:
from sklearn.metrics import accuracy_score

# Re-score the held-out split; this repeats the accuracy figure printed earlier.
y_pred = model.predict(X_test)
print("Test Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))


Test Accuracy: 0.9856347438752784


In [16]:
import streamlit as st
import joblib
import re
import string

In [17]:
# Reload the persisted pipeline from disk.
# NOTE: joblib.load unpickles the file, so only load model files from a trusted source.
model = joblib.load(filename="fake_news_model_v2.pkl")

In [18]:
def clean_text(text):
    """Normalise text for the classifier.

    Lowercases the input, removes every character in ``string.punctuation``,
    removes digit runs, then collapses whitespace runs to one space and
    strips the ends. Non-ASCII punctuation is left in place.
    """
    # Mapping an ordinal to None tells str.translate to delete that character.
    punctuation_table = dict.fromkeys(map(ord, string.punctuation))
    result = text.lower().translate(punctuation_table)
    for pattern, replacement in ((r'\d+', ''), (r'\s+', ' ')):
        result = re.sub(pattern, replacement, result)
    return result.strip()

In [19]:
def trim_text(text, max_len=500):
    """Cut ``text`` down to its leading ``max_len`` characters (500 unless overridden)."""
    return text[0:max_len]

In [20]:
def predict_news(title, body, model):
    """Classify an article given its title and body.

    The title and body are joined with a single space, cut with
    ``trim_text`` (default length), normalised with ``clean_text`` and
    scored as a one-element batch.

    Returns
    -------
    tuple
        ``(pred, conf)``: the predicted label and the largest
        ``predict_proba`` value as a percentage rounded to two decimals.
    """
    snippet = trim_text(title + " " + body)
    doc = [clean_text(snippet)]
    pred = model.predict(doc)[0]
    conf = round(max(model.predict_proba(doc)[0]) * 100, 2)
    return pred, conf

In [21]:
# Page setup for the Streamlit front end: tab title and layout, heading,
# one-line description and a horizontal divider.
st.set_page_config(layout="centered", page_title="Fake News Detector")

st.title("📰 Fake News Detection Web App")
st.markdown(
    "A simple ML-powered tool to classify news articles as **FAKE** or **REAL**."
)
st.write("---")

2025-07-06 18:41:19.789 
  command:

    streamlit run C:\Users\subha\AppData\Roaming\Python\Python312\site-packages\ipykernel_launcher.py [ARGUMENTS]


In [22]:
# Two free-text fields: a single-line headline and a taller multi-line article body.
title_input = st.text_input(
    label="📝 News Title",
    placeholder="e.g. Elon Musk plans to build city on Mars",
)
text_input = st.text_area(
    label="📄 News Body",
    height=200,
    placeholder="Enter the full news article content here...",
)


2025-07-06 18:41:19.840 Session state does not function when running a script without `streamlit run`


In [23]:
# Run the classifier when the user presses Detect and render the verdict.
if st.button("🕵️‍♂️ Detect"):
    # Treat whitespace-only fields as empty: a bare truthiness test lets "   "
    # through, and the model would then score what is effectively blank text.
    if not title_input.strip() or not text_input.strip():
        st.warning("Please enter both title and text to proceed.")
    else:
        label, confidence = predict_news(title_input, text_input, model)

        # Red banner for FAKE, green banner for anything else (REAL).
        if label == "FAKE":
            st.error(f"🛑 Prediction: **FAKE News** ({confidence}% confidence)")
        else:
            st.success(f"✅ Prediction: **REAL News** ({confidence}% confidence)")

        # Static summary of the model, shown beneath the verdict.
        st.markdown("---")
        st.subheader("🧠 Model Info")
        st.markdown("- Model: Logistic Regression + TF-IDF")
        st.markdown("- Input: Combined Title + Text (trimmed to 500 chars)")
        st.markdown("- Accuracy: ~98.5% on test data")



In [24]:
import joblib
# Write the pipeline back to disk; the returned filename list is the cell output.
# NOTE(review): when cells run in order, `model` was just loaded from this same
# file, so this re-save looks redundant. Confirm it is still needed.
joblib.dump(value=model, filename="fake_news_model_v2.pkl")


['fake_news_model_v2.pkl']