In [1]:
#1.Import Required Libraries
import pickle
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [2]:
# 2. Load the cleaned dataset
# Read the preprocessed news CSV and preview its first rows as the cell output.
# NOTE(review): the preview shows an "Unnamed: 0" column, which looks like an
# index written out by an earlier export — confirm whether it should be dropped upstream.
cleaned_csv_path = "../data/processed/news_cleaned.csv"
df = pd.read_csv(cleaned_csv_path)
df.head()

Unnamed: 0,title,text,label,clean_text
0,Ben Stein Calls Out 9th Circuit Court: Committ...,"21st Century Wire says Ben Stein, reputable pr...",0,st century wire say ben stein reputable profes...
1,Trump drops Steve Bannon from National Securit...,WASHINGTON (Reuters) - U.S. President Donald T...,1,washington reuters u president donald trump re...
2,Puerto Rico expects U.S. to lift Jones Act shi...,(Reuters) - Puerto Rico Governor Ricardo Rosse...,1,reuters puerto rico governor ricardo rossello ...
3,OOPS: Trump Just Accidentally Confirmed He Le...,"On Monday, Donald Trump once again embarrassed...",0,monday donald trump embarrassed country accide...
4,Donald Trump heads for Scotland to reopen a go...,"GLASGOW, Scotland (Reuters) - Most U.S. presid...",1,glasgow scotland reuters u presidential candid...


In [3]:
# 3. Define features & target
# Keep only rows where both the cleaned text and the label are present.
# An empty cleaned string is read back from CSV as NaN by pandas, and
# TfidfVectorizer raises on NaN documents, so such rows would break the
# vectorization step. When nothing is missing this filter is a no-op.
# Filtering into a new frame leaves `df` untouched and keeps X and y row-aligned.
labelled_df = df.dropna(subset=["clean_text", "label"])
X = labelled_df["clean_text"]
y = labelled_df["label"]

In [4]:
# 4. Train–test split
# Hold out 20% of the rows for evaluation. Stratifying on y keeps the label
# proportions the same in both partitions; the fixed seed makes the split repeatable.
split_options = {"test_size": 0.2, "random_state": 42, "stratify": y}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [5]:
# 5. TF-IDF vectorization
# Unigrams and bigrams, with the vocabulary capped at 5,000 features.
tfidf = TfidfVectorizer(ngram_range=(1, 2), max_features=5000)

# Learn the vocabulary and IDF weights from the training texts only ...
X_train_tfidf = tfidf.fit_transform(X_train)
# ... then apply that already-fitted vectorizer to the test texts.
X_test_tfidf = tfidf.transform(X_test)

In [6]:
# 6. Train the logistic regression model
# max_iter=1000 sets the iteration cap for the solver.
# fit() returns the estimator itself, so construction and fitting are chained;
# the trailing expression displays the fitted estimator as the cell output.
model = LogisticRegression(max_iter=1000).fit(X_train_tfidf, y_train)
model

In [7]:
# 7. Quick accuracy check on the held-out test split
# The score is computed on X_test_tfidf / y_test, not on the training data.
# NOTE(review): in the five previewed rows, the label-1 texts carry a "reuters"
# token while the label-0 texts do not — confirm the score is not inflated by
# that source marker.
y_pred = model.predict(X_test_tfidf)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
accuracy

0.9906076722869752

## 8.Save Model & Vectorizer

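- ⚠️ Very important for deployment later: save the fitted vectorizer together with the model, because new text must be transformed with the same vocabulary and IDF weights the model was trained on.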

In [9]:
# Persist both fitted artifacts: the classifier and the TF-IDF vectorizer
# that produced its input features.
MODEL_DIR = Path("../models")
# Create the output folder if it is missing, so a fresh checkout does not fail
# with FileNotFoundError when the files are opened for writing.
MODEL_DIR.mkdir(parents=True, exist_ok=True)

with open(MODEL_DIR / "fake_news_model.pkl", "wb") as f:
    pickle.dump(model, f)

with open(MODEL_DIR / "tfidf_vectorizer.pkl", "wb") as f:
    pickle.dump(tfidf, f)

print("✅ Model and Vectorizer saved successfully")

✅ Model and Vectorizer saved successfully


## ✅ STEP 3 OUTPUT (What we Achieved)

- ✔ Converted text to numerical features
- ✔ Trained a strong ML model
- ✔ Achieved high accuracy on the held-out test split (≈ 0.99)
- ✔ Saved model for real-world use

### This is real ML engineering, not tutorial stuff.