In [None]:
# Install the third-party packages this notebook imports into the kernel's
# environment. NOTE(review): versions are not pinned — consider pinning them.
%pip install pandas scikit-learn joblib


Defaulting to user installation because normal site-packages is not writeable



[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import joblib
import re
import os

def clean_text(text) -> str:
    """Normalize raw article text into lowercase, letters-only tokens.

    Steps, in order: lowercase, drop anything starting with ``http`` up to
    the next whitespace, delete every character that is not ``a``-``z`` or
    whitespace, collapse whitespace runs to one space, and trim the ends.

    Args:
        text: The text to clean. ``None`` and float NaN (what pandas yields
            for an empty CSV cell) are treated as empty text; any other
            non-string value is converted with ``str()`` first.

    Returns:
        The cleaned string, possibly empty.
    """
    # Missing values carry no text; `text != text` is true only for NaN.
    if text is None or (isinstance(text, float) and text != text):
        return ""
    if not isinstance(text, str):
        text = str(text)

    text = text.lower()
    text = re.sub(r"http\S+", "", text)
    # Characters are deleted, not replaced by a space, so "year's" -> "years".
    text = re.sub(r"[^a-z\s]", "", text)
    text = re.sub(r"\s+", " ", text).strip()
    return text

# Current working directory of the kernel process; later cells join file
# names onto it to build the data and model-artifact paths.
base_dir = os.getcwd()
base_dir


'c:\\Users\\lnrka\\OneDrive\\Desktop\\FakeNewsApp\\backend\\model'

In [3]:
# Resolve both CSV inputs relative to base_dir rather than a hardcoded,
# machine-specific absolute path, so the notebook runs from any checkout.
fake_path = os.path.join(base_dir, "Fake.csv")
true_path = os.path.join(base_dir, "True.csv")

# read_csv raises FileNotFoundError if either file is missing from base_dir.
fake = pd.read_csv(fake_path)
true = pd.read_csv(true_path)

# Preview the first rows of each frame.
fake.head(), true.head()


(                                               title  \
 0   Donald Trump Sends Out Embarrassing New Year’...   
 1   Drunk Bragging Trump Staffer Started Russian ...   
 2   Sheriff David Clarke Becomes An Internet Joke...   
 3   Trump Is So Obsessed He Even Has Obama’s Name...   
 4   Pope Francis Just Called Out Donald Trump Dur...   
 
                                                 text subject  \
 0  Donald Trump just couldn t wish all Americans ...    News   
 1  House Intelligence Committee Chairman Devin Nu...    News   
 2  On Friday, it was revealed that former Milwauk...    News   
 3  On Christmas day, Donald Trump announced that ...    News   
 4  Pope Francis used his annual Christmas Day mes...    News   
 
                 date  
 0  December 31, 2017  
 1  December 31, 2017  
 2  December 30, 2017  
 3  December 29, 2017  
 4  December 25, 2017  ,
                                                title  \
 0  As U.S. budget fight looms, Republicans flip t...   
 1  U

In [4]:
# Label convention used for the target column: 1 = FAKE, 0 = TRUE (REAL).
# The column is added to both source frames in place.
fake["label"], true["label"] = 1, 0

# Stack fake rows on top of true rows and renumber the rows 0..n-1.
df = pd.concat([fake, true], axis=0, ignore_index=True)

df[["title", "text", "label"]].head()


Unnamed: 0,title,text,label
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,1
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,1
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",1
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",1
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,1


In [5]:
# Combine title + text into one string per article for model input.
# fillna("") stops a missing title or body from becoming the literal token
# "nan" (which astype(str) alone would produce) and leaking into the features.
df["text_combined"] = (
    df["title"].fillna("").astype(str) + " " + df["text"].fillna("").astype(str)
)
df["text_clean"] = df["text_combined"].apply(clean_text)

# Model input (cleaned text) and target (1 = fake, 0 = real).
X = df["text_clean"]
y = df["label"]

X.head(), y.head()


(0    donald trump sends out embarrassing new years ...
 1    drunk bragging trump staffer started russian c...
 2    sheriff david clarke becomes an internet joke ...
 3    trump is so obsessed he even has obamas name c...
 4    pope francis just called out donald trump duri...
 Name: text_clean, dtype: object,
 0    1
 1    1
 2    1
 3    1
 4    1
 Name: label, dtype: int64)

In [6]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

# TF-IDF over unigrams and bigrams, capped at 5000 features.
vectorizer = TfidfVectorizer(ngram_range=(1, 2), max_features=5000)

# The vocabulary is learned from the training split only; the test split is
# transformed with that fitted vocabulary (the left element is evaluated first).
X_train_vec, X_test_vec = vectorizer.fit_transform(X_train), vectorizer.transform(X_test)

(X_train_vec.shape, X_test_vec.shape)


((35918, 5000), (8980, 5000))

In [7]:
# Train a logistic-regression classifier on the TF-IDF training matrix.
# fit() returns the estimator, so construction and training are chained.
model = LogisticRegression(max_iter=2000).fit(X_train_vec, y_train)

# Score the held-out split.
y_pred = model.predict(X_test_vec)
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", acc)


Accuracy: 0.9913140311804008


In [8]:
# Both artifacts are written into base_dir: the classifier and the fitted
# vectorizer are saved as separate files.
model_path, vec_path = (
    os.path.join(base_dir, file_name)
    for file_name in ("fake_news_model.pkl", "vectorizer.pkl")
)

joblib.dump(value=model, filename=model_path)
joblib.dump(value=vectorizer, filename=vec_path)

(model_path, vec_path)


('c:\\Users\\lnrka\\OneDrive\\Desktop\\FakeNewsApp\\backend\\model\\fake_news_model.pkl',
 'c:\\Users\\lnrka\\OneDrive\\Desktop\\FakeNewsApp\\backend\\model\\vectorizer.pkl')

In [10]:
# Working directory of the kernel at the time this cell runs.
BASE_DIR = os.getcwd()
# NOTE(review): this only points at the model folder when the notebook is run
# from inside it — confirm before launching the kernel from another directory.
MODEL_DIR = BASE_DIR   # the working directory is assumed to be the model folder
