# 📰 Fake News Detection
Build a binary classification model to detect whether a news article is real or fake.

## 1. Install Required Packages

In [4]:
!pip install -q nltk scikit-learn pandas


## 2. Import Libraries

In [5]:
import pandas as pd
import numpy as np
import re
import nltk
from nltk.corpus import stopwords
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report, confusion_matrix

# Fetch the stop-word corpus if it is missing. quiet=True suppresses the
# download log, which otherwise prints the local nltk_data path into the
# saved notebook output.
nltk.download('stopwords', quiet=True)


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\lenovoo\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

## 3. Load and Prepare the Dataset

In [6]:
# Read both CSVs and tag every row with its class: 1 = fake, 0 = true
fake_df = pd.read_csv("News_dataset/Fake.csv").assign(label=1)
true_df = pd.read_csv("News_dataset/True.csv").assign(label=0)

# Stack the two frames, shuffle all rows with a fixed seed, and renumber the index
df = (
    pd.concat([fake_df, true_df], axis=0)
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)
df[['title', 'text', 'label']].head()


Unnamed: 0,title,text,label
0,Ben Stein Calls Out 9th Circuit Court: Committ...,"21st Century Wire says Ben Stein, reputable pr...",1
1,Trump drops Steve Bannon from National Securit...,WASHINGTON (Reuters) - U.S. President Donald T...,0
2,Puerto Rico expects U.S. to lift Jones Act shi...,(Reuters) - Puerto Rico Governor Ricardo Rosse...,0
3,OOPS: Trump Just Accidentally Confirmed He Le...,"On Monday, Donald Trump once again embarrassed...",1
4,Donald Trump heads for Scotland to reopen a go...,"GLASGOW, Scotland (Reuters) - Most U.S. presid...",0


## 4. Clean the Text Data

In [7]:
# English stop words held in a set so membership checks in clean_text are fast.
stop_words = {*stopwords.words('english')}

def clean_text(text, stopword_set=None):
    """Normalise one raw article body into a space-separated string of tokens.

    Steps, in order: remove every run that starts with ``http`` up to the
    next whitespace, replace each character outside A-Z/a-z with a space,
    lower-case, split on whitespace, and drop stop words.

    Parameters
    ----------
    text : object
        The raw article text. Non-string values are converted with ``str``.
        ``None`` and float NaN (a missing cell in a pandas column) are
        treated as empty text rather than becoming the literal tokens
        "none" / "nan".
    stopword_set : collection of str, optional
        Lower-case words to remove. When omitted, the notebook-level
        ``stop_words`` set is used.

    Returns
    -------
    str
        The remaining tokens joined by single spaces; "" when nothing is left.
    """
    # NaN is the only float that does not compare equal to itself.
    if text is None or (isinstance(text, float) and text != text):
        return ""
    if stopword_set is None:
        stopword_set = stop_words
    text = re.sub(r"http\S+", "", str(text))
    text = re.sub(r"[^a-zA-Z]", " ", text)
    tokens = text.lower().split()
    return " ".join(word for word in tokens if word not in stopword_set)

# Clean every article body element-wise; the raw column is kept so the two
# can be compared side by side in the preview below.
df['text_clean'] = df['text'].map(clean_text)
df.loc[:, ['text', 'text_clean']].head()


Unnamed: 0,text,text_clean
0,"21st Century Wire says Ben Stein, reputable pr...",st century wire says ben stein reputable profe...
1,WASHINGTON (Reuters) - U.S. President Donald T...,washington reuters u president donald trump re...
2,(Reuters) - Puerto Rico Governor Ricardo Rosse...,reuters puerto rico governor ricardo rossello ...
3,"On Monday, Donald Trump once again embarrassed...",monday donald trump embarrassed country accide...
4,"GLASGOW, Scotland (Reuters) - Most U.S. presid...",glasgow scotland reuters u presidential candid...


## 5. TF-IDF Vectorization

In [8]:
# Represent each cleaned article as a TF-IDF vector over at most 5,000 terms.
vectorizer = TfidfVectorizer(max_features=5000)

# Keep the SciPy sparse matrix that fit_transform returns. Almost every entry
# is zero, and a dense copy (n_articles x 5000 floats) needs gigabytes of RAM
# at this corpus size. train_test_split and LogisticRegression both accept
# sparse input directly.
# NOTE(review): the vectorizer is fitted on all rows before the train/validation
# split, so the vocabulary and IDF weights also see the validation articles.
# Fitting on the training split only would give a cleaner validation estimate.
X = vectorizer.fit_transform(df['text_clean'])
y = df['label']


## 6. Train/Test Split

In [9]:
# Hold out 20% of the rows for validation; the fixed seed makes the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


## 7. Train Logistic Regression Model

In [10]:
# Fit a logistic regression with default settings on the training split
# (fit returns the estimator itself), then label the held-out validation rows.
model = LogisticRegression().fit(X_train, y_train)
y_pred = model.predict(X_val)


## 8. Evaluate the Model

In [None]:
# Confusion matrix first (rows = true label, columns = predicted label; 1 = fake),
# then per-class precision / recall / F1 on the validation split.
print(
    confusion_matrix(y_val, y_pred),
    classification_report(y_val, y_pred),
    sep="\n",
)

[[4220   50]
 [  84 4626]]
              precision    recall  f1-score   support

           0       0.98      0.99      0.98      4270
           1       0.99      0.98      0.99      4710

    accuracy                           0.99      8980
   macro avg       0.98      0.99      0.99      8980
weighted avg       0.99      0.99      0.99      8980

