# 📰 Fake News Detection using Logistic Regression
This notebook trains a Logistic Regression model to classify news articles as **Fake** or **Real** using **TF-IDF features**.

In [None]:
# 📦 Imports
import pandas as pd
import numpy as np
import re
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [None]:
# 📂 Load dataset (from previous file)
# Read the CSV into a DataFrame, then show its first rows as the cell output.
DATA_PATH = "fake_or_real_news.csv"
df = pd.read_csv(DATA_PATH)
df.head()

In [None]:
# 🧹 Clean the text data
def clean_text(text):
    """Normalize raw text before it is vectorized.

    Lower-cases the input, removes every character that is neither an ASCII
    letter nor whitespace, and collapses each whitespace run into one space.

    Args:
        text: Value to clean. Non-string values are converted with ``str()``.
            ``None`` and float NaN are treated as missing and yield ``""``.

    Returns:
        str: The cleaned text without leading or trailing whitespace.
    """
    # Missing values would otherwise turn into the literal words "none"/"nan".
    # `text != text` is true only for NaN.
    if text is None or (isinstance(text, float) and text != text):
        return ""
    text = str(text).lower()
    # Whitespace (newlines, tabs, ...) must survive this step: it separates
    # words, and deleting it would fuse the last word of one line with the
    # first word of the next. It is collapsed to single spaces just below.
    text = re.sub(r'[^a-z\s]', '', text)
    text = re.sub(r'\s+', ' ', text)
    return text.strip()

# Run clean_text over every entry of the 'text' column and store the result
# in a new 'cleaned_text' column; the preview below shows it next to the rest.
cleaned_column = df['text'].map(clean_text)
df['cleaned_text'] = cleaned_column
df.head()

In [None]:
# 📊 Check class distribution
# Number of rows per distinct value of the 'label' column.
label_counts = df['label'].value_counts()
label_counts

In [None]:
# 🔠 TF-IDF Vectorization
vectorizer = TfidfVectorizer(stop_words='english', max_df=0.7)

# Fit the vocabulary and IDF weights on the training rows only. Fitting on the
# whole corpus would let test-set statistics leak into the features and make
# the reported accuracy optimistic.
# train_test_split picks rows from the sample count, test_size and random_state
# alone, so splitting the row positions with the same settings as the later
# split of X and y yields exactly the rows that end up in X_train.
# NOTE: keep test_size/random_state here identical to the split cell below.
train_positions, _ = train_test_split(
    np.arange(len(df)), test_size=0.2, random_state=42
)
vectorizer.fit(df['cleaned_text'].iloc[train_positions])

# Transform every row with the train-fitted vectorizer; X still covers the
# full dataset so it can be split together with y afterwards.
X = vectorizer.transform(df['cleaned_text'])
y = df['label']

In [None]:
# 🔍 Split the data
# Hold out 20% of the rows for evaluation; the fixed seed keeps the partition
# reproducible across runs.
split_parts = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# 🤖 Train the model
# Fit a logistic-regression classifier (library defaults) on the training split.
model = LogisticRegression().fit(X_train, y_train)

# Score the held-out split: fraction of test rows whose prediction matches.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"✅ Accuracy: {accuracy * 100:.2f}%")

In [None]:
# 📈 Evaluation
# Text summary of per-class metrics for the held-out predictions.
print("\n📄 Classification Report:\n")
report = classification_report(y_test, y_pred)
print(report)

# Count matrix of true classes (rows) against predicted classes (columns).
print("\n📊 Confusion Matrix:\n")
matrix = confusion_matrix(y_test, y_pred)
print(matrix)

In [None]:
# 🧪 Predict on custom input
def predict_news(news_text):
    """Classify one piece of news text with the trained model.

    The text is cleaned with ``clean_text``, vectorized with the already
    fitted ``vectorizer`` and passed to ``model.predict``.

    Args:
        news_text: Raw article or headline text.

    Returns:
        str: If the model predicts a string label, that label upper-cased
        (e.g. "FAKE" / "REAL"). Otherwise "REAL" when the predicted class
        equals 1 and "FAKE" for any other value.
    """
    cleaned = clean_text(news_text)
    vec = vectorizer.transform([cleaned])
    pred = model.predict(vec)[0]
    # predict() returns the same label values the model was trained on, and
    # y is taken directly from df['label'] without encoding.
    # NOTE(review): the label values are not visible here; if the column holds
    # strings such as "FAKE"/"REAL", `pred == 1` would never be true and every
    # input would be reported as "FAKE". Return string labels as-is instead.
    if isinstance(pred, str):
        return pred.upper()
    return "REAL" if pred == 1 else "FAKE"

# Try the classifier on a made-up headline; the returned label is the cell output.
sample_headline = "Aliens have taken over the White House!"
predict_news(sample_headline)