In [1]:
!pip install "numpy<2"


Defaulting to user installation because normal site-packages is not writeable


In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import re


def load_data(path="fake_news_dataset.csv"):
    """Read the news dataset from a CSV file into a DataFrame.

    Parameters
    ----------
    path : str or path-like, default "fake_news_dataset.csv"
        Location of the CSV file. The default keeps the original behaviour of
        reading ``fake_news_dataset.csv`` from the current working directory.

    Returns
    -------
    pandas.DataFrame
        The parsed file contents. The rest of this notebook reads the ``news``
        and ``subject`` columns from it.

    Raises
    ------
    FileNotFoundError
        Propagated from ``pd.read_csv`` when ``path`` does not exist.
    """
    return pd.read_csv(path)


def preprocess_text(text):
    """Normalise a piece of text for bag-of-words vectorisation.

    Steps, in order:
      1. lower-case the text;
      2. drop every character that is neither a word character nor whitespace;
      3. collapse each run of whitespace into a single space and trim the ends.

    Punctuation is removed *before* whitespace is collapsed. Doing it the other
    way round leaves double spaces behind wherever a punctuation mark stood
    between two spaces (e.g. ``"a - b"``).

    Parameters
    ----------
    text : str
        Raw text. Any non-string value (for example the NaN pandas produces
        for an empty CSV cell, or ``None``) is treated as missing.

    Returns
    -------
    str
        The cleaned text, or ``""`` when ``text`` is not a string.
    """
    if not isinstance(text, str):
        # Missing cells arrive as float NaN / None and have no `.lower()`.
        return ""

    lowered = text.lower()
    without_punctuation = re.sub(r'[^\w\s]', '', lowered)
    return re.sub(r'\s+', ' ', without_punctuation).strip()


# ---------------------------------------------------------------------------
# Load, clean and label the dataset
# ---------------------------------------------------------------------------
# Text label in the `subject` column -> numeric class id used by the model.
LABEL_TO_ID = {'fake': 0, 'real': 1}

# Rows with a missing article or label are dropped up front: they carry no
# training signal, and a NaN in `news` would break the text cleaning step.
# `.assign` builds a new frame, so the cleaned column never writes into a
# filtered view of the raw data.
df = (
    load_data()
    .dropna(subset=['news', 'subject'])
    .assign(news=lambda frame: frame['news'].astype(str).apply(preprocess_text))
)

print("Subjects in the dataset:", df['subject'].unique())

X = df['news']
y = df['subject'].map(LABEL_TO_ID)

# Series.map leaves NaN for every label that is not a key of LABEL_TO_ID.
# Fail here with a clear message instead of deep inside train_test_split / fit.
unmapped_rows = y.isna()
if unmapped_rows.any():
    unexpected_labels = sorted(df.loc[unmapped_rows, 'subject'].astype(str).unique())
    raise ValueError(
        f"Unexpected values in 'subject' column: {unexpected_labels}; "
        f"expected one of {sorted(LABEL_TO_ID)}"
    )

# ---------------------------------------------------------------------------
# Split, vectorise and train
# ---------------------------------------------------------------------------
# Stratified 80/20 split with a fixed seed so the evaluation is reproducible.
X_train_text, X_test_text, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# The vocabulary is learned from the training texts only; the test texts are
# merely projected onto it, so nothing from the test split leaks into training.
vectorizer = CountVectorizer(max_features=5000, stop_words='english')
X_train = vectorizer.fit_transform(X_train_text)
X_test = vectorizer.transform(X_test_text)

model = LogisticRegression(max_iter=1000, class_weight='balanced')
model.fit(X_train, y_train)

# ---------------------------------------------------------------------------
# Evaluate on the held-out split
# ---------------------------------------------------------------------------
# NOTE(review): the recorded run of this notebook shows only 3 test samples
# per class, so these metrics are very noisy — treat them as a smoke test.
predictions = model.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print(f"\n📊 Logistic Regression Model trained successfully with an accuracy of: {accuracy * 100:.2f}%")
print("\nClassification Report (Precision, Recall, F1-Score):")
# `labels` pins the row order to the ids in LABEL_TO_ID so `target_names` always
# lines up, even when a class is absent from the test split or the predictions.
print(
    classification_report(
        y_test,
        predictions,
        labels=[0, 1],
        target_names=['Fake', 'Real'],
        zero_division=0,
    )
)


# Interactive prediction loop: reads text from stdin, classifies it with the
# fitted `vectorizer` + `model`, and prints the verdict until the user types
# 'exit' (or stdin is closed / the kernel is interrupted).
print("\n📰 Real-Time Fake News Detector")
print("Enter a news article or headline below to check if it's REAL or FAKE.")
print("Type 'exit' to quit.\n")

while True:
    try:
        user_input = input("Enter news content: ").strip()
    except (EOFError, KeyboardInterrupt):
        # stdin was closed or the kernel was interrupted: leave the loop
        # cleanly instead of ending the cell with a traceback.
        print("\nExiting the Fake News Detector. Stay informed! 👋")
        break

    if user_input.lower() == 'exit':
        print("Exiting the Fake News Detector. Stay informed! 👋")
        break

    if not user_input:
        print("⚠️ Please enter some text to analyze!\n")
        continue

    # Clean the input exactly like the training texts were cleaned, so the
    # vectorizer tokenises it the same way it tokenised the training data.
    vectorized_input = vectorizer.transform([preprocess_text(user_input)])

    if vectorized_input.nnz == 0:
        # No token of the input is in the learned vocabulary: the model would
        # answer from its intercept alone, which says nothing about this text.
        print("⚠️ None of the words in this text are known to the model, so no reliable prediction can be made.\n")
        continue

    # Derive both the predicted class and its confidence from a single
    # predict_proba call. Columns of predict_proba follow `model.classes_`,
    # so the column position is looked up explicitly rather than assuming the
    # class label can be used as an index.
    class_probabilities = model.predict_proba(vectorized_input)[0]
    best_column = class_probabilities.argmax()
    prediction = model.classes_[best_column]
    probability = class_probabilities[best_column]  # Confidence score

    if prediction == 1:
        print(f"✅ This news is likely **REAL** with {probability * 100:.2f}% confidence!\n")
    else:
        print(f"🚨 This news is likely **FAKE** with {probability * 100:.2f}% confidence!\n")


  from pandas.core import (


Subjects in the dataset: ['real' 'fake']

📊 Logistic Regression Model trained successfully with an accuracy of: 66.67%

Classification Report (Precision, Recall, F1-Score):
              precision    recall  f1-score   support

        Fake       1.00      0.33      0.50         3
        Real       0.60      1.00      0.75         3

    accuracy                           0.67         6
   macro avg       0.80      0.67      0.62         6
weighted avg       0.80      0.67      0.62         6


📰 Real-Time Fake News Detector
Enter a news article or headline below to check if it's REAL or FAKE.
Type 'exit' to quit.

Enter news content: Politician claims Earth is flat in controversial speech
🚨 This news is likely **FAKE** with 80.41% confidence!

Enter news content: MR Narendra modi is indian
✅ This news is likely **REAL** with 52.18% confidence!

Enter news content: exit
Exiting the Fake News Detector. Stay informed! 👋
