In [2]:
# ✅ Fake News Detection using TF-IDF and Logistic Regression

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report



In [5]:
# ✅ STEP 1: Load the dataset from a local CSV file
# NOTE: despite its name, `url` holds a filesystem path rather than a remote URL
# (it looks like a Colab-style /content path — adjust for other environments).
url = "/content/fakenews.csv"
# The python parsing engine is used together with on_bad_lines. Malformed rows
# are still left out of the frame, but 'warn' reports each one instead of
# discarding it silently, so any data loss while loading is visible.
df = pd.read_csv(url, engine='python', on_bad_lines='warn')

In [8]:
# ✅ STEP 2: Check and clean data
# Keep only the three columns used below and drop rows missing any of them.
df = df[['title', 'text', 'subject']].dropna()
# Model input: title and body text joined into one string per row.
df['content'] = df['title'] + " " + df['text']
# Binary target: 0 when subject is 'News' or 'politics', 1 for every other subject.
# Vectorised isin() replaces the row-by-row apply/lambda and yields the same values.
# NOTE(review): the target is derived from `subject`, not from a ground-truth
# fake/real flag — confirm this mapping really separates fake from real articles.
df['label'] = (~df['subject'].isin(['News', 'politics'])).astype('int64')

In [10]:
# ✅ STEP 3: Split data
# Features are the combined title/body text; the target is the binary label.
X, y = df['content'], df['label']

# Hold out 25% of the rows for evaluation; the fixed random_state keeps the
# shuffle, and therefore the split, reproducible between runs.
# NOTE(review): the split is not stratified — consider stratify=y if the class
# balance matters for the evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)


In [11]:
# ✅ STEP 4: Vectorize using TF-IDF
# English stop words are removed, and terms that occur in more than 70% of the
# training documents are ignored (max_df=0.7).
vectorizer = TfidfVectorizer(max_df=0.7, stop_words='english')
# The vocabulary and IDF weights are learned from the training split only; the
# test split is then transformed with the already-fitted vectorizer.
X_train_vec = vectorizer.fit_transform(raw_documents=X_train)
X_test_vec = vectorizer.transform(raw_documents=X_test)


In [12]:
# ✅ STEP 5: Model training
# Logistic regression with scikit-learn's default settings, fitted on the
# TF-IDF features of the training split.
model = LogisticRegression()
model.fit(X=X_train_vec, y=y_train)

In [13]:
# ✅ STEP 6: Prediction and Accuracy
# Predict labels for the held-out split.
y_pred = model.predict(X_test_vec)

# Overall accuracy first, then the per-class precision / recall / F1 table.
test_accuracy = accuracy_score(y_test, y_pred)
test_report = classification_report(y_test, y_pred)

print("✅ Accuracy Score:", test_accuracy)
print("\n📊 Classification Report:\n", test_report)

✅ Accuracy Score: 0.7136773973769375

📊 Classification Report:
               precision    recall  f1-score   support

           0       0.75      0.86      0.80      3971
           1       0.58      0.40      0.48      1900

    accuracy                           0.71      5871
   macro avg       0.67      0.63      0.64      5871
weighted avg       0.70      0.71      0.70      5871

