In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

In [3]:
# Read the two input CSVs from the current working directory, Fake.csv first
# and True.csv second; both reads finish before anything is printed.
fake, true = (pd.read_csv(csv_name) for csv_name in ("Fake.csv", "True.csv"))

# Show each frame's (rows, columns) as a quick sanity check on the load.
print("Fake news shape:", fake.shape)
print("True news shape:", true.shape)

Fake news shape: (23481, 4)
True news shape: (21417, 4)


In [4]:
# Tag every row with its class before merging: 0 for the fake frame, 1 for
# the true frame. The column is written onto the loaded frames themselves.
fake["label"], true["label"] = 0, 1

# Stack the fake rows on top of the true rows under a fresh 0..n-1 index.
df = pd.concat((fake, true), ignore_index=True)

# Print the combined shape, then the first five rows, one after the other.
print(df.shape, df.head(), sep="\n")

(44898, 5)
                                               title  \
0   Donald Trump Sends Out Embarrassing New Year’...   
1   Drunk Bragging Trump Staffer Started Russian ...   
2   Sheriff David Clarke Becomes An Internet Joke...   
3   Trump Is So Obsessed He Even Has Obama’s Name...   
4   Pope Francis Just Called Out Donald Trump Dur...   

                                                text subject  \
0  Donald Trump just couldn t wish all Americans ...    News   
1  House Intelligence Committee Chairman Devin Nu...    News   
2  On Friday, it was revealed that former Milwauk...    News   
3  On Christmas day, Donald Trump announced that ...    News   
4  Pope Francis used his annual Christmas Day mes...    News   

                date  label  
0  December 31, 2017      0  
1  December 31, 2017      0  
2  December 30, 2017      0  
3  December 29, 2017      0  
4  December 25, 2017      0  


In [5]:
# Features come from the `text` column; the target is the 0/1 `label` column.
X, y = df["text"], df["label"]

# Hold out 20% of the rows for evaluation. The fixed random_state keeps the
# split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Row counts of each partition.
print("Training size:", X_train.shape)
print("Testing size:", X_test.shape)

Training size: (35918,)
Testing size: (8980,)


In [6]:
# Fit the TF-IDF vectorizer on the training texts only, then reuse the fitted
# vectorizer to transform the test texts, so the test rows play no part in
# fitting. max_features=5000 caps the size of the learned vocabulary.
tfidf = TfidfVectorizer(max_features=5000)
X_train_tfidf, X_test_tfidf = (
    tfidf.fit_transform(X_train),
    tfidf.transform(X_test),
)
print("Done!")

Done!


In [7]:
# Train a logistic-regression classifier, using the estimator's default
# settings, on the TF-IDF training matrix. fit() returns the estimator, so
# construction and training are chained in one expression.
model = LogisticRegression().fit(X_train_tfidf, y_train)
print("Model Trained!")

Model Trained!


In [9]:
# Predict labels for the held-out TF-IDF matrix.
y_pred = model.predict(X_test_tfidf)

# Overall accuracy, followed by the per-class precision / recall / F1 table.
# NOTE(review): these scores only describe the held-out 20% of this same
# dataset; how well the model does on text from other sources is not
# measured here -- confirm before relying on it.
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))
print(classification_report(y_true=y_test, y_pred=y_pred))

Accuracy: 0.9868596881959911
              precision    recall  f1-score   support

           0       0.99      0.99      0.99      4733
           1       0.98      0.99      0.99      4247

    accuracy                           0.99      8980
   macro avg       0.99      0.99      0.99      8980
weighted avg       0.99      0.99      0.99      8980



In [10]:
def predict_news(news, vectorizer=None, classifier=None):
    """Classify one piece of news text and print the verdict.

    Args:
        news: The text to classify, as a single string.
        vectorizer: Fitted object exposing ``transform``. Defaults to the
            notebook-level ``tfidf``.
        classifier: Fitted object exposing ``predict``. Defaults to the
            notebook-level ``model``.

    Returns:
        None. Prints "FAKE NEWS! 🚨" when the predicted label is 0 and
        "REAL NEWS! ✅" for any other label.
    """
    # Resolve the notebook globals at call time, so re-fitting `tfidf` or
    # `model` in an earlier cell is picked up without redefining this function.
    if vectorizer is None:
        vectorizer = tfidf
    if classifier is None:
        classifier = model

    # transform() is given a one-item list so a single document is vectorized.
    news_tfidf = vectorizer.transform([news])
    prediction = classifier.predict(news_tfidf)

    # Label convention used when the dataset was built: 0 = fake, 1 = real.
    if prediction[0] == 0:
        print("FAKE NEWS! 🚨")
    else:
        print("REAL NEWS! ✅")

# Smoke-test the helper on two hand-written one-line headlines.
# NOTE(review): the model was fit on df["text"], while these inputs are
# single short sentences; treat the printed verdicts as a wiring check
# rather than evidence of real-world accuracy -- confirm with realistic input.
for headline in (
    "NASA discovers water on Mars",
    "Government secretly controls weather",
):
    predict_news(headline)

FAKE NEWS! 🚨
FAKE NEWS! 🚨
