In [15]:
import os
import zipfile
import numpy as np
import pandas as pd 
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
from sklearn.feature_extraction.text import TfidfVectorizer


In [16]:
# Unpack the dataset archive into a local working directory so the CSV files
# can be read by the following cells.
archive_path = "fake-and-real-news-dataset.zip"
extract_dir = "news_data"

with zipfile.ZipFile(archive_path, mode="r") as archive:
    archive.extractall(path=extract_dir)

print(" Dataset extracted!")



 Dataset extracted!


In [17]:
# Read the two extracted CSV files into separate frames; each file holds one
# class of articles (labels are attached in a later cell).
fake_csv_path = "news_data/Fake.csv"
real_csv_path = "news_data/True.csv"

fake_df = pd.read_csv(fake_csv_path)
real_df = pd.read_csv(real_csv_path)

# Preview the first ten rows of the fake-news frame.
fake_df.head(n=10)


Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017"
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017"
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017"
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017"
5,Racist Alabama Cops Brutalize Black Boy While...,The number of cases of cops brutalizing and ki...,News,"December 25, 2017"
6,"Fresh Off The Golf Course, Trump Lashes Out A...",Donald Trump spent a good portion of his day a...,News,"December 23, 2017"
7,Trump Said Some INSANELY Racist Stuff Inside ...,In the wake of yet another court decision that...,News,"December 23, 2017"
8,Former CIA Director Slams Trump Over UN Bully...,Many people have raised the alarm regarding th...,News,"December 22, 2017"
9,WATCH: Brand-New Pro-Trump Ad Features So Muc...,Just when you might have thought we d get a br...,News,"December 21, 2017"


In [18]:
# Tag every row with the class of the file it came from so the two frames can
# be merged into a single labelled dataset.
for frame, tag in ((fake_df, "FAKE"), (real_df, "REAL")):
    frame["label"] = tag


In [19]:
# Sanity check: the new "label" column should now appear on every row.
fake_df.head(n=10)

Unnamed: 0,title,text,subject,date,label
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017",FAKE
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017",FAKE
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017",FAKE
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017",FAKE
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017",FAKE
5,Racist Alabama Cops Brutalize Black Boy While...,The number of cases of cops brutalizing and ki...,News,"December 25, 2017",FAKE
6,"Fresh Off The Golf Course, Trump Lashes Out A...",Donald Trump spent a good portion of his day a...,News,"December 23, 2017",FAKE
7,Trump Said Some INSANELY Racist Stuff Inside ...,In the wake of yet another court decision that...,News,"December 23, 2017",FAKE
8,Former CIA Director Slams Trump Over UN Bully...,Many people have raised the alarm regarding th...,News,"December 22, 2017",FAKE
9,WATCH: Brand-New Pro-Trump Ad Features So Muc...,Just when you might have thought we d get a br...,News,"December 21, 2017",FAKE


In [20]:
# Both frames must expose the same columns before they are concatenated.
for frame in (fake_df, real_df):
    print(frame.columns)


Index(['title', 'text', 'subject', 'date', 'label'], dtype='object')
Index(['title', 'text', 'subject', 'date', 'label'], dtype='object')


In [21]:
# Stack the fake and real articles into one frame. ignore_index=True gives the
# combined frame a clean 0..n-1 index instead of two overlapping ranges.
df = pd.concat([fake_df, real_df], ignore_index=True)

# Shuffle the rows so the two classes are interleaved. A fixed seed keeps the
# row order (and therefore everything computed downstream) reproducible on a
# fresh "Restart Kernel -> Run All"; an unseeded sample() reorders differently
# on every run.
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

# Only the article body and its label are used for modelling.
df = df[["text", "label"]]


In [22]:
# Separate the model input (article text) from the target (FAKE / REAL label).
x, y = df["text"], df["label"]

In [23]:
# Hold out 25% of the articles for evaluation; the fixed random_state makes
# the partition repeatable for a given row order.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=42,
)

In [24]:
# Feature and target counts must match within each partition.
for features, targets in ((x_train, y_train), (x_test, y_test)):
    print(features.shape, targets.shape)


(33673,) (33673,)
(11225,) (11225,)


In [25]:
# TF-IDF text featurizer:
#   stop_words="english" -> use scikit-learn's built-in English stop-word list
#   max_df=0.7           -> ignore terms found in more than 70% of documents
vectorizer = TfidfVectorizer(
    stop_words='english',
    max_df=0.7,
)

In [26]:
# Fit the vocabulary and IDF weights on the training texts only, then apply
# that same fitted transformation to the held-out texts.
tfid_train = vectorizer.fit_transform(raw_documents=x_train)
tfid_test = vectorizer.transform(raw_documents=x_test)

print("Text vectorization successfull")

Text vectorization successfull


In [29]:
# Linear Passive-Aggressive classifier trained on the TF-IDF features.
# random_state pins the per-epoch shuffling of the training data so repeated
# runs of the notebook produce the same fitted model.
model = PassiveAggressiveClassifier(max_iter=100, random_state=42)

try:
    model.fit(tfid_train, y_train)
except Exception as e:
    # Report what went wrong and re-raise: silently continuing would leave the
    # following cells running against an unfitted (or stale) `model`.
    print(f"there is an error in training: {e!r}")
    raise

print("Model Trained")

Model Trained


In [31]:
# Predict a label for every held-out article from its TF-IDF vector.
y_pred = model.predict(X=tfid_test)

# Display the predicted labels as the cell output.
y_pred

array(['REAL', 'FAKE', 'REAL', ..., 'REAL', 'FAKE', 'FAKE'], dtype='<U4')

In [32]:
# Compare the predictions against the true held-out labels:
#   acc    - overall fraction of correct predictions
#   cm     - confusion matrix of true vs. predicted labels
#   report - per-class precision / recall / f1 as formatted text
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
report = classification_report(y_true=y_test, y_pred=y_pred)


In [39]:
# Print the evaluation summary: headline accuracy first, then the detailed
# confusion matrix and per-class report under their own headings.
print("Evaluation Results :")
print("------------------------")
print(f"Accuracy score is : {acc:.5f}")

for heading, value in (
    ("\n Confusion_matrix : \n", cm),
    ("\n Classification report : \n", report),
):
    print(heading, value)


Evaluation Results :
------------------------
Accuracy score is : 0.99376

 Confusion_matrix : 
 [[5832   42]
 [  28 5323]]

 Classification report : 
               precision    recall  f1-score   support

        FAKE       1.00      0.99      0.99      5874
        REAL       0.99      0.99      0.99      5351

    accuracy                           0.99     11225
   macro avg       0.99      0.99      0.99     11225
weighted avg       0.99      0.99      0.99     11225

