In [73]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt
import re
import string

In [74]:
data_fake=pd.read_csv("Fake.csv")
data_true=pd.read_csv("True.csv")

In [75]:
data_fake.head()

Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017"
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017"
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017"
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017"


In [76]:
data_true.head()

Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017"
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017"
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017"
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017"


In [77]:
data_fake["class"]=0
data_true["class"]=1

In [78]:
data_fake.shape, data_true.shape

((23481, 5), (21417, 5))

In [79]:
##Manual Testing by deleting rows from both datasets

for i in range(21416, 21406, -1):
    data_true.drop([i], axis=0, inplace=True)

In [80]:
data_true.shape

(21407, 5)

In [81]:
data_fake["class"]=0
data_true["class"]=1


In [82]:
data_fake.head()


Unnamed: 0,title,text,subject,date,class
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017",0
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017",0
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017",0
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017",0
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017",0


In [83]:
data_true.head()

Unnamed: 0,title,text,subject,date,class
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017",1
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017",1
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017",1
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017",1
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017",1


In [84]:
data_merge=pd.concat([data_true,data_fake])
data_merge.shape

(44888, 5)

In [85]:
data_merge.columns

Index(['title', 'text', 'subject', 'date', 'class'], dtype='object')

In [86]:
df=data_merge.drop(["title", "subject", "date"], axis=1)

In [87]:
df.head()

Unnamed: 0,text,class
0,WASHINGTON (Reuters) - The head of a conservat...,1
1,WASHINGTON (Reuters) - Transgender people will...,1
2,WASHINGTON (Reuters) - The special counsel inv...,1
3,WASHINGTON (Reuters) - Trump campaign adviser ...,1
4,SEATTLE/WASHINGTON (Reuters) - President Donal...,1


In [88]:
#Ramdom Shuffling
data= df.sample(frac=1)

In [89]:
data.head()

Unnamed: 0,text,class
20003,Feel good video of the day A female ISIS recru...,0
13825,HAVANA (Reuters) - Cuba took another step on S...,1
17978,BRUSSELS (Reuters) - The European Union will c...,1
19235,"ERBIL, Iraq (Reuters) - A delegation from the ...",1
21273,BERLIN (Reuters) - A regional parliament has l...,1


In [90]:
data.reset_index(inplace= True)
data.drop(['index'], axis=1, inplace=True)

In [92]:
data.columns

Index(['text', 'class'], dtype='object')

In [104]:
## Function to Process the Text
def word_process(text):
    text=text.lower()    ## Lower case
    text=re.sub(".,@#$%^&*()_+?/","",text )    #sub means substring
    text=re.sub("\\W", " ", text) #this will remove given substring from dataset
    text=re.sub("http?://\SwwwS+","", text)
    text=re.sub("<>", "count=",text)
    text=re.sub("[%s]" % re.escape(string.punctuation),"", text)
    text=re.sub("\n","", text)
    return text

In [105]:
data["text"]=data['text'].apply(word_process)

In [106]:
x=data["text"]
y=data["class"]

In [107]:
x_train, x_test, y_train, y_test=train_test_split(x,y, test_size=0.25)

In [108]:
from sklearn.feature_extraction.text import TfidfVectorizer

vectorization=TfidfVectorizer()
xv_train=vectorization.fit_transform(x_train)
xv_test=vectorization.transform(x_test)


In [109]:
from sklearn.linear_model import LogisticRegression

LR=LogisticRegression()
LR.fit(xv_train, y_train)

In [110]:
pred_lr=LR.predict(xv_test)


In [111]:
LR.score(xv_test, y_test)

0.9878809481375869

In [113]:
print(classification_report(y_test, pred_lr))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      5929
           1       0.99      0.99      0.99      5293

    accuracy                           0.99     11222
   macro avg       0.99      0.99      0.99     11222
weighted avg       0.99      0.99      0.99     11222



In [114]:
from sklearn.tree import DecisionTreeClassifier

DT=DecisionTreeClassifier()
DT.fit(xv_train, y_train)


In [None]:
pred_dt=DT.predict(xv_test)

In [None]:
DT.score(xv_test, pred_dt)