# Fake News Detection


## importing libraries

In [122]:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression
import re
from sklearn.feature_extraction.text import TfidfVectorizer
import joblib
import string
from sklearn.utils import shuffle

## Importing datasets


In [123]:
# df_fake = pd.read_csv('Fake.csv', delimiter=',', low_memory=False, on_bad_lines='skip')
df_fake = pd.read_csv('Fake.csv')
display(df_fake.head())
display(df_fake.columns)

Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017"
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017"
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017"
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017"


Index(['title', 'text', 'subject', 'date'], dtype='object')

In [124]:
df_true = pd.read_csv('True.csv',on_bad_lines = 'skip')
df_true

Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017"
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017"
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017"
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017"
...,...,...,...,...
27762,'Fully committed' NATO backs new U.S. approach...,BRUSSELS (Reuters) - NATO allies on Tuesday we...,worldnews,"August 22, 2017"
27763,LexisNexis withdrew two products from Chinese ...,"LONDON (Reuters) - LexisNexis, a provider of l...",worldnews,"August 22, 2017"
27764,Minsk cultural hub becomes haven from authorities,MINSK (Reuters) - In the shadow of disused Sov...,worldnews,"August 22, 2017"
27765,Vatican upbeat on possibility of Pope Francis ...,MOSCOW (Reuters) - Vatican Secretary of State ...,worldnews,"August 22, 2017"


In [125]:
df_fake['class'] = 0
df_true['class'] = 1

### merging the datasets

In [126]:
df = pd.concat([df_fake,df_true], axis = 0)
df

Unnamed: 0,title,text,subject,date,class
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017",0
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017",0
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017",0
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017",0
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017",0
...,...,...,...,...,...
27762,'Fully committed' NATO backs new U.S. approach...,BRUSSELS (Reuters) - NATO allies on Tuesday we...,worldnews,"August 22, 2017",1
27763,LexisNexis withdrew two products from Chinese ...,"LONDON (Reuters) - LexisNexis, a provider of l...",worldnews,"August 22, 2017",1
27764,Minsk cultural hub becomes haven from authorities,MINSK (Reuters) - In the shadow of disused Sov...,worldnews,"August 22, 2017",1
27765,Vatican upbeat on possibility of Pope Francis ...,MOSCOW (Reuters) - Vatican Secretary of State ...,worldnews,"August 22, 2017",1


In [127]:
# only keeping text as it is only relevant
df = df.drop(['title','subject','date'],axis = 1)


In [128]:
df

Unnamed: 0,text,class
0,Donald Trump just couldn t wish all Americans ...,0
1,House Intelligence Committee Chairman Devin Nu...,0
2,"On Friday, it was revealed that former Milwauk...",0
3,"On Christmas day, Donald Trump announced that ...",0
4,Pope Francis used his annual Christmas Day mes...,0
...,...,...
27762,BRUSSELS (Reuters) - NATO allies on Tuesday we...,1
27763,"LONDON (Reuters) - LexisNexis, a provider of l...",1
27764,MINSK (Reuters) - In the shadow of disused Sov...,1
27765,MOSCOW (Reuters) - Vatican Secretary of State ...,1


In [129]:
df.reset_index(inplace = True)

In [130]:
df.drop(['index'],axis = 1, inplace = True)

## data preprocessing

In [131]:
def clean_text(text):
  text = text.lower()
  text = re.sub('\[.*?\]',"",text)
  text = re.sub("\\W"," ",text)
  text = re.sub("https?://\S+|www\.\S+","",text)
  text = re.sub("<.*?>","",text)
  text = re.sub("[%s]" % re.escape(string.punctuation),"",text)
  text = re.sub("\n","",text)
  text = re.sub("\w*\d\w","",text)
  return text


In [132]:
df['text'] = df['text'].apply(clean_text)

In [133]:
df = shuffle(df)
df

Unnamed: 0,text,class
22517,tune in to the alternate current radio network...,0
14428,there never seems to be a shortage of smug kn...,0
44601,manila reuters philippine president rodrig...,1
37310,cairo reuters egyptian president abdel fat...,1
24594,hanoi reuters u s president donald trump ...,1
...,...,...
48871,manila reuters a member of philippine pres...,1
4850,infamous donald trump surrogate and supporter ...,0
416,now that seahawks star defensive end michael b...,0
43078,buenos aires reuters dozens of relatives o...,1


## spliting train_test

In [138]:
X = df.loc[:,'text']
y = df.loc[:,'class']

X_train, X_test,y_train,y_test = train_test_split(X,y,test_size = 0.2,random_state =42,stratify=y)

In [139]:
vectorizer = TfidfVectorizer()
xv_train = vectorizer.fit_transform(X_train)
xv_test = vectorizer.transform(X_test)

## training

In [140]:
lr = LogisticRegression()
lr.fit(xv_train,y_train)

In [141]:
prediction = lr.predict(xv_test)
lr.score(xv_test,y_test)

0.9892682926829268

In [142]:
print(classification_report(y_test,prediction))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      4696
           1       0.99      0.99      0.99      5554

    accuracy                           0.99     10250
   macro avg       0.99      0.99      0.99     10250
weighted avg       0.99      0.99      0.99     10250



## Saving the model and also saving the vectorizer to transform input from user

In [143]:
joblib.dump(vectorizer,'vectorizer.jb')
joblib.dump(lr,'lr_model.jb')

['lr_model.jb']