In [26]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
import re
import string

In [27]:
data_fake = pd.read_csv(r'C:\Users\DEEPA\Desktop\Triage\Projects\Fake_News_Detection\Fake.csv')
data_true = pd.read_csv(r'C:\Users\DEEPA\Desktop\Triage\Projects\Fake_News_Detection\True.csv')

In [28]:
#True Data

data_true.head() 

Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017"
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017"
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017"
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017"


In [29]:
#Fake Data

data_fake.head() 

Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017"
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017"
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017"
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017"


In [30]:
#Assigning classes to the dataset

data_fake["class"] = 0 
data_true["class"] = 1

In [31]:
#Checking number of rows and columns

data_fake.shape, data_true.shape 

((23481, 5), (21417, 5))

In [32]:
#Manual testing for both the datasets

data_fake_manual_testing = data_fake.tail(10)
for i in range(23480,23470,-1):
    data_fake.drop([i], axis = 0, inplace = True)

data_true_manual_testing = data_true.tail(10)
for i in range(21416,21406,-1):
    data_true.drop([i], axis = 0, inplace = True)

In [34]:
data_fake_manual_testing.loc[:,"class"] = 0
data_true_manual_testing.loc[:,"class"] = 1

In [35]:
#Merging the data extracted from both the datasets
data_merge = pd.concat([data_fake, data_true],axis = 0)
data_merge.head(10)

Unnamed: 0,title,text,subject,date,class
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017",0
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017",0
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017",0
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017",0
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017",0
5,Racist Alabama Cops Brutalize Black Boy While...,The number of cases of cops brutalizing and ki...,News,"December 25, 2017",0
6,"Fresh Off The Golf Course, Trump Lashes Out A...",Donald Trump spent a good portion of his day a...,News,"December 23, 2017",0
7,Trump Said Some INSANELY Racist Stuff Inside ...,In the wake of yet another court decision that...,News,"December 23, 2017",0
8,Former CIA Director Slams Trump Over UN Bully...,Many people have raised the alarm regarding th...,News,"December 22, 2017",0
9,WATCH: Brand-New Pro-Trump Ad Features So Muc...,Just when you might have thought we d get a br...,News,"December 21, 2017",0


In [38]:
data = data_merge.drop(['title','subject','date'],axis = 1)

In [44]:
#Function to Clean Text

def wordopt(Text):
    text = Text.lower()
    text = re.sub('\[.*?\]','',text)
    text = re.sub("\\W"," ",text)
    text = re.sub('https?://\S+|www\.\S+','',text)
    text = re.sub('<.*?>+','',text)
    text = re.sub('[%s]' % re.escape(string.punctuation),'',text)
    text = re.sub('\n','',text)
    text = re.sub('\w*\d\w*','',text)
    text = re.sub('\w*\d\w*','',text)
    return text

In [45]:
data['text'] = data['text'].apply(wordopt)

In [46]:
x = data['text']
y = data['class']

In [47]:
#Defining Training and Testing Data and Splitting Them into &5 -25 Percent Ratio

x_train, x_test, y_train, y_test = train_test_split(x,y,test_size = 0.25)

In [48]:
#Converting Raw Data into Matrix for Further process

from sklearn.feature_extraction.text import TfidfVectorizer

vectorization = TfidfVectorizer()
xv_train = vectorization.fit_transform(x_train)
xv_test = vectorization.transform(x_test)

In [49]:
#Creation of First Model

from sklearn.linear_model import LogisticRegression

LR = LogisticRegression()
LR.fit(xv_train, y_train)

In [51]:
#Checking the Model Accuracy 

pred_lr = LR.predict(xv_test)
LR.score(xv_test, y_test)

0.9877005347593583

In [52]:
#Checking the Classification Report

print(classification_report(y_test,pred_lr))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      5869
           1       0.99      0.99      0.99      5351

    accuracy                           0.99     11220
   macro avg       0.99      0.99      0.99     11220
weighted avg       0.99      0.99      0.99     11220



In [56]:
#Creation of Second Model

from sklearn.tree import DecisionTreeClassifier

DT = DecisionTreeClassifier()
DT.fit(xv_train, y_train)

In [57]:
#Checking the Model Accuracy

pred_dt = DT.predict(xv_test)
DT.score(xv_test, y_test)

0.9958110516934047

In [58]:
#Checking the Classification Report

print(classification_report(y_test, pred_dt))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      5869
           1       1.00      1.00      1.00      5351

    accuracy                           1.00     11220
   macro avg       1.00      1.00      1.00     11220
weighted avg       1.00      1.00      1.00     11220



In [68]:
#Checking Fake News

def output_label(n):
    if n == 0:
        return "Fake News"
    elif n == 1:
        return "Not a Fake News"
def manual_testing(news):
    testing_news = {"text":[news]}
    new_def_test = pd.DataFrame(testing_news)
    new_def_test["text"] = new_def_test["text"].apply(wordopt)
    new_x_test = new_def_test["text"]
    new_xv_test = vectorization.transform(new_x_test)
    pred_LR = LR.predict(new_xv_test)
    pred_DT = DT.predict(new_xv_test)
    
    return print("\n\nLR Prediction: {} \nDT Prediction: {}".format(output_label(pred_LR[0]),output_label(pred_DT[0])))

In [69]:
news = str(input())
manual_testing(news)

The article was published recently in the peer-reviewed journal JAMA Network Open.   New Delhi: The Union Ministry of Women and Child Development Tuesday denounced a recently published article on "zero-food children" in India, and labelled it as a deliberate and malicious attempt to sensationalise fake news. In a statement, the ministry said the article lacked primary research and made misleading assertions.  The article referred to was published recently in the peer-reviewed journal JAMA Network Open. The study claimed the prevalence of "zero-food children" in India was at 19.3 per cent and drew attention to extreme food deprivation among children.


LR Prediction: Fake News 
DT Prediction: Fake News
