# Importing Libraries

In [32]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.feature_extraction.text import TfidfTransformer, TfidfVectorizer
from sklearn import svm
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
import pickle
from wordcloud import WordCloud

#  Data Preparation

In [33]:
# Load the first news dataset and preview its first five rows.
df1 = pd.read_csv("news.csv")
df1.head(5)

Unnamed: 0.1,Unnamed: 0,title,text,label,publication
0,0,Kerry to go to Paris in gesture of sympathy,U.S. Secretary of State John F. Kerry said Mon...,REAL,The News International
1,1,The Battle of New York: Why This Primary Matters,It's primary day in New York and front-runners...,REAL,DAWN
2,2,‘Britain’s Schindler’ Dies at 106,A Czech stockbroker who saved more than 650 Je...,REAL,The News
3,3,Fact check: Trump and Clinton at the 'commande...,Hillary Clinton and Donald Trump made some ina...,REAL,DAWN
4,4,Iran reportedly makes new push for uranium con...,Iranian negotiators reportedly have made a las...,REAL,DAWN


In [34]:
# Remove the leftover "Unnamed: 0" column that came in from the CSV.
# It is dropped directly (no rename to a placeholder first), and df1 is
# rebound to the result rather than mutated in place.
df1 = df1.drop(columns=["Unnamed: 0"])
df1.head()

Unnamed: 0,title,text,label,publication
0,Kerry to go to Paris in gesture of sympathy,U.S. Secretary of State John F. Kerry said Mon...,REAL,The News International
1,The Battle of New York: Why This Primary Matters,It's primary day in New York and front-runners...,REAL,DAWN
2,‘Britain’s Schindler’ Dies at 106,A Czech stockbroker who saved more than 650 Je...,REAL,The News
3,Fact check: Trump and Clinton at the 'commande...,Hillary Clinton and Donald Trump made some ina...,REAL,DAWN
4,Iran reportedly makes new push for uranium con...,Iranian negotiators reportedly have made a las...,REAL,DAWN


In [35]:
# Merge publication, title and body into a single text feature, then keep
# only that combined column alongside the label.
# NOTE(review): the publication name becomes part of the model input; if
# publishers correlate strongly with the label the classifier may key on
# them rather than on the article content - confirm this is intended.
df1 = df1.assign(
    titletext=df1['publication'] + ' ' + df1['title'] + ' ' + df1['text']
).drop(columns=['title', 'text', 'publication'])
df1.head()

Unnamed: 0,label,titletext
0,REAL,The News International Kerry to go to Paris in...
1,REAL,DAWN The Battle of New York: Why This Primary ...
2,REAL,The News ‘Britain’s Schindler’ Dies at 106 A C...
3,REAL,DAWN Fact check: Trump and Clinton at the 'com...
4,REAL,DAWN Iran reportedly makes new push for uraniu...


In [36]:
# Load the second dataset (True.csv) and preview its first five rows.
df2 = pd.read_csv("True.csv")
df2.head(5)

Unnamed: 0.1,Unnamed: 0,date,publication,text,title
0,0,2016-12-31,New York Times,WASHINGTON — Congressional Republicans have...,House Republicans Fret About Winning Their Hea...
1,1,2017-06-19,New York Times,"After the bullet shells get counted, the blood...",Rift Between Officers and Residents as Killing...
2,2,2017-01-06,New York Times,"When Walt Disney’s “Bambi” opened in 1942, cri...","Tyrus Wong, ‘Bambi’ Artist Thwarted by Racial ..."
3,3,2017-04-10,New York Times,"Death may be the great equalizer, but it isn’t...","Among Deaths in 2016, a Heavy Toll in Pop Musi..."
4,4,2017-01-02,New York Times,"SEOUL, South Korea — North Korea’s leader, ...",Kim Jong-un Says North Korea Is Preparing to T...


In [37]:
# Discard the leftover index column and the date, then tag every row of
# this file with the REAL label.
df2 = df2.drop(columns=['Unnamed: 0', 'date']).assign(label='REAL')
df2.head()

Unnamed: 0,publication,text,title,label
0,New York Times,WASHINGTON — Congressional Republicans have...,House Republicans Fret About Winning Their Hea...,REAL
1,New York Times,"After the bullet shells get counted, the blood...",Rift Between Officers and Residents as Killing...,REAL
2,New York Times,"When Walt Disney’s “Bambi” opened in 1942, cri...","Tyrus Wong, ‘Bambi’ Artist Thwarted by Racial ...",REAL
3,New York Times,"Death may be the great equalizer, but it isn’t...","Among Deaths in 2016, a Heavy Toll in Pop Musi...",REAL
4,New York Times,"SEOUL, South Korea — North Korea’s leader, ...",Kim Jong-un Says North Korea Is Preparing to T...,REAL


In [38]:
# Merge publication, title and body into a single text feature, then keep
# only that combined column alongside the label.
df2 = df2.assign(
    titletext=df2['publication'] + ' ' + df2['title'] + ' ' + df2['text']
).drop(columns=['title', 'text', 'publication'])
df2.head()

Unnamed: 0,label,titletext
0,REAL,New York Times House Republicans Fret About Wi...
1,REAL,New York Times Rift Between Officers and Resid...
2,REAL,"New York Times Tyrus Wong, ‘Bambi’ Artist Thwa..."
3,REAL,"New York Times Among Deaths in 2016, a Heavy T..."
4,REAL,New York Times Kim Jong-un Says North Korea Is...


In [39]:
# Load the third dataset (Fake.csv) and preview its first five rows.
df3 = pd.read_csv("Fake.csv")
df3.head(5)

Unnamed: 0.1,Unnamed: 0,title,text,date,publication
0,0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,"December 31, 2017",Trump of Fame
1,1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,"December 31, 2017",The Spooner Advocate
2,2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...","December 30, 2017",The Anaconda Standard
3,3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...","December 29, 2017",The Anaconda Standard
4,4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,"December 25, 2017",The Anaconda Standard


In [40]:
# Discard the leftover index column and the date, then tag every row of
# this file with the FAKE label.
df3 = df3.drop(columns=['Unnamed: 0', 'date']).assign(label='FAKE')
df3.head()

Unnamed: 0,title,text,publication,label
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,Trump of Fame,FAKE
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,The Spooner Advocate,FAKE
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",The Anaconda Standard,FAKE
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",The Anaconda Standard,FAKE
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,The Anaconda Standard,FAKE


In [41]:
# Merge publication, title and body into a single text feature, then keep
# only that combined column alongside the label.
df3 = df3.assign(
    titletext=df3['publication'] + ' ' + df3['title'] + ' ' + df3['text']
).drop(columns=['title', 'text', 'publication'])
df3.head()

Unnamed: 0,label,titletext
0,FAKE,Trump of Fame Donald Trump Sends Out Embarras...
1,FAKE,The Spooner Advocate Drunk Bragging Trump Sta...
2,FAKE,The Anaconda Standard Sheriff David Clarke Be...
3,FAKE,The Anaconda Standard Trump Is So Obsessed He...
4,FAKE,The Anaconda Standard Pope Francis Just Calle...


In [42]:
# Count missing values per column in df1.
df1.isna().sum()

label        0
titletext    0
dtype: int64

In [43]:
# Drop every row of df2 that has at least one missing value.
df2 = df2.dropna(axis='index', how='any')

In [44]:
# Count missing values per column in df2 after the dropna above.
df2.isna().sum()

label        0
titletext    0
dtype: int64

In [45]:
# Count missing values per column in df3.
df3.isna().sum()

label        0
titletext    0
dtype: int64

In [47]:
# Stack the three labelled frames into one dataset.
# ignore_index=True gives the combined frame a fresh 0..n-1 index; without
# it each source frame keeps its own row labels, so the same label would
# appear several times in `news` and label-based lookups would be ambiguous.
news = pd.concat([df1, df2, df3], ignore_index=True)
# Show a random handful of rows as a sanity check of the combined data.
news.sample(10)

Unnamed: 0,label,titletext
163875,REAL,"The News International After decades of war, C..."
5505,REAL,New York Times The Garrison Keillor You Never ...
15628,FAKE,The Anaconda Standard LAWLESS: OBAMA WON’T TAK...
80247,REAL,New York Post Hijab-wearing model kicks off Mi...
9524,REAL,Breitbart 71-Year-Old Dana Rohrabacher Staffer...
16647,FAKE,Trump of Fame WHY DECISION LIBERAL JUDGE IN CO...
226,REAL,The News International Momentum to bar Syrian ...
28057,REAL,Breitbart Top Downing Street Staffers Hurled A...
56195,REAL,Atlantic Which Republicans Oppose Donald Trump...
4762,FAKE,The Anaconda Standard Twitter DESTROYS Trump ...


In [49]:
# (rows, columns) of the combined dataset.
news.shape

(193801, 2)

In [50]:
# Model input: the combined publication/title/body text of every article.
X = news['titletext']
X.shape

(193801,)

In [51]:
# Prediction target: the label column of every article.
Y = news['label']
Y.shape

(193801,)

#  Data Modeling

In [52]:
# Hold out 20% of the samples for evaluation; the fixed seed keeps the
# split repeatable between runs.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=7,
)

In [53]:

# TF-IDF features: English stop words are removed and terms that occur in
# more than 70% of the documents are ignored. The vocabulary is fitted on
# the training split only and then reused to transform the test split.
tfidf_vectorizer = TfidfVectorizer(max_df=0.7, stop_words='english')
tfidf_train = tfidf_vectorizer.fit_transform(x_train)
tfidf_test = tfidf_vectorizer.transform(x_test)

### Passive Aggressive Classifier

In [54]:
# Train a Passive Aggressive classifier on the TF-IDF features and predict
# labels for the held-out split.
# random_state is fixed to the same seed used for the train/test split and
# the random forest in this notebook, so repeated runs yield the same model.
PAC = PassiveAggressiveClassifier(max_iter=1000, random_state=7)
PAC.fit(tfidf_train, y_train)
y_pred = PAC.predict(tfidf_test)

In [55]:
# Share of held-out articles the Passive Aggressive model labelled correctly.
score = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {round(score*100,2)}%')

Accuracy: 99.51%


In [56]:
# Confusion matrix for the Passive Aggressive predictions; rows are the true
# labels and columns the predicted labels, both ordered FAKE then REAL.
confusion_matrix(y_true=y_test, y_pred=y_pred, labels=['FAKE', 'REAL'])

array([[ 5285,   135],
       [   54, 33287]], dtype=int64)

### Multinomial Naive Bayes Classifier

In [57]:
# Train a multinomial Naive Bayes model on the TF-IDF features and predict
# labels for the held-out split.
MultNB = MultinomialNB().fit(tfidf_train, y_train)
MultNB_pred = MultNB.predict(tfidf_test)


In [58]:
# Share of held-out articles the Naive Bayes model labelled correctly.
score1 = accuracy_score(y_true=y_test, y_pred=MultNB_pred)
print(f'Multinomial Niave Bayyes Accuracy: {round(score1*100,2)}%')

Multinomial Niave Bayyes Accuracy: 86.76%


In [59]:
# Confusion matrix for the Naive Bayes predictions; rows are the true labels
# and columns the predicted labels, both ordered FAKE then REAL.
confusion_matrix(y_true=y_test, y_pred=MultNB_pred, labels=['FAKE', 'REAL'])

array([[  292,  5128],
       [    3, 33338]], dtype=int64)

### Random Forest

In [60]:
# Train a 10-tree random forest (seeded for repeatability) on the TF-IDF
# features and predict labels for the held-out split.
rfc = RandomForestClassifier(n_estimators=10, random_state=7).fit(tfidf_train, y_train)
rfc_pred = rfc.predict(tfidf_test)

In [61]:
# Share of held-out articles the random forest labelled correctly.
score2 = accuracy_score(y_true=y_test, y_pred=rfc_pred)
print(f'Random Forest Accuracy: {round(score2*100,2)}%')

Random Forest Accuracy: 96.22%


In [62]:
# Confusion matrix for the random forest predictions; rows are the true
# labels and columns the predicted labels, both ordered FAKE then REAL.
confusion_matrix(y_true=y_test, y_pred=rfc_pred, labels=['FAKE', 'REAL'])

array([[ 4054,  1366],
       [  100, 33241]], dtype=int64)

### Ensemble Learning 

In [63]:
# Reuse a previously pickled ensemble when it can be loaded and scored on the
# current test features; otherwise train a hard-voting ensemble of the three
# classifiers above, persist it, and report its accuracy.
filename = 'finalized_model.sav'
try:
    # NOTE(review): pickle.load can execute arbitrary code - only load a file
    # that this notebook itself produced.
    with open(filename, 'rb') as model_file:
        loaded_model = pickle.load(model_file)
    result = loaded_model.score(tfidf_test, y_test)
    print(f'Ensemble Learning Accuracy: {round(result*100,2)}%')
except Exception as load_error:
    # Any failure to load or score the cached model (missing or corrupt file,
    # feature mismatch, ...) falls back to retraining. Unlike a bare `except:`
    # this lets KeyboardInterrupt/SystemExit through, and the reason is
    # reported instead of being silently swallowed.
    print(f'Could not reuse {filename} ({load_error!r}); training a new ensemble.')
    Ensemb = VotingClassifier(
        estimators=[('PAC', PAC), ('MultNB', MultNB), ('rfc', rfc)],
        voting='hard',
    )
    Ensemb.fit(tfidf_train, y_train)
    Ensemb_pred = Ensemb.predict(tfidf_test)
    # The context manager guarantees the pickle is flushed and closed.
    with open(filename, 'wb') as model_file:
        pickle.dump(Ensemb, model_file)
    score4 = accuracy_score(y_test, Ensemb_pred)
    print(f'Ensemble Learning Accuracy: {round(score4*100,2)}%')

Ensemble Learning Accuracy: 96.43%


In [None]:
# Interactive check: load the persisted ensemble, ask for one article, and
# print the predicted label.
filename = 'finalized_model.sav'
# NOTE(review): pickle.load can execute arbitrary code - only load a file
# that this notebook itself produced.
with open(filename, 'rb') as model_file:  # closes the handle after loading
    loaded_model = pickle.load(model_file)
# input() already returns str, so no extra conversion is needed.
publisher = input("enter publisher name")
title = input("enter news title")
text = input("enter news content")
# Same "publication title text" layout as the training column `titletext`.
ab = publisher + ' ' + title + ' ' + text
ab = [ab]  # the vectorizer expects an iterable of documents
textab = tfidf_vectorizer.transform(ab)
pridi = loaded_model.predict(textab)
print(pridi)