In [1]:
# importing essential libraries

import numpy as np
import pandas as pd

In [2]:
# importing datasets

fake = pd.read_csv(r'C:\Users\Administrator\Desktop\ML Project\Fake.csv')
real = pd.read_csv(r'C:\Users\Administrator\Desktop\ML Project\True.csv')

fake = fake[['title','text']]
real = real[['title','text']]

fake['answer']=0
real['answer']=1

In [3]:
fake.head()

Unnamed: 0,title,text,answer
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,0
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,0
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",0
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",0
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,0


In [4]:
real.head()

Unnamed: 0,title,text,answer
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,1
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,1
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,1
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,1
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,1


In [5]:
# mixing both real and fake news

news = real.append(fake)

news.head()

Unnamed: 0,title,text,answer
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,1
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,1
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,1
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,1
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,1


In [6]:
news.shape

(44898, 3)

In [7]:
# cleaning the dataset

# importing libraries for cleaning dataset
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem.wordnet import WordNetLemmatizer

wordnet_lemmatizer = WordNetLemmatizer()
news['text'] = news['text'].apply(lambda x: re.split('[ ,.:;]', x))

stop_words = stopwords.words('english') 
news['text'] = news['text'].apply(lambda x: [y for y in x if y not in stop_words])
news['text'] = news['text'].apply(lambda x: [wordnet_lemmatizer.lemmatize(y,'v') for y in x])

news['combined'] = news.apply(lambda x: x['title']+' '+' '.join(x['text']), axis=1)

news.head()

Unnamed: 0,title,text,answer,combined
0,"As U.S. budget fight looms, Republicans flip t...","[WASHINGTON, (Reuters), -, The, head, conserva...",1,"As U.S. budget fight looms, Republicans flip t..."
1,U.S. military to accept transgender recruits o...,"[WASHINGTON, (Reuters), -, Transgender, people...",1,U.S. military to accept transgender recruits o...
2,Senior U.S. Republican senator: 'Let Mr. Muell...,"[WASHINGTON, (Reuters), -, The, special, couns...",1,Senior U.S. Republican senator: 'Let Mr. Muell...
3,FBI Russia probe helped by Australian diplomat...,"[WASHINGTON, (Reuters), -, Trump, campaign, ad...",1,FBI Russia probe helped by Australian diplomat...
4,Trump wants Postal Service to charge 'much mor...,"[SEATTLE/WASHINGTON, (Reuters), -, President, ...",1,Trump wants Postal Service to charge 'much mor...


In [8]:
# splitting dataset into training and test datas 

# here we use only news['combined'] values which is combination of title and text 

from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(news['combined'].values,news['answer'],random_state = 0)

In [9]:
# creating bag of model 

from sklearn.feature_extraction.text import TfidfVectorizer

vectorizer = TfidfVectorizer(sublinear_tf=True, max_df=0.8, min_df=0.1)
X_train_vec = vectorizer.fit_transform(X_train).toarray()
X_test_vec = vectorizer.transform(X_test).toarray()
y_train = np.asarray(y_train)
y_test = np.asarray(y_test)

In [10]:
# Now we will compare different classifiers

# 1) Naive Bayes classifier

from sklearn.naive_bayes import GaussianNB 
classifier = GaussianNB()
classifier.fit(X_train_vec,y_train)

# predicting test set results

y_pred = classifier.predict(X_test_vec)

# confusion matrix (we can easily calculate accuracy with confusion matrix)

from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)

print(cm)

[[5408  450]
 [  97 5270]]


In [11]:
# 2) KNeighbors Classifier

from sklearn.neighbors import KNeighborsClassifier
classifier1 = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2)
classifier1.fit(X_train_vec, y_train)

# predicting test set results

y_pred1 = classifier1.predict(X_test_vec)

# making confusion matrix

from sklearn.metrics import confusion_matrix
cm1 = confusion_matrix(y_test, y_pred1)

print(cm1)

[[5341  517]
 [ 223 5144]]


In [12]:
# 3) Random forest classifier

from sklearn.ensemble import RandomForestClassifier 
classifier2 = RandomForestClassifier(n_estimators = 10, criterion = 'entropy', random_state= 0)
classifier2.fit(X_train_vec, y_train)

# predicting test set results

y_pred2 = classifier2.predict(X_test_vec)

# making confusion matrix

from sklearn.metrics import confusion_matrix
cm2 = confusion_matrix(y_test, y_pred2)

print(cm2)

[[5846   12]
 [  40 5327]]


In [13]:
# accuracies of all three classifers used above 

# here I calculate accuracies with the help of confusion matrix 
# also here accuracies are high but accuracies depend upon the datasets

print("Accuracy of Naive Bayes classifier is : 95.12 \n")
print("Accuracy of KNeighbors classifier is : 93.40 \n")
print("Accuracy of Random forest classifier is : 99.5 \n")

Accuracy of Naive Bayes classifier is : 95.12 

Accuracy of KNeighbors classifier is : 93.40 

Accuracy of Random forest classifier is : 99.5 

