# Importing the dependencies

In [44]:
import numpy as np 
import pandas as pd
import re
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder 

In [45]:
import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\sevan\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\sevan\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [46]:
#printing the stop words to be removed
print(stopwords.words('english'))

['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', '

# Analysing the dataset

In [47]:
#loading the datasets
news_dataset= pd.read_csv(r"C:\Users\sevan\Downloads\fake_or_real_news.csv")

In [48]:
#getting the rows and columns of the dataset
news_dataset.shape

(7818, 6)

(7818, 6)

In [49]:
#printing the first 5 rows
news_dataset.head()

Unnamed: 0.1,Unnamed: 0,title,text,label,Unnamed: 4,Unnamed: 5
0,8476,You Can Smell Hillary’s Fear,"Daniel Greenfield, a Shillman Journalism Fello...",FAKE,,
1,10294,Watch The Exact Moment Paul Ryan Committed Pol...,Google Pinterest Digg Linkedin Reddit Stumbleu...,FAKE,,
2,3608,Kerry to go to Paris in gesture of sympathy,U.S. Secretary of State John F. Kerry said Mon...,REAL,,
3,10142,Bernie supporters on Twitter erupt in anger ag...,"— Kaydee King (@KaydeeKing) November 9, 2016 T...",FAKE,,
4,875,The Battle of New York: Why This Primary Matters,It's primary day in New York and front-runners...,REAL,,


Unnamed: 0.1,Unnamed: 0,title,text,label,Unnamed: 4,Unnamed: 5
0,8476,You Can Smell Hillary’s Fear,"Daniel Greenfield, a Shillman Journalism Fello...",FAKE,,
1,10294,Watch The Exact Moment Paul Ryan Committed Pol...,Google Pinterest Digg Linkedin Reddit Stumbleu...,FAKE,,
2,3608,Kerry to go to Paris in gesture of sympathy,U.S. Secretary of State John F. Kerry said Mon...,REAL,,
3,10142,Bernie supporters on Twitter erupt in anger ag...,"— Kaydee King (@KaydeeKing) November 9, 2016 T...",FAKE,,
4,875,The Battle of New York: Why This Primary Matters,It's primary day in New York and front-runners...,REAL,,


In [50]:
#counting null values
news_dataset.isnull().sum()

Unnamed: 0     242
title          633
text           889
label         1063
Unnamed: 4    7817
Unnamed: 5    7817
dtype: int64

Unnamed: 0     242
title          633
text           889
label         1063
Unnamed: 4    7817
Unnamed: 5    7817
dtype: int64

In [51]:
#replacing the null values with empty string
news_dataset=news_dataset.fillna('')

In [52]:
news_dataset.isnull().sum()

Unnamed: 0    0
title         0
text          0
label         0
Unnamed: 4    0
Unnamed: 5    0
dtype: int64

Unnamed: 0    0
title         0
text          0
label         0
Unnamed: 4    0
Unnamed: 5    0
dtype: int64

In [53]:
#merging the title and text
news_dataset['content']=news_dataset['title']+''+news_dataset['text']+''+news_dataset['label']

In [54]:
display(news_dataset['content'])

0       You Can Smell Hillary’s FearDaniel Greenfield,...
1       Watch The Exact Moment Paul Ryan Committed Pol...
2       Kerry to go to Paris in gesture of sympathyU.S...
3       Bernie supporters on Twitter erupt in anger ag...
4       The Battle of New York: Why This Primary Matte...
                              ...                        
7813    State Department says it can't find emails fro...
7814    The ‘P’ in PBS Should Stand for ‘Plutocratic’ ...
7815    Anti-Trump Protesters Are Tools of the Oligarc...
7816    In Ethiopia, Obama seeks progress on peace, se...
7817    Jeb Bush Is Suddenly Attacking Trump. Here's W...
Name: content, Length: 7818, dtype: object

0       You Can Smell Hillary’s FearDaniel Greenfield,...
1       Watch The Exact Moment Paul Ryan Committed Pol...
2       Kerry to go to Paris in gesture of sympathyU.S...
3       Bernie supporters on Twitter erupt in anger ag...
4       The Battle of New York: Why This Primary Matte...
                              ...                        
7813    State Department says it can't find emails fro...
7814    The ‘P’ in PBS Should Stand for ‘Plutocratic’ ...
7815    Anti-Trump Protesters Are Tools of the Oligarc...
7816    In Ethiopia, Obama seeks progress on peace, se...
7817    Jeb Bush Is Suddenly Attacking Trump. Here's W...
Name: content, Length: 7818, dtype: object

In [55]:
#deleting the unwanted columns
del news_dataset['Unnamed: 4']
del news_dataset['Unnamed: 5']
del news_dataset['title']


In [56]:
# separting the data and label
X=news_dataset.drop(columns=('label'),axis=1)
Y=news_dataset['label']

In [57]:
print(X)
print(Y)

     Unnamed: 0                                               text  \
0          8476  Daniel Greenfield, a Shillman Journalism Fello...   
1         10294  Google Pinterest Digg Linkedin Reddit Stumbleu...   
2          3608  U.S. Secretary of State John F. Kerry said Mon...   
3         10142  — Kaydee King (@KaydeeKing) November 9, 2016 T...   
4           875  It's primary day in New York and front-runners...   
...         ...                                                ...   
7813       4490  The State Department told the Republican Natio...   
7814       8062  The ‘P’ in PBS Should Stand for ‘Plutocratic’ ...   
7815       8622   Anti-Trump Protesters Are Tools of the Oligar...   
7816       4021  ADDIS ABABA, Ethiopia —President Obama convene...   
7817       4330  Jeb Bush Is Suddenly Attacking Trump. Here's W...   

                                                content  
0     You Can Smell Hillary’s FearDaniel Greenfield,...  
1     Watch The Exact Moment Paul Ryan Comm

In [58]:
#first 5 rows of revised dataset
news_dataset.head()

Unnamed: 0.1,Unnamed: 0,text,label,content
0,8476,"Daniel Greenfield, a Shillman Journalism Fello...",FAKE,"You Can Smell Hillary’s FearDaniel Greenfield,..."
1,10294,Google Pinterest Digg Linkedin Reddit Stumbleu...,FAKE,Watch The Exact Moment Paul Ryan Committed Pol...
2,3608,U.S. Secretary of State John F. Kerry said Mon...,REAL,Kerry to go to Paris in gesture of sympathyU.S...
3,10142,"— Kaydee King (@KaydeeKing) November 9, 2016 T...",FAKE,Bernie supporters on Twitter erupt in anger ag...
4,875,It's primary day in New York and front-runners...,REAL,The Battle of New York: Why This Primary Matte...


Unnamed: 0.1,Unnamed: 0,text,label,content
0,8476,"Daniel Greenfield, a Shillman Journalism Fello...",FAKE,"You Can Smell Hillary’s FearDaniel Greenfield,..."
1,10294,Google Pinterest Digg Linkedin Reddit Stumbleu...,FAKE,Watch The Exact Moment Paul Ryan Committed Pol...
2,3608,U.S. Secretary of State John F. Kerry said Mon...,REAL,Kerry to go to Paris in gesture of sympathyU.S...
3,10142,"— Kaydee King (@KaydeeKing) November 9, 2016 T...",FAKE,Bernie supporters on Twitter erupt in anger ag...
4,875,It's primary day in New York and front-runners...,REAL,The Battle of New York: Why This Primary Matte...


In [59]:
#checking for duplicate values in dataset
news_dataset.duplicated().sum()

250

250

In [60]:
#sorting the duplicate values to one place
news_dataset.sort_values("text", inplace = True)

In [61]:
#deleting the duplicate values
news_dataset.drop_duplicates(subset ="text",keep = False, inplace = True)

In [62]:
news_dataset.duplicated().sum()

0

0

# Label Encoding

In [63]:
#real or fake text is converted to numerical values
le = LabelEncoder()
news_dataset['label']=le.fit_transform(news_dataset.label)
news_dataset.head()

Unnamed: 0.1,Unnamed: 0,text,label,content
6728,9220,"\n\n \nIf Clinton is elected, Obama will hand ...",420,Hillary Is 70 Days Away From Controlling All F...
609,5438,"\n\n \nIn the past 24 hours, some very stunnin...",420,Just Another Day In the Life of the Clinton Cr...
665,7811,\n\n \nUPDATE: HILLARY CLINTON IS AGAIN UNDER ...,420,"14 Days to Do 14 Things, If Hillary’s Indicted..."
407,2840,\n\n Another deadline lapsed Thursday in the s...,421,Iran: Why the best outcome now is to keep nego...
1862,10318,"\n\nAs it stands now, the election has been st...",420,Selected Not Elected: The Election Has Been St...


Unnamed: 0.1,Unnamed: 0,text,label,content
6728,9220,"\n\n \nIf Clinton is elected, Obama will hand ...",420,Hillary Is 70 Days Away From Controlling All F...
609,5438,"\n\n \nIn the past 24 hours, some very stunnin...",420,Just Another Day In the Life of the Clinton Cr...
665,7811,\n\n \nUPDATE: HILLARY CLINTON IS AGAIN UNDER ...,420,"14 Days to Do 14 Things, If Hillary’s Indicted..."
407,2840,\n\n Another deadline lapsed Thursday in the s...,421,Iran: Why the best outcome now is to keep nego...
1862,10318,"\n\nAs it stands now, the election has been st...",420,Selected Not Elected: The Election Has Been St...


In [64]:
news_dataset.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6569 entries, 6728 to 3515
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Unnamed: 0  6569 non-null   object
 1   text        6569 non-null   object
 2   label       6569 non-null   int32 
 3   content     6569 non-null   object
dtypes: int32(1), object(3)
memory usage: 230.9+ KB
<class 'pandas.core.frame.DataFrame'>
Int64Index: 6569 entries, 6728 to 3515
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Unnamed: 0  6569 non-null   object
 1   text        6569 non-null   object
 2   label       6569 non-null   int32 
 3   content     6569 non-null   object
dtypes: int32(1), object(3)
memory usage: 230.9+ KB


In [65]:
#separating the data and label
X= news_dataset['content'].values
Y= news_dataset['label'].values

In [66]:
print(X)

['Hillary Is 70 Days Away From Controlling All Food and Water\n\n \nIf Clinton is elected, Obama will hand her the power of a dictator.\nUpon Inauguration Day, Clinton will have the power to to the following:\n \nAs President, Hillary Clinton Would Have the Power to Enact Slave Labor According to EO 13603, the President, or the head of any federal agency that he shall designate, can conscript â€œpersons of outstanding experience and ability without compensation,â€\x9d in both â€œpeacetime and times of national emergency.â€\x9d Â I can hear the Obama supporters now as they will write to me and say, â€œObama would never do that, you are drinking from the Kool-Aidâ€\x9d.Â Well, here it is, you can read it for yourself.\nSec.Â 502.Â Â Consultants.Â The head of each agency otherwise delegated functions under this order is delegated the authority of the President under sections 710(b) and (c) of the Act, 50 U.S.C. App. 2160(b), (c),Â to employ persons of outstanding experience and ability wi

['Hillary Is 70 Days Away From Controlling All Food and Water\n\n \nIf Clinton is elected, Obama will hand her the power of a dictator.\nUpon Inauguration Day, Clinton will have the power to to the following:\n \nAs President, Hillary Clinton Would Have the Power to Enact Slave Labor According to EO 13603, the President, or the head of any federal agency that he shall designate, can conscript â€œpersons of outstanding experience and ability without compensation,â€\x9d in both â€œpeacetime and times of national emergency.â€\x9d Â I can hear the Obama supporters now as they will write to me and say, â€œObama would never do that, you are drinking from the Kool-Aidâ€\x9d.Â Well, here it is, you can read it for yourself.\nSec.Â 502.Â Â Consultants.Â The head of each agency otherwise delegated functions under this order is delegated the authority of the President under sections 710(b) and (c) of the Act, 50 U.S.C. App. 2160(b), (c),Â to employ persons of outstanding experience and ability wi

In [67]:
print(Y)

[420 420 420 ... 420 420 420]
[420 420 420 ... 420 420 420]


In [68]:
#converting the text to numerical data a comp can understadn
vectorizer=TfidfVectorizer()
vectorizer.fit(X)
    
X=vectorizer.transform(X)

Splitting the dataset

In [69]:
X_train,X_test,Y_train, Y_test= train_test_split(X,Y, test_size=0.2,random_state=2)

In [70]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report


# Random Forest Classifier

In [71]:
from sklearn.ensemble import RandomForestClassifier
model=RandomForestClassifier(n_estimators=10,criterion='entropy')

In [72]:
model.fit(X_train, Y_train)

In [73]:
X_train_prediction=model.predict(X_train)
training_data_accuracy=accuracy_score(X_train_prediction, Y_train)

In [74]:
X_test_prediction=model.predict(X_test)
test_data_accuracy=accuracy_score(X_test_prediction, Y_test)

In [75]:
a=(training_data_accuracy*100)
b=(test_data_accuracy*100)
a=str(round(a,3))
b=str(round(b,3))
print('Accuracy of training data :',a,'%')
print('Accuracy of testing data :',b, '%')

Accuracy of training data : 99.562 %
Accuracy of testing data : 76.027 %
Accuracy of training data : 99.562 %
Accuracy of testing data : 76.027 %


In [76]:
#Pedicting the values
X_new = X_test[53]

prediction = model.predict(X_new)
print(prediction)

if (prediction[0]==0):
  print('The news is Real')
else:
  print('The news is Fake')

[420]
The news is Fake
[420]
The news is Fake


In [77]:
print(Y_test[53])

420
420


# Decision Tree Classifier

In [78]:
from sklearn.tree import DecisionTreeClassifier
model1= DecisionTreeClassifier()
model1.fit(X_train,Y_train)

In [79]:
X_train_prediction=model1.predict(X_train)
training_data_accuracy=accuracy_score(X_train_prediction, Y_train)

In [80]:
X_test_prediction=model1.predict(X_test)
test_data_accuracy=accuracy_score(X_test_prediction, Y_test)

In [82]:
a=(training_data_accuracy*100)
b=(test_data_accuracy*100)
a=str(round(a,3))
b=str(round(b,3))
print('Accuracy of training data :',a,'%')
print('Accuracy of testing data :',b, '%')

Accuracy of training data : 100.0 %
Accuracy of testing data : 87.291 %
Accuracy of training data : 100.0 %
Accuracy of testing data : 87.291 %


In [83]:
#Pedicting the values
X_new = X_test[45]

prediction = model1.predict(X_new)
print(prediction)

if (prediction[0]==0):
  print('The news is Real')
else:
  print('The news is Fake')

[420]
The news is Fake
[420]
The news is Fake


In [84]:
print(Y_test[45])

420
420


CONFUSION MATRIX

In [85]:
y_pred = model1.predict(X_test)
matrix=confusion_matrix(Y_test, y_pred)
print('Confusion Matrix :\n', matrix)


Confusion Matrix :
 [[7 0 0 ... 1 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
Confusion Matrix :
 [[7 0 0 ... 1 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


# Logistic Regression

In [86]:
# logistic regression
from sklearn.linear_model import LogisticRegression
model2=LogisticRegression()

In [87]:
model2.fit(X_train,Y_train)

In [88]:
X_train_prediction=model2.predict(X_train)
training_data_accuracy=accuracy_score(X_train_prediction, Y_train)

In [89]:
X_test_prediction=model2.predict(X_test)
test_data_accuracy=accuracy_score(X_test_prediction, Y_test)

In [90]:
a=(training_data_accuracy*100)
b=(test_data_accuracy*100)
a=str(round(a,3))
b=str(round(b,3))
print('Accuracy of training data :',a,'%')
print('Accuracy of testing data :',b, '%')

Accuracy of training data : 89.343 %
Accuracy of testing data : 85.312 %
Accuracy of training data : 89.343 %
Accuracy of testing data : 85.312 %


In [91]:
#Pedicting the values
X_new = X_test[456]

prediction = model2.predict(X_new)
print(prediction)

if (prediction[0]==0):
  print('The news is Real')
else:
  print('The news is Fake')

[420]
The news is Fake
[420]
The news is Fake


In [92]:
print(Y_test[456])

420
420


# Random Forest Agressor

In [93]:
from sklearn.ensemble import RandomForestRegressor

In [None]:
model3=RandomForestRegressor()
model3.fit(X_train, Y_train)