In [1]:
import numpy as np
import re
import pandas as pd
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import accuracy_score

In [2]:
fake_data_path = r"C:\Users\DELL\Desktop\Fake_News_Detection\archive (1)\fake.csv"
true_data_path = r"C:\Users\DELL\Desktop\Fake_News_Detection\archive (1)\true.csv"
df_fake = pd.read_csv(fake_data_path )
df_true = pd.read_csv(true_data_path)

In [3]:
df_fake.shape

(23481, 4)

In [4]:
df_fake ['label'] = 0
df_true ['label'] = 1

In [5]:
df_true.shape

(21417, 5)

In [6]:
df = pd.concat([df_fake, df_true])
df = df.sample(frac=1)
df.reset_index(drop=True, inplace=True)

In [7]:
df.shape

(44898, 5)

In [8]:
df

Unnamed: 0,title,text,subject,date,label
0,Man Behind Trump’s Insane Claim That Obama Ta...,Donald Trump is now demanding that any probe i...,News,"March 5, 2017",0
1,RAW VIDEO: A SHOCKING TOUR Of The Detroit Ghet...,,politics,"Sep 10, 2016",0
2,JUST IN: MUELLER’S “Right Hand Man” Represente...,You just can t make this stuff up Yet another ...,left-news,"Dec 8, 2017",0
3,(VIDEO) COLLEGE STUDENTS REACT TO NEW BIZARRE ...,Young Americans for Freedom at the University ...,politics,"Sep 4, 2015",0
4,"Britain, EU have very different legal stances ...",LONDON (Reuters) - Britain and the European Un...,worldnews,"September 5, 2017",1
...,...,...,...,...,...
44893,Israel's right wing has grand plans for Trump era,JERUSALEM (Reuters) - Israel’s right wing has ...,politicsNews,"January 19, 2017",1
44894,U.S. judge upholds Virginia's voter ID law,"RICHMOND, Va. (Reuters) - A U.S. federal judge...",politicsNews,"May 19, 2016",1
44895,National Republican Just Blamed Women For Ass...,Republican presidential candidate and current ...,News,"April 15, 2016",0
44896,Ugandan army attacks rebel camps in eastern Congo,KAMPALA (Reuters) - The Ugandan army on Friday...,worldnews,"December 22, 2017",1


In [9]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 44898 entries, 0 to 44897
Data columns (total 5 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   title    44898 non-null  object
 1   text     44898 non-null  object
 2   subject  44898 non-null  object
 3   date     44898 non-null  object
 4   label    44898 non-null  int64 
dtypes: int64(1), object(4)
memory usage: 1.7+ MB


In [10]:
df.isnull().sum()

title      0
text       0
subject    0
date       0
label      0
dtype: int64

In [11]:
df.isnull().sum()

title      0
text       0
subject    0
date       0
label      0
dtype: int64

In [12]:
df['content'] = df['title'] +' ' +['subject	']

In [13]:
df

Unnamed: 0,title,text,subject,date,label,content
0,Man Behind Trump’s Insane Claim That Obama Ta...,Donald Trump is now demanding that any probe i...,News,"March 5, 2017",0,Man Behind Trump’s Insane Claim That Obama Ta...
1,RAW VIDEO: A SHOCKING TOUR Of The Detroit Ghet...,,politics,"Sep 10, 2016",0,RAW VIDEO: A SHOCKING TOUR Of The Detroit Ghet...
2,JUST IN: MUELLER’S “Right Hand Man” Represente...,You just can t make this stuff up Yet another ...,left-news,"Dec 8, 2017",0,JUST IN: MUELLER’S “Right Hand Man” Represente...
3,(VIDEO) COLLEGE STUDENTS REACT TO NEW BIZARRE ...,Young Americans for Freedom at the University ...,politics,"Sep 4, 2015",0,(VIDEO) COLLEGE STUDENTS REACT TO NEW BIZARRE ...
4,"Britain, EU have very different legal stances ...",LONDON (Reuters) - Britain and the European Un...,worldnews,"September 5, 2017",1,"Britain, EU have very different legal stances ..."
...,...,...,...,...,...,...
44893,Israel's right wing has grand plans for Trump era,JERUSALEM (Reuters) - Israel’s right wing has ...,politicsNews,"January 19, 2017",1,Israel's right wing has grand plans for Trump ...
44894,U.S. judge upholds Virginia's voter ID law,"RICHMOND, Va. (Reuters) - A U.S. federal judge...",politicsNews,"May 19, 2016",1,U.S. judge upholds Virginia's voter ID law sub...
44895,National Republican Just Blamed Women For Ass...,Republican presidential candidate and current ...,News,"April 15, 2016",0,National Republican Just Blamed Women For Ass...
44896,Ugandan army attacks rebel camps in eastern Congo,KAMPALA (Reuters) - The Ugandan army on Friday...,worldnews,"December 22, 2017",1,Ugandan army attacks rebel camps in eastern Co...


In [14]:
df['content'][22424]

"U.S. lawmakers want 'supercharged' response to North Korea nuclear tests subject\t"

In [15]:
ps = PorterStemmer()
def stemming(content):
    stemmed_content = re.sub('[^a-zA-Z]',' ',content)
    stemmed_content = stemmed_content.lower()
    stemmed_content = stemmed_content.split()
    stemmed_content = [ps.stem(word) for word in stemmed_content if not word in stopwords.words('english')]
    stemmed_content = ' '.join(stemmed_content)
    return stemmed_content

In [16]:
df['content'] = df['content'].apply(stemming)

LookupError: 
**********************************************************************
  Resource [93mstopwords[0m not found.
  Please use the NLTK Downloader to obtain the resource:

  [31m>>> import nltk
  >>> nltk.download('stopwords')
  [0m
  For more information see: https://www.nltk.org/data.html

  Attempted to load [93mcorpora/stopwords[0m

  Searched in:
    - 'C:\\Users\\DELL/nltk_data'
    - 'C:\\Users\\DELL\\anaconda3\\nltk_data'
    - 'C:\\Users\\DELL\\anaconda3\\share\\nltk_data'
    - 'C:\\Users\\DELL\\anaconda3\\lib\\nltk_data'
    - 'C:\\Users\\DELL\\AppData\\Roaming\\nltk_data'
    - 'C:\\nltk_data'
    - 'D:\\nltk_data'
    - 'E:\\nltk_data'
**********************************************************************


In [17]:
df['content']

0         Man Behind Trump’s Insane Claim That Obama Ta...
1        RAW VIDEO: A SHOCKING TOUR Of The Detroit Ghet...
2        JUST IN: MUELLER’S “Right Hand Man” Represente...
3        (VIDEO) COLLEGE STUDENTS REACT TO NEW BIZARRE ...
4        Britain, EU have very different legal stances ...
                               ...                        
44893    Israel's right wing has grand plans for Trump ...
44894    U.S. judge upholds Virginia's voter ID law sub...
44895     National Republican Just Blamed Women For Ass...
44896    Ugandan army attacks rebel camps in eastern Co...
44897    HYSTERICAL LIBERALS CHEER FOR JOHN KERRY…He Ca...
Name: content, Length: 44898, dtype: object

In [18]:
X = df['content'].values
y = df['label'].values

In [19]:
vector = TfidfVectorizer()
vector.fit(X)
X = vector.transform(X)

In [20]:

print(X)


  (0, 20555)	0.3867296701750513
  (0, 19988)	0.10778278265394028
  (0, 19743)	0.18709984505441987
  (0, 19217)	0.08591153527940197
  (0, 18670)	0.18613818312557961
  (0, 18428)	0.3520022149887505
  (0, 18021)	0.04014746597077468
  (0, 16687)	0.2804213371254493
  (0, 12842)	0.14656570904629415
  (0, 11436)	0.22063299681084286
  (0, 11329)	0.2491888682111805
  (0, 10355)	0.178750617946839
  (0, 9810)	0.2797169775507587
  (0, 9079)	0.17002983107822958
  (0, 8877)	0.1700071168111622
  (0, 3670)	0.2726892119113126
  (0, 2064)	0.260413491953068
  (0, 686)	0.3394096588181415
  (1, 19988)	0.22661134580836648
  (1, 18975)	0.3442559267597425
  (1, 18675)	0.12786461572533991
  (1, 18021)	0.04220465954958461
  (1, 16828)	0.2651644974281313
  (1, 16037)	0.35520850841449614
  (1, 14954)	0.3829276651469309
  :	:
  (44896, 15021)	0.3866810282180701
  (44896, 9597)	0.12649578220439525
  (44896, 6254)	0.3722175434269313
  (44896, 4197)	0.37994522750466503
  (44896, 3071)	0.43084148893116764
  (44896, 15

In [21]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, stratify=y, random_state=2)

In [22]:
X_train.shape

(35918, 20896)

In [23]:
model = LogisticRegression()
model.fit(X_train,Y_train)

In [24]:
train_y_pred = model.predict(X_train)
print(accuracy_score(train_y_pred,Y_train))

0.9714627763238488


In [25]:
testing_y_pred = model.predict(X_test)
print(accuracy_score(testing_y_pred,Y_test))

0.957238307349666


In [None]:
model = RandomForestRegressor()
model.fit(X_train,Y_train)
train_y_pred = model.predict(X_train)
print(accuracy_score(train_y_pred,Y_train))
testing_y_pred = model.predict(X_test)
print(accuracy_score(testing_y_pred,Y_test))

In [None]:
input_data = X_test[10]
prediction = model.predict(input_data)


In [None]:
if prediction[0] == 0:
    print('The News Is Real')
else:
    print('The News is Fake')

In [None]:
df['content'][2]