#### About Dataset

#### Importing dependencies

In [29]:
import numpy as np 
import pandas as pd 
import re
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [30]:
# Download the NLTK 'stopwords' corpus so that stopwords.words(...) can be called below.
import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\USER\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [31]:
# Show the English stop-word list provided by NLTK, preceded by a header line.
print('printing the stop words in English', stopwords.words('english'), sep='\n')

printing the stop words in English
['a', 'about', 'above', 'after', 'again', 'against', 'ain', 'all', 'am', 'an', 'and', 'any', 'are', 'aren', "aren't", 'as', 'at', 'be', 'because', 'been', 'before', 'being', 'below', 'between', 'both', 'but', 'by', 'can', 'couldn', "couldn't", 'd', 'did', 'didn', "didn't", 'do', 'does', 'doesn', "doesn't", 'doing', 'don', "don't", 'down', 'during', 'each', 'few', 'for', 'from', 'further', 'had', 'hadn', "hadn't", 'has', 'hasn', "hasn't", 'have', 'haven', "haven't", 'having', 'he', "he'd", "he'll", 'her', 'here', 'hers', 'herself', "he's", 'him', 'himself', 'his', 'how', 'i', "i'd", 'if', "i'll", "i'm", 'in', 'into', 'is', 'isn', "isn't", 'it', "it'd", "it'll", "it's", 'its', 'itself', "i've", 'just', 'll', 'm', 'ma', 'me', 'mightn', "mightn't", 'more', 'most', 'mustn', "mustn't", 'my', 'myself', 'needn', "needn't", 'no', 'nor', 'not', 'now', 'o', 'of', 'off', 'on', 'once', 'only', 'or', 'other', 'our', 'ours', 'ourselves', 'out', 'over', 'own', 're', 

#### Importing datasets

In [33]:
# Load the WELFake dataset into a pandas DataFrame.
# The CSV's first column becomes the row index and is labelled 'serial_number'.
# Forward slashes are used so the Windows path needs no backslash escaping.
# NOTE(review): this is an absolute, machine-specific location; point
# DATASET_PATH at your own copy of the CSV when running elsewhere.
DATASET_PATH = 'C:/Users/USER/Desktop/Datasets/WELFake_Dataset.csv'

news_data = pd.read_csv(DATASET_PATH, index_col=0).rename_axis('serial_number')

In [34]:
# Checking the number of rows and columns
news_data.shape

(72134, 3)

In [35]:
# Preview the first five rows (head() returns 5 rows by default)
news_data.head()

Unnamed: 0_level_0,title,text,label
serial_number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,LAW ENFORCEMENT ON HIGH ALERT Following Threat...,No comment is expected from Barack Obama Membe...,1
1,,Did they post their votes for Hillary already?,1
2,UNBELIEVABLE! OBAMA’S ATTORNEY GENERAL SAYS MO...,"Now, most of the demonstrators gathered last ...",1
3,"Bobby Jindal, raised Hindu, uses story of Chri...",A dozen politically active pastors came here f...,0
4,SATAN 2: Russia unvelis an image of its terrif...,"The RS-28 Sarmat missile, dubbed Satan 2, will...",1


In [36]:
# Count the missing entries in each column
news_data.isna().sum()

title    558
text      39
label      0
dtype: int64

In [37]:
# Replacing the null values with a single-space string (' '), not an empty string
news_data = news_data.fillna(' ')
# Re-count the nulls per column to confirm none remain
news_data.isnull().sum()

title    0
text     0
label    0
dtype: int64

In [38]:
# Build the combined 'news' column: title, one separating space, then the article text
news_data['news'] = news_data['title'].add(" ").add(news_data['text'])

In [39]:
# Inspect the merged title + text column
news_data['news']

serial_number
0        LAW ENFORCEMENT ON HIGH ALERT Following Threat...
1           Did they post their votes for Hillary already?
2        UNBELIEVABLE! OBAMA’S ATTORNEY GENERAL SAYS MO...
3        Bobby Jindal, raised Hindu, uses story of Chri...
4        SATAN 2: Russia unvelis an image of its terrif...
                               ...                        
72129    Russians steal research on Trump in hack of U....
72130     WATCH: Giuliani Demands That Democrats Apolog...
72131    Migrants Refuse To Leave Train At Refugee Camp...
72132    Trump tussle gives unpopular Mexican leader mu...
72133    Goldman Sachs Endorses Hillary Clinton For Pre...
Name: news, Length: 72134, dtype: object

In [40]:
# Separating the data (input variables) from the label:
# Y holds the 'label' column, X holds every remaining column.
Y = news_data['label']
X = news_data.drop(columns=['label'])
print(X)
print(Y)

                                                           title  ...                                               news
serial_number                                                     ...                                                   
0              LAW ENFORCEMENT ON HIGH ALERT Following Threat...  ...  LAW ENFORCEMENT ON HIGH ALERT Following Threat...
1                                                                 ...     Did they post their votes for Hillary already?
2              UNBELIEVABLE! OBAMA’S ATTORNEY GENERAL SAYS MO...  ...  UNBELIEVABLE! OBAMA’S ATTORNEY GENERAL SAYS MO...
3              Bobby Jindal, raised Hindu, uses story of Chri...  ...  Bobby Jindal, raised Hindu, uses story of Chri...
4              SATAN 2: Russia unvelis an image of its terrif...  ...  SATAN 2: Russia unvelis an image of its terrif...
...                                                          ...  ...                                                ...
72129          Russians steal re