In [1]:
import numpy as np
import re
import pandas as pd
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [2]:
true_df = pd.read_csv('datasets/True.csv')
fake_df = pd.read_csv('datasets/Fake.csv')
true_df['label'] = 0
fake_df['label'] = 1
news_df = pd.concat([true_df, fake_df], axis=0)
news_df = news_df.sample(frac=1).reset_index(drop=True)

In [3]:
news_df.head()
news_df.isnull().sum()
news_df.shape
news_df=news_df.fillna(' ')
news_df.isnull().sum()
news_df['content'] = news_df['title']+' '+news_df['text']
news_df

Unnamed: 0,title,text,subject,date,label,content
0,Gold Star Dad Forced To Cancel Canada Trip Af...,"Khizr Khan, a Muslim-American gold star dad wh...",News,"March 6, 2017",1,Gold Star Dad Forced To Cancel Canada Trip Af...
1,"Kenya president snubs vote crisis meeting, pre...",NAIROBI (Reuters) - Kenyan President Uhuru Ken...,worldnews,"October 19, 2017",0,"Kenya president snubs vote crisis meeting, pre..."
2,PA RESTAURANT Forced To CLOSE After ICE Sweep ...,The Attorney representing the illegal aliens w...,left-news,"Mar 4, 2017",1,PA RESTAURANT Forced To CLOSE After ICE Sweep ...
3,BODY LANGUAGE EXPERT Gives Clear Examples Of S...,Watch the fascinating analysis of Rice s body ...,politics,"Apr 4, 2017",1,BODY LANGUAGE EXPERT Gives Clear Examples Of S...
4,"Trump declares Obamacare 'dead,' urges Democra...",WASHINGTON (Reuters) - U.S. President Donald T...,politicsNews,"October 16, 2017",0,"Trump declares Obamacare 'dead,' urges Democra..."
...,...,...,...,...,...,...
44893,PRICELESS! WATCH MSNBC HOST’S Shocked Response...,THIS IS PRICELESS! The video below shows just ...,politics,"Dec 26, 2017",1,PRICELESS! WATCH MSNBC HOST’S Shocked Response...
44894,ANTIFA BEWARE! BIKERS FOR TRUMP Makes Huge Ann...,BIKERS FOR TRUMP announced their support for P...,politics,"Aug 22, 2017",1,ANTIFA BEWARE! BIKERS FOR TRUMP Makes Huge Ann...
44895,Watch Ben Carson Tell CNN He’s Ready To Use M...,After Donald Trump declared that he would do f...,News,"February 23, 2016",1,Watch Ben Carson Tell CNN He’s Ready To Use M...
44896,"China, Southeast Asia aim to build trust with ...","CLARK, Philippines (Reuters) - China and South...",worldnews,"October 24, 2017",0,"China, Southeast Asia aim to build trust with ..."


In [None]:
X = news_df.drop('label',axis=1)
y = news_df['label']
print(X)

ps = PorterStemmer()
def stemming(content):
    stemmed_content = re.sub('[^a-zA-Z]',' ',content)
    stemmed_content = stemmed_content.lower()
    stemmed_content = stemmed_content.split()
    stemmed_content = [ps.stem(word) for word in stemmed_content if not word in stopwords.words('english')]
    stemmed_content = ' '.join(stemmed_content)
    return stemmed_content

news_df['content'] = news_df['content'].apply(stemming)

news_df['content']

X = news_df['content'].values
y = news_df['label'].values


vector = TfidfVectorizer()
vector.fit(X)
X = vector.transform(X)

print(X)

                                                   title  \
0       Gold Star Dad Forced To Cancel Canada Trip Af...   
1      Kenya president snubs vote crisis meeting, pre...   
2      PA RESTAURANT Forced To CLOSE After ICE Sweep ...   
3      BODY LANGUAGE EXPERT Gives Clear Examples Of S...   
4      Trump declares Obamacare 'dead,' urges Democra...   
...                                                  ...   
44893  PRICELESS! WATCH MSNBC HOST’S Shocked Response...   
44894  ANTIFA BEWARE! BIKERS FOR TRUMP Makes Huge Ann...   
44895   Watch Ben Carson Tell CNN He’s Ready To Use M...   
44896  China, Southeast Asia aim to build trust with ...   
44897  Trump says 'nothing is off the table' for resp...   

                                                    text       subject  \
0      Khizr Khan, a Muslim-American gold star dad wh...          News   
1      NAIROBI (Reuters) - Kenyan President Uhuru Ken...     worldnews   
2      The Attorney representing the illegal aliens w... 

In [None]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, stratify=y, random_state=2)

X_train.shape

model = LogisticRegression()
model.fit(X_train,Y_train)

# on training set
train_y_pred = model.predict(X_train)
print(accuracy_score(train_y_pred,Y_train))

testing_y_pred = model.predict(X_test)
print(accuracy_score(testing_y_pred,Y_test))


In [None]:
input_data = X_test[10]
prediction = model.predict(input_data)

if prediction[0] == 0:
    print('The News Is Real')
else:
    print('The News is Fake')
    
news_df['content'][2]