In [39]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tabulate import tabulate  #for tablular format

#For Preprocessing
import re
import string
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report

#The Models
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier

import warnings
warnings.filterwarnings("ignore")

# FUNCTIONS

### Data checking

In [40]:
def datacheck(data):
    """Print a quick structural overview of a DataFrame.

    First prints the row and column counts on one line, then a ``fancy_grid``
    table with one entry per column: the column name, the number of distinct
    values (counted with ``Series.unique``, so a missing value counts as one
    distinct value), the number of missing values and the column dtype.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to summarise.

    Returns
    -------
    None
        The summary is printed, not returned.
    """
    n_rows = data.shape[0]
    n_cols = data.shape[1]
    print(f"Rows: {n_rows}\tColumns: {n_cols}")

    # One summary row per column, in the frame's own column order.
    summary = [
        [name, len(data[name].unique()), data[name].isna().sum(), data[name].dtypes]
        for name in data.columns
    ]
    column_titles = ["Column", "Total Unique Values", "Missing Values","Data Type"]
    print(tabulate(summary, headers=column_titles, tablefmt="fancy_grid"))

### Processing texts

In [41]:
def text_pre(word):
    """Normalise a raw news text into lowercase, space-separated word tokens.

    Steps, in order:
      1. lowercase the text;
      2. drop anything enclosed in square brackets;
      3. drop URLs (``http://``, ``https://`` or ``www.`` prefixes);
      4. drop HTML tags;
      5. replace every non-word character with a space (newlines included);
      6. drop remaining punctuation (after step 5 only ``_`` is left);
      7. drop every token that contains a digit.

    URLs and HTML tags must be removed before step 5: once ``:``, ``/``,
    ``.``, ``<`` and ``>`` have been turned into spaces, the URL and tag
    patterns can no longer match and their fragments would stay in the text.

    Parameters
    ----------
    word : str
        Raw text of one article.

    Returns
    -------
    str
        The cleaned text. Runs of spaces are not collapsed.
    """
    word = word.lower()
    word = re.sub(r'\[.*?\]', '', word)                  # anything enclosed in square brackets
    word = re.sub(r'https?://\S+|www\.\S+', '', word)    # URLs, while their punctuation is still intact
    word = re.sub(r'<.*?>+', '', word)                   # HTML tags, while < and > are still intact
    word = re.sub(r'\W', ' ', word)                      # every non-word character (incl. newlines) -> space
    word = re.sub('[%s]' % re.escape(string.punctuation), '', word)  # leftover punctuation, i.e. underscores
    word = re.sub(r'\w*\d\w*', '', word)                 # tokens containing at least one digit
    return word

### Model evaluation

In [42]:
def mod(model,x_train_std, x_test_std, y_train, y_test):
    """Print the classification report of a fitted model on the test split.

    Parameters
    ----------
    model
        Fitted estimator exposing ``predict``.
    x_train_std, y_train
        Accepted so all evaluation calls share one signature; not used here.
    x_test_std
        Test features passed to ``model.predict``.
    y_test
        True labels the predictions are compared against.

    Returns
    -------
    None
        The report, followed by blank lines, is printed.
    """
    predictions = model.predict(x_test_std)
    report = classification_report(y_test, predictions)
    print(report)

    # Visual gap between consecutive reports.
    print("\n")

### Prediction

In [75]:
def pred(model,news):
    """Print the fake/true verdict of ``model`` for already-vectorised text.

    Class 0 is reported as "Fake News" and class 1 as "True News". Any other
    predicted value is reported as "Unknown class: <value>" instead of failing
    on an unassigned label. One table row is printed per predicted sample, so
    a single article and a batch of articles are both supported.

    Parameters
    ----------
    model
        Fitted estimator exposing ``predict``; its ``str()`` is the table header.
    news
        Feature matrix accepted by ``model.predict``.

    Returns
    -------
    None
        The verdict table is printed.
    """
    verdicts = {0: "Fake News", 1: "True News"}

    # ravel().tolist() yields plain Python scalars whatever shape predict returns.
    predicted = np.ravel(model.predict(news)).tolist()
    rows = [[verdicts.get(label, f"Unknown class: {label}")] for label in predicted]

    # Pass real lists, not set literals, so the header is not wrapped in braces.
    print(tabulate(rows, headers=[str(model)], tablefmt="fancy_grid"))
    return

### Testing

In [76]:
def testing(news):
    """Classify one raw news text with every trained model and print each verdict.

    The text is cleaned with ``text_pre``, vectorised with the module-level
    fitted ``vectorizer`` and handed to ``pred`` once per model, in the order
    ``logi``, ``tree``, ``ranfor``, ``nnmodel``.

    Parameters
    ----------
    news : str
        Raw article text.

    Returns
    -------
    None
        Each verdict is printed by ``pred``.
    """
    cleaned = [text_pre(news)]                 # the vectoriser expects an iterable of documents
    features = vectorizer.transform(cleaned)

    for classifier in (logi, tree, ranfor, nnmodel):
        pred(classifier, features)

# DATASET

In [45]:
# Load the two source files: True.csv -> dft, Fake.csv -> dff.
# NOTE(review): absolute paths under /content/drive — presumably a mounted Google Drive in Colab; adjust when running elsewhere.
dft = pd.read_csv('/content/drive/MyDrive/Python/True.csv')
dff = pd.read_csv('/content/drive/MyDrive/Python/Fake.csv')

In [46]:
# Preview the first rows of the Fake.csv frame (use dft.head() for the True.csv frame).
dff.head()

Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017"
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017"
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017"
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017"


In [47]:
# Shape, distinct-value counts, missing-value counts and dtypes of the True.csv frame.
datacheck(dft)

Rows: 21417	Columns: 4
╒══════════╤═══════════════════════╤══════════════════╤═════════════╕
│ Column   │   Total Unique Values │   Missing Values │ Data Type   │
╞══════════╪═══════════════════════╪══════════════════╪═════════════╡
│ title    │                 20826 │                0 │ object      │
├──────────┼───────────────────────┼──────────────────┼─────────────┤
│ text     │                 21192 │                0 │ object      │
├──────────┼───────────────────────┼──────────────────┼─────────────┤
│ subject  │                     2 │                0 │ object      │
├──────────┼───────────────────────┼──────────────────┼─────────────┤
│ date     │                   716 │                0 │ object      │
╘══════════╧═══════════════════════╧══════════════════╧═════════════╛


In [48]:
# Same summary for the Fake.csv frame.
datacheck(dff)

Rows: 23481	Columns: 4
╒══════════╤═══════════════════════╤══════════════════╤═════════════╕
│ Column   │   Total Unique Values │   Missing Values │ Data Type   │
╞══════════╪═══════════════════════╪══════════════════╪═════════════╡
│ title    │                 17903 │                0 │ object      │
├──────────┼───────────────────────┼──────────────────┼─────────────┤
│ text     │                 17455 │                0 │ object      │
├──────────┼───────────────────────┼──────────────────┼─────────────┤
│ subject  │                     6 │                0 │ object      │
├──────────┼───────────────────────┼──────────────────┼─────────────┤
│ date     │                  1681 │                0 │ object      │
╘══════════╧═══════════════════════╧══════════════════╧═════════════╛


Creating a new column called class

In [49]:
# Label every row with a constant 'class' value: 1 for the True.csv frame, 0 for the Fake.csv frame.
dft_modified = 1 # label for real news
dff_modified = 0  # label for fake news

# Assigning a scalar fills the new column with that value on every row.
dft['class'] = dft_modified
dff['class'] = dff_modified

Merging both of them

In [50]:
# Stack the fake and real articles into one frame. ignore_index=True gives the result a
# fresh 0..n-1 index instead of repeating each source frame's own row labels, so
# label-based lookups on df_combine stay unambiguous.
df_combine=pd.concat([dff,dft], ignore_index=True)

In [51]:
# Summary of the combined frame; 'class' should appear as a fifth column with two distinct values.
datacheck(df_combine)

Rows: 44898	Columns: 5
╒══════════╤═══════════════════════╤══════════════════╤═════════════╕
│ Column   │   Total Unique Values │   Missing Values │ Data Type   │
╞══════════╪═══════════════════════╪══════════════════╪═════════════╡
│ title    │                 38729 │                0 │ object      │
├──────────┼───────────────────────┼──────────────────┼─────────────┤
│ text     │                 38646 │                0 │ object      │
├──────────┼───────────────────────┼──────────────────┼─────────────┤
│ subject  │                     8 │                0 │ object      │
├──────────┼───────────────────────┼──────────────────┼─────────────┤
│ date     │                  2397 │                0 │ object      │
├──────────┼───────────────────────┼──────────────────┼─────────────┤
│ class    │                     2 │                0 │ int64       │
╘══════════╧═══════════════════════╧══════════════════╧═════════════╛


Keeping only the text and class columns

In [52]:
# Only the article body and its label are used from here on; every other column is dropped.
df_combine=df_combine[['text','class']]

Randomizing the rows

In [53]:
# Shuffle all rows. A fixed random_state makes the row order, and so every downstream
# split and score, reproducible across kernel restarts. reset_index(drop=True)
# discards the pre-shuffle row labels so the index is a clean 0..n-1 again.
df_combine = df_combine.sample(frac=1, random_state=99).reset_index(drop=True)

In [54]:
# Confirm which columns remain after the selection above.
df_combine.columns

Index(['text', 'class'], dtype='object')

In [55]:
# Clean every article with text_pre; Series.apply calls the function once per element.
# The cleaned text overwrites the raw 'text' column.
df_combine['text']=df_combine['text'].apply(text_pre)

80:20 train/test split

In [56]:
# Features are the cleaned article texts, targets the 0/1 class labels.
X = df_combine['text']
y = df_combine['class']

# Hold out 20% for testing; stratify=y keeps the class proportions equal in both parts
# and random_state fixes the shuffle used by the split.
# NOTE(review): the datacheck output for df_combine reports 38,646 unique texts across
# 44,898 rows, so identical articles can land in both parts — consider de-duplicating first.
x_train, x_test, y_train, y_test = train_test_split(X,y, shuffle=True, stratify=y, test_size=0.2, random_state=99)

TfidfVectorizer

In [57]:
# Convert text into numerical TF-IDF vectors (term weights based on word importance).
# The vocabulary and IDF weights are learned from the training texts only; the test
# texts are transformed with that fitted vectoriser.
# Despite the "_std" suffix these are TF-IDF matrices, not standardised features.
vectorizer = TfidfVectorizer()
x_train_std = vectorizer.fit_transform(x_train)
x_test_std = vectorizer.transform(x_test)

# MODELS

### Logistic Regression

In [58]:
# Logistic regression with scikit-learn's default settings, fitted on the TF-IDF training matrix.
logi = LogisticRegression()
logi.fit(x_train_std, y_train)

In [59]:
# Print the classification report for logistic regression on the held-out test split.
mod(logi,x_train_std, x_test_std, y_train, y_test)

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      4696
           1       0.98      0.99      0.98      4284

    accuracy                           0.99      8980
   macro avg       0.99      0.99      0.99      8980
weighted avg       0.99      0.99      0.99      8980





### Decision Tree

In [60]:
# Single decision tree limited to depth 5 and seeded with random_state=99.
tree = DecisionTreeClassifier(random_state=99,max_depth = 5)
tree.fit(x_train_std, y_train)

In [61]:
# Print the classification report for the decision tree on the held-out test split.
mod(tree,x_train_std, x_test_std, y_train, y_test)

              precision    recall  f1-score   support

           0       1.00      0.99      1.00      4696
           1       0.99      1.00      1.00      4284

    accuracy                           1.00      8980
   macro avg       1.00      1.00      1.00      8980
weighted avg       1.00      1.00      1.00      8980





### Random Forest

In [62]:
# Random forest of 500 trees, each limited to depth 9, needing at least 10 samples to
# split a node, pruned with cost-complexity parameter ccp_alpha=0.001; seeded with 999.
ranfor = RandomForestClassifier(ccp_alpha = 0.001, max_depth = 9, n_estimators = 500, min_samples_split = 10, random_state = 999)
ranfor.fit(x_train_std, y_train)

In [63]:
# Print the classification report for the random forest on the held-out test split.
mod(ranfor,x_train_std, x_test_std, y_train, y_test)

              precision    recall  f1-score   support

           0       0.96      0.97      0.97      4696
           1       0.97      0.96      0.96      4284

    accuracy                           0.96      8980
   macro avg       0.97      0.96      0.96      8980
weighted avg       0.96      0.96      0.96      8980





### Neural Network

In [64]:
# Multi-layer perceptron with one hidden layer of 5 units. activation='identity' applies
# no non-linearity in the hidden layer. random_state is fixed because the weight
# initialisation is random; without it the fitted model and its scores change on every
# run, unlike the seeded tree, forest and train/test split.
nnmodel = MLPClassifier(hidden_layer_sizes=(5,), activation='identity', alpha =0.0001, random_state=99)
nnmodel.fit(x_train_std, y_train)

In [65]:
# Print the classification report for the neural network on the held-out test split.
mod(nnmodel,x_train_std, x_test_std, y_train, y_test)

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      4696
           1       0.99      0.99      0.99      4284

    accuracy                           0.99      8980
   macro avg       0.99      0.99      0.99      8980
weighted avg       0.99      0.99      0.99      8980





# Fake News Test

In [77]:
# Read one article from standard input and print every model's verdict for it.
news = input()
testing(news)

BRUSSELS (Reuters) - NATO allies on Tuesday welcomed President Donald Trump s decision to commit more forces to Afghanistan, as part of a new U.S. strategy he said would require more troops and funding from America s partners. Having run for the White House last year on a pledge to withdraw swiftly from Afghanistan, Trump reversed course on Monday and promised a stepped-up military campaign against  Taliban insurgents, saying:  Our troops will fight to win .  U.S. officials said he had signed off on plans to send about 4,000 more U.S. troops to add to the roughly 8,400 now deployed in Afghanistan. But his speech did not define benchmarks for successfully ending the war that began with the U.S.-led invasion of Afghanistan in 2001, and which he acknowledged had required an   extraordinary sacrifice of blood and treasure .  We will ask our NATO allies and global partners to support our new strategy, with additional troops and funding increases in line with our own. We are confident they w

In [78]:
# Read another article from standard input and print every model's verdict for it.
news = input()
testing(news)

Vic Bishop Waking TimesOur reality is carefully constructed by powerful corporate, political and special interest sources in order to covertly sway public opinion. Blatant lies are often televised regarding terrorism, food, war, health, etc. They are fashioned to sway public opinion and condition viewers to accept what have become destructive societal norms.The practice of manipulating and controlling public opinion with distorted media messages has become so common that there is a whole industry formed around this. The entire role of this brainwashing industry is to figure out how to spin information to journalists, similar to the lobbying of government. It is never really clear just how much truth the journalists receive because the news industry has become complacent. The messages that it presents are shaped by corporate powers who often spend millions on advertising with the six conglomerates that own 90% of the media:General Electric (GE), News-Corp, Disney, Viacom, Time Warner, a

In [79]:
# Read a third article from standard input and print every model's verdict for it.
news = input()
testing(news)

SAO PAULO (Reuters) - Cesar Mata Pires, the owner and co-founder of Brazilian engineering conglomerate OAS SA, one of the largest companies involved in Brazil s corruption scandal, died on Tuesday. He was 68. Mata Pires died of a heart attack while taking a morning walk in an upscale district of S o Paulo, where OAS is based, a person with direct knowledge of the matter said. Efforts to contact his family were unsuccessful. OAS declined to comment. The son of a wealthy cattle rancher in the northeastern state of Bahia, Mata Pires  links to politicians were central to the expansion of OAS, which became Brazil s No. 4 builder earlier this decade, people familiar with his career told Reuters last year. His big break came when he befriended Antonio Carlos Magalh es, a popular politician who was Bahia governor several times, and eventually married his daughter Tereza. Brazilians joked that OAS stood for  Obras Arranjadas pelo Sogro  - or  Work Arranged by the Father-In-Law.   After years of