In [56]:
from google.colab import drive

In [57]:
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
import re
import string

In [59]:
data1 = pd.read_csv("/content/drive/MyDrive/WELFake_Dataset.csv")

In [60]:
data1.shape

(72134, 4)

In [61]:
data1.head(10)
data1 = data1.drop(columns = ["Unnamed: 0", "title"])

data1

Unnamed: 0,text,label
0,No comment is expected from Barack Obama Membe...,1
1,Did they post their votes for Hillary already?,1
2,"Now, most of the demonstrators gathered last ...",1
3,A dozen politically active pastors came here f...,0
4,"The RS-28 Sarmat missile, dubbed Satan 2, will...",1
...,...,...
72129,WASHINGTON (Reuters) - Hackers believed to be ...,0
72130,"You know, because in fantasyland Republicans n...",1
72131,Migrants Refuse To Leave Train At Refugee Camp...,0
72132,MEXICO CITY (Reuters) - Donald Trump’s combati...,0


In [62]:
data1.rename(columns = {'label':'class'}, inplace = True)
data = data1

In [63]:
data = data.dropna()

In [64]:
data.isnull().sum()

text     0
class    0
dtype: int64

In [65]:
data = data.sample(frac = 1)

In [66]:
data.reset_index(inplace = True)
data.drop(['index'], axis = 1, inplace = True)

In [67]:
data.head()

Unnamed: 0,text,class
0,NC REPUBLICAN HEADQUARTERS Firebombed By The T...,1
1,When a Russian oncologist called her daught...,0
2,LONDON (Reuters) - Britain s government said o...,0
3,Russia may have executed the ruthless and elus...,0
4,The use of extreme rhetoric by presidential ca...,0


In [68]:
def wordopt(text):
  text = text.lower()
  text = re.sub('\[.*?\]', '', text)
  text = re.sub("\\W", " ", text)
  text = re.sub ('https?://\S+|www\.\S+','', text)
  text = re.sub ('<.*?>+','', text)
  text = re.sub ('[%s]' % re.escape(string.punctuation), '', text)
  text = re.sub ('\n', '', text)
  text = re.sub ('\w*\d\w*', '', text)
  return text

In [69]:
data['text'] = data['text'].apply(wordopt)

In [70]:
data.count()

text     72095
class    72095
dtype: int64

In [71]:
data = data.drop_duplicates()

In [72]:
data.count()

text     62382
class    62382
dtype: int64

In [73]:
X = data['text']
y = data['class']

In [74]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = 0.25)

In [75]:
from sklearn.feature_extraction.text import TfidfVectorizer

vectorization = TfidfVectorizer()
Xv_train = vectorization.fit_transform(X_train)
Xv_test = vectorization.transform(X_test)


In [76]:
from sklearn.linear_model import LogisticRegression

LR = LogisticRegression()
LR.fit(Xv_train, y_train)

In [77]:
pred_lr = LR.predict(Xv_test)

In [78]:
LR.score(Xv_test, y_test)

0.9390228263657348

In [79]:
print(classification_report(y_test, pred_lr))

              precision    recall  f1-score   support

           0       0.95      0.94      0.94      8514
           1       0.93      0.93      0.93      7082

    accuracy                           0.94     15596
   macro avg       0.94      0.94      0.94     15596
weighted avg       0.94      0.94      0.94     15596



In [80]:
from sklearn.tree import DecisionTreeClassifier

DT = DecisionTreeClassifier()
DT.fit(Xv_train, y_train)

In [81]:
pred_dt = DT.predict(Xv_test)

In [82]:
DT.score(Xv_test, y_test)

0.898756091305463

In [83]:
print(classification_report(y_test, pred_dt))

              precision    recall  f1-score   support

           0       0.91      0.91      0.91      8514
           1       0.89      0.89      0.89      7082

    accuracy                           0.90     15596
   macro avg       0.90      0.90      0.90     15596
weighted avg       0.90      0.90      0.90     15596



In [84]:
from sklearn.ensemble import GradientBoostingClassifier

GB = GradientBoostingClassifier(random_state = 0)
GB.fit(Xv_train, y_train)

In [85]:
pred_gb = GB.predict(Xv_test)

In [86]:
GB.score(Xv_test, y_test)

0.9294690946396512

In [87]:
print(classification_report(y_test, pred_gb))

              precision    recall  f1-score   support

           0       0.95      0.92      0.93      8514
           1       0.91      0.94      0.92      7082

    accuracy                           0.93     15596
   macro avg       0.93      0.93      0.93     15596
weighted avg       0.93      0.93      0.93     15596



In [None]:
from sklearn.ensemble import RandomForestClassifier

RF = RandomForestClassifier(random_state = 0)
RF.fit(Xv_train, y_train)

In [None]:
pred_rf = RF.predict(Xv_test)

In [None]:
RF.score(Xv_test, y_test)

In [None]:
print(classification_report(y_test, pred_rf))

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Embedding, GRU, LSTM, RNN, SpatialDropout1D

In [None]:
# LSTM Neural Network
lstm_model = Sequential(name = 'lstm_nn_model')
lstm_model.add(layer = Embedding(input_dim = max_features, output_dim = 120, name = '1st_layer'))
lstm_model.add(layer = LSTM(units = 120, dropout = 0.2, recurrent_dropout = 0.2, name = '2nd_layer'))
lstm_model.add(layer = Dropout(rate = 0.5, name = '3rd_layer'))
lstm_model.add(layer = Dense(units = 120,  activation = 'relu', name = '4th_layer'))
lstm_model.add(layer = Dropout(rate = 0.5, name = '5th_layer'))
lstm_model.add(layer = Dense(units = len(set(y)),  activation = 'sigmoid', name = 'output_layer'))
# compiling the model
lstm_model.compile(optimizer = 'adam', loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'])

In [None]:
lstm_model_fit = lstm_model.fit(Xv_train, y_train, epochs = 10)

In [None]:
# GRU neural Network
gru_model = Sequential(name = 'gru_nn_model')
gru_model.add(layer = Embedding(input_dim = max_features, output_dim = 120, name = '1st_layer'))
gru_model.add(layer = GRU(units = 120, dropout = 0.2,
                          recurrent_dropout = 0.2, recurrent_activation = 'relu',
                          activation = 'relu', name = '2nd_layer'))
gru_model.add(layer = Dropout(rate = 0.4, name = '3rd_layer'))
gru_model.add(layer = Dense(units = 120, activation = 'relu', name = '4th_layer'))
gru_model.add(layer = Dropout(rate = 0.2, name = '5th_layer'))
gru_model.add(layer = Dense(units = len(set(y)), activation = 'softmax', name = 'output_layer'))
# compiling the model
gru_model.compile(optimizer = 'adam', loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'])

In [None]:
gru_model_fit = gru_model.fit(X_train, y_train, epochs = 10)

In [None]:
def output_table(n):
  if n == 1:
    return "Fake News"
  elif n == 0:
    return "Not A Fake News"

def manual_testing(news):
  testing_news = {"text": [news]}
  new_def_test = pd.DataFrame(testing_news)
  new_def_test["text"] = new_def_test["text"].apply(wordopt)
  new_x_test = new_def_test["text"]
  new_xv_test = vectorization.transform(new_x_test)
  pred_LR = LR.predict(new_xv_test)
  pred_DT = DT.predict(new_xv_test)
  pred_GB = GB.predict(new_xv_test)
  pred_RF = RF.predict(new_xv_test)
  pred_lstm = lstm_model.predict_classes(new_xv_test)

  return print(f"""
  Logistic Regression Prediction   : {output_table(pred_LR[0])}
  Decision Tree Prediction         : {output_table(pred_DT[0])}
  Gradient Boosting Prediction     : {output_table(pred_GB[0])}
  Random Forest Prediction         : {output_table(pred_RF[0])}
  Long Short-term Memory Prediction: {output_table(pred_lstm[0])}
  """)

In [None]:
news = str(input("Insert News Text here :"))
manual_testing(news)