#### Importing Required Libraries and Loading Dataset

In [188]:
import pandas as pd # importing required libraries
import numpy as np

In [189]:
df=pd.read_csv("WELFake_Dataset.csv") # loading dataset

#### Data Inspection

In [190]:
df.head()

Unnamed: 0.1,Unnamed: 0,title,text,label
0,0,LAW ENFORCEMENT ON HIGH ALERT Following Threat...,No comment is expected from Barack Obama Membe...,1
1,1,,Did they post their votes for Hillary already?,1
2,2,UNBELIEVABLE! OBAMA’S ATTORNEY GENERAL SAYS MO...,"Now, most of the demonstrators gathered last ...",1
3,3,"Bobby Jindal, raised Hindu, uses story of Chri...",A dozen politically active pastors came here f...,0
4,4,SATAN 2: Russia unvelis an image of its terrif...,"The RS-28 Sarmat missile, dubbed Satan 2, will...",1


In [191]:
df.shape

(72134, 4)

In [192]:
df.size

288536

In [193]:
df=df.drop('Unnamed: 0',axis=1) # droping a column

In [194]:
df.columns

Index(['title', 'text', 'label'], dtype='object')

In [195]:
df.isnull().sum().sum() # checking null values

597

In [197]:
df.isnull().sum()  # checking null values

title    558
text      39
label      0
dtype: int64

In [200]:
df=df.dropna() # droping null values

In [201]:
df.shape

(71537, 3)

In [202]:
df.isnull().sum()

title    0
text     0
label    0
dtype: int64

#### Splitting of Data Into X and y

In [203]:
X=df.drop('label',axis=1) # independent feature

In [204]:
y=df['label'] # dependent feature

In [205]:
X.shape

(71537, 2)

In [206]:
df.shape

(71537, 3)

In [207]:
y.shape

(71537,)

#### Import Deep Learning Libraries

In [208]:
import tensorflow

In [209]:
tf.__version__

'2.10.0'

In [210]:
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense

In [211]:
voc_size=10000 # vocabulary size

#### One Hot Encoding

In [212]:
messages=X.copy()

In [214]:
messages['title'][10]

' GOP Senator Just Smacked Down The Most Punchable Alt-Right Nazi On The Internet'

In [216]:
messages.reset_index(inplace=True)

In [217]:
import nltk
import re
from nltk.corpus import stopwords

In [218]:
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\USER\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [219]:
# data preprocessing
from nltk.stem.porter import PorterStemmer 
ps=PorterStemmer()
corpus=[]
for i in range(0,len(messages)):
    review = re.sub('[^a-zA-Z]',' ',messages['title'][i])
    review = review.lower()
    review = review.split()
    review = [ps.stem(word) for word in review if not word in stopwords.words('english')]
    review = ' '.join(review)
    corpus.append(review)

In [221]:
corpus[70000]

'donald trump hillari clinton win easili new york primari new york time'

In [222]:
onehot_repr=[one_hot(words,voc_size) for words in corpus]
onehot_repr[10]           

[9202, 571, 3839, 1484, 6815, 1532, 277, 145, 277, 3424]

In [223]:
corpus[10]

'may brexit offer would hurt cost eu citizen eu parliament'

#### Embedding Representation

In [224]:
sent_length=20
embedded_docs=pad_sequences(onehot_repr,padding='pre',maxlen=sent_length)

In [225]:
embedded_docs[1]

array([   0,    0,    0,    0,    0,    0, 6518, 9944, 6477, 2635, 7918,
       5260, 8777, 9497, 7869, 9353, 2507, 2975, 2464, 4209])

In [226]:
embedded_docs[0]

array([   0,    0,    0,    0,    0,    0,    0,    0, 6895, 1780, 3970,
       9584, 4308, 1083, 5276, 6301, 9887, 3198, 4974, 4209])

#### Adding Dropout

In [228]:
# creating model
embedding_vector_features=40 # feature representation
model=Sequential()
model.add(Embedding(voc_size,embedding_vector_features,input_length=sent_length))
model.add(LSTM(100))
model.add(Dense(1,activation='sigmoid'))
model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])

In [229]:
model.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_4 (Embedding)     (None, 20, 40)            400000    
                                                                 
 lstm_4 (LSTM)               (None, 100)               56400     
                                                                 
 dense_3 (Dense)             (None, 1)                 101       
                                                                 
Total params: 456,501
Trainable params: 456,501
Non-trainable params: 0
_________________________________________________________________


In [230]:
len(embedded_docs),y.shape

(71537, (71537,))

In [231]:
X_final=np.array(embedded_docs)
y_final=np.array(y) 

In [232]:
X_final.shape,y_final.shape

((71537, 20), (71537,))

In [233]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X_final,y_final,test_size=0.33,random_state=42)

#### Model Training

In [241]:
# final training
model.fit(X_train,y_train,validation_data=(X_test,y_test),epochs=5, batch_size=5000)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x1dae7f18d30>

In [247]:
y_pred=model.predict(X_test)



In [260]:
y_pred=np.where(y_pred>0.5,1,0)

#### Perfomence Metrics and Accuracy

In [261]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
confusion_matrix(y_test,y_pred)

array([[10224,  1469],
       [ 1164, 10751]], dtype=int64)

In [262]:
print("accuracy prercentage is : {} %".format(((accuracy_score(y_test,y_pred))*100).round(2)))

accuracy prercentage is : 88.85 %


In [263]:
from sklearn.metrics import classification_report
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.90      0.87      0.89     11693
           1       0.88      0.90      0.89     11915

    accuracy                           0.89     23608
   macro avg       0.89      0.89      0.89     23608
weighted avg       0.89      0.89      0.89     23608

