# Bidirectional LSTM Model Training 

## Import necessary libraries

In [24]:
import re

import numpy as np
import pandas as pd
import tensorflow as tf
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import hashing_trick, one_hot


## Load the dataset
(This is a sample dataset I made)

In [7]:
# Read the sample CSV into a DataFrame; the path is relative to the working directory.
df = pd.read_csv("fake_news_sample_50.csv")

In [8]:
# Preview the first rows to check which columns were loaded.
df.head()

Unnamed: 0,id,title,author,text,label
0,1,Stock Market Rises Amid Economic Recovery,Daniel White,The initiative aims to improve citizens' well-...,0
1,2,Apple Plans to Implant Chips in Humans by 2030,Chris Evans,Reports say the next generation of humans will...,1
2,3,Scientists Develop AI That Detects Diseases Early,Michael Brown,The biodegradable alternative is expected to r...,0
3,4,Aliens Found Living on the Moon,Emily Davis,Strange energy readings have been detected com...,1
4,5,Man Time Travels to 2500 and Returns With Proof,Emily Davis,Rumors suggest Musk intends to conduct secret ...,1


In [9]:
# Count missing values per column.
df.isna().sum()

id        0
title     0
author    0
text      0
label     0
dtype: int64

In [11]:
# Class balance: number of rows for each label value.
df['label'].value_counts()

label
0    27
1    23
Name: count, dtype: int64

In [17]:
# Independent copy of the loaded frame; the title cleaning step reads from `messages`.
messages = df.copy()

## Cleaning texts

In [29]:
# Turn every title into a space-joined string of lowercase, stemmed, non-stop-word tokens.

# Build the stop-word set once. Calling stopwords.words() inside the comprehension
# reloads the list for every token, and the NLTK fileid is the lowercase "english"
# (the capitalised spelling only resolves on case-insensitive filesystems).
stop_words = set(stopwords.words("english"))
stemmer = PorterStemmer()

corpus = []
# Iterate over the column values rather than messages['title'][i]: label-based
# lookup raises KeyError as soon as the frame's index is not exactly 0..n-1.
for title in messages['title']:
    # Keep letters only, then normalise case and split on whitespace.
    tokens = re.sub("[^a-zA-Z]", " ", title).lower().split()
    # Stop words are filtered before stemming, so the check sees the raw token.
    stems = [stemmer.stem(token) for token in tokens if token not in stop_words]
    corpus.append(" ".join(stems))
corpus

['stock market rise amid econom recoveri',
 'appl plan implant chip human',
 'scientist develop ai detect diseas earli',
 'alien found live moon',
 'man time travel return proof',
 'scientist develop ai detect diseas earli',
 'local startup win intern innov award',
 'tech giant join hand fight climat chang',
 'elon musk buy moon privat experi',
 'tech giant join hand fight climat chang',
 'alien found live moon',
 'ancient pyramid emit mysteri energi beam',
 'stock market rise amid econom recoveri',
 'stock market rise amid econom recoveri',
 'nasa discov new earth like planet',
 'ancient pyramid emit mysteri energi beam',
 'govern launch free healthcar plan',
 'alien found live moon',
 'nasa discov new earth like planet',
 'alien found live moon',
 'alien found live moon',
 'scientist confirm earth flat',
 'appl plan implant chip human',
 'stock market rise amid econom recoveri',
 'cure cancer hidden big pharma',
 'stock market rise amid econom recoveri',
 'local startup win intern in

## Applying one-hot encoding (each word is hashed to an integer index below `voc_size`)

In [35]:
voc_size = 500  # number of hash buckets; word indices fall in [1, voc_size - 1]
# one_hot() hashes with Python's built-in hash(), which is salted per process, so the
# same word maps to a different index after every kernel restart. hashing_trick() with
# "md5" performs the same word -> index encoding with a stable hash, which keeps the
# encoded sequences (and anything trained on them) reproducible across runs.
one_hot_repr = [hashing_trick(sent, voc_size, hash_function="md5") for sent in corpus]

## Applying padding (to have a fixed input layer size)

In [41]:
# Fixed sequence length fed to the network; every encoded title is brought to this
# many indices (pad_sequences pads and truncates at the front by default).
sent_length = 10
embedded_docs = pad_sequences(one_hot_repr, maxlen=sent_length)

## Making the Model 

In [44]:
dimensions = 10  # length of the vector learned for each word index

model = Sequential()

# `input_length` is intentionally not passed: Keras 3 deprecates it on Embedding
# (it only triggers a warning), and the sequence length is supplied to
# model.build() below instead.
model.add(Embedding(input_dim=voc_size, output_dim=dimensions))
# One LSTM reads the sequence forwards and one backwards, 100 units each.
model.add(Bidirectional(LSTM(100)))
# Single sigmoid unit: one score in [0, 1] per title for the binary label.
model.add(Dense(1,activation="sigmoid"))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Building with an explicit (batch, sent_length) shape creates the weights now,
# so summary() can report parameter counts before any training.
model.build(input_shape=(None, sent_length))

model.summary()




In [46]:
# Features: the padded index sequences as a NumPy array.
x_final = np.array(embedded_docs)
# Targets: the `label` column of the loaded frame as a NumPy array.
y_final = np.array(df['label'])

## Train Test split

In [49]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for evaluation; the fixed random_state makes the split repeatable.
# NOTE(review): the cleaned corpus contains many repeated titles, so identical samples
# can land in both the train and the test part — treat the test scores with caution.
X_train, X_test, y_train, y_test = train_test_split(
    x_final,
    y_final,
    test_size=0.3,
    random_state=42,
)

## Model training

In [53]:
# Train on the training split for 20 epochs in mini-batches of 32.
# NOTE(review): the recorded output already shows accuracy 1.0 and a loss of ~1.5e-4 in
# epoch 1, which suggests this cell was re-run on an already-trained model — rebuild the
# model before fitting to get a training log that starts from fresh weights.
model.fit(X_train,y_train,batch_size=32, epochs=20)

Epoch 1/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - accuracy: 1.0000 - loss: 1.5471e-04
Epoch 2/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - accuracy: 1.0000 - loss: 1.5262e-04
Epoch 3/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - accuracy: 1.0000 - loss: 1.5342e-04
Epoch 4/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 1.0000 - loss: 1.5173e-04
Epoch 5/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - accuracy: 1.0000 - loss: 1.5121e-04
Epoch 6/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 1.0000 - loss: 1.4996e-04
Epoch 7/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 1.0000 - loss: 1.4896e-04
Epoch 8/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 1.0000 - loss: 1.4824e-04
Epoch 9/20
[1m2/2[0m [32m━━━━

<keras.src.callbacks.history.History at 0x19cd396d370>

## Doing Predictions

In [54]:
# Score the held-out sequences; the model ends in a single sigmoid unit,
# so each row holds one value between 0 and 1.
y_pred = model.predict(X_test)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 820ms/step


In [55]:
# Show the raw sigmoid scores before thresholding.
y_pred

array([[1.0678058e-04],
       [9.9979192e-01],
       [9.9973261e-01],
       [1.0468773e-04],
       [9.9984205e-01],
       [1.2545215e-02],
       [1.3032627e-04],
       [1.0678058e-04],
       [1.3032627e-04],
       [9.9984205e-01],
       [1.0678058e-04],
       [9.9979192e-01],
       [9.9982053e-01],
       [4.7587632e-04],
       [9.9984205e-01]], dtype=float32)

In [57]:
# Threshold the sigmoid scores at 0.5: scores at or above it become 1, the rest 0.
predictions = (y_pred >= 0.5).astype(int)

## Accuracy Checking

In [62]:
from sklearn.metrics import classification_report,confusion_matrix

In [61]:
# Per-class precision, recall and F1 of the thresholded predictions on the test split.
print(classification_report(y_test, predictions))

              precision    recall  f1-score   support

           0       0.88      1.00      0.93         7
           1       1.00      0.88      0.93         8

    accuracy                           0.93        15
   macro avg       0.94      0.94      0.93        15
weighted avg       0.94      0.93      0.93        15



In [63]:
# Confusion matrix on the test split: rows are true labels, columns are predicted labels.
print(confusion_matrix(y_test, predictions))

[[7 0]
 [1 7]]
