## **LSTM**

In [1]:
import numpy as np
import pandas as pd
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
import pickle

# Load data
data = pd.read_csv('/content/drive/MyDrive/data/elongated_word_10k.csv')
X = data['news']
y = data['is_fake']

# Preprocess the text data with Tokenizer
tokenizer = Tokenizer(num_words=256)
tokenizer.fit_on_texts(X)
X_tokenized = tokenizer.texts_to_sequences(X)
X_padded = pad_sequences(X_tokenized, maxlen=256)

# train test
X_train, X_test, y_train, y_test = train_test_split(
    X_padded, y, test_size=0.2, random_state=21, stratify=y
)

# model
lstm_model = Sequential()
lstm_model.add(layer = Embedding(input_dim = 256, output_dim = 256))
lstm_model.add(layer = LSTM(units = 256, dropout = 0.15, recurrent_dropout = 0.15))
lstm_model.add(layer = Dropout(rate = 0.4))
lstm_model.add(layer = Dense(1,  activation = 'sigmoid'))
lstm_model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

# predict
lstm_model.fit(X_train, y_train, epochs=5, batch_size=64, validation_data=(X_test, y_test))

# evaluation
loss, accuracy = lstm_model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

# confusion matrix
y_pred = (lstm_model.predict(X_test) > 0.5).astype("int32")
cfm = confusion_matrix(y_test, y_pred)
print(cfm)

# report
print(classification_report(y_test, y_pred))

# save model
model_pkl_file = "/content/drive/MyDrive/data/lstm_model.pkl"

with open(model_pkl_file, 'wb') as file:
    pickle.dump(lstm_model, file)

Epoch 1/5
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 338ms/step - accuracy: 0.8342 - loss: 0.3991 - val_accuracy: 0.9200 - val_loss: 0.2048
Epoch 2/5
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 331ms/step - accuracy: 0.9222 - loss: 0.2190 - val_accuracy: 0.9200 - val_loss: 0.2268
Epoch 3/5
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 332ms/step - accuracy: 0.9313 - loss: 0.2109 - val_accuracy: 0.9210 - val_loss: 0.2116
Epoch 4/5
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 329ms/step - accuracy: 0.9267 - loss: 0.2086 - val_accuracy: 0.9220 - val_loss: 0.2075
Epoch 5/5
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 328ms/step - accuracy: 0.9353 - loss: 0.1932 - val_accuracy: 0.9280 - val_loss: 0.2112
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 103ms/step - accuracy: 0.9218 - loss: 0.2201
Test Accuracy: 92.80%
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

In [3]:
import pickle
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from sklearn.metrics import classification_report, confusion_matrix



with open('/content/drive/MyDrive/data/lstm_model.pkl', 'rb') as f:
    lstm_model = pickle.load(f)

data = pd.read_csv('/content/drive/MyDrive/data/val_10k.csv')
X = data['news']
y = data['is_fake']


tokenizer = Tokenizer(num_words=256)
tokenizer.fit_on_texts(X)
X_tokenized = tokenizer.texts_to_sequences(X)
X_padded = pad_sequences(X_tokenized, maxlen=256)

loss, accuracy = lstm_model.evaluate(X_padded, y)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

y_pred = (lstm_model.predict(X_padded) > 0.5).astype("int32")
cfm = confusion_matrix(y, y_pred)
print(cfm)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 112ms/step - accuracy: 0.7206 - loss: 0.9877
Test Accuracy: 71.94%
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 112ms/step
[[4373  356]
 [2450 2821]]
