#  __Deep Learning Models__



## Import necessary libraries

In [15]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
from imblearn.over_sampling import SMOTE
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Embedding, SpatialDropout1D, Conv1D, GlobalMaxPooling1D, Bidirectional
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

In [16]:
#Load the cleaned dataset
df = pd.read_csv('../Data-Preprocessing/cleaned_data.csv')
df['tweet'] = df['tweet'].astype(str)

## Tokenize the data for Deep Learning models

In [17]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['tweet'])
X_seq = tokenizer.texts_to_sequences(df['tweet'])
X_pad = pad_sequences(X_seq, maxlen=100)  # Padding sequences to the same length
vocab_size = len(tokenizer.word_index) + 1
y = df['class']  # Assuming 'class' is the target variable

## Prepare Data for Modeling

In [18]:
# Split the padded sequences into training and testing sets
X_train_pad, X_test_pad, y_train_pad, y_test_pad = train_test_split(X_pad, y, test_size=0.2, random_state=42)

# Address class imbalance using SMOTE
smote = SMOTE(random_state=42)
X_train_pad_res, y_train_pad_res = smote.fit_resample(X_train_pad, y_train_pad)

## 1.  __LSTM__ model

In [19]:
# Define LSTM model
lstm_model = Sequential()
lstm_model.add(Embedding(input_dim=vocab_size, output_dim=128, input_length=100))
lstm_model.add(SpatialDropout1D(0.2))
lstm_model.add(Bidirectional(LSTM(100, dropout=0.2, recurrent_dropout=0.2)))
lstm_model.add(Dense(3, activation='softmax'))  # Assuming 3 classes

lstm_model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
lstm_model.fit(X_train_pad_res, y_train_pad_res, epochs=5, batch_size=64, validation_data=(X_test_pad, y_test_pad))

# Evaluate LSTM model
lstm_loss, lstm_accuracy = lstm_model.evaluate(X_test_pad, y_test_pad)
lstm_y_pred = lstm_model.predict(X_test_pad)
lstm_y_pred_classes = np.argmax(lstm_y_pred, axis=1)
lstm_report = classification_report(y_test_pad, lstm_y_pred_classes, target_names=['Normal', 'Hate', 'Offensive'])

print("LSTM Model Accuracy:", lstm_accuracy)
print("LSTM Model Classification Report:\n", lstm_report)

Epoch 1/5




[1m722/722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m97s[0m 127ms/step - accuracy: 0.6018 - loss: 0.8481 - val_accuracy: 0.8080 - val_loss: 0.5358
Epoch 2/5
[1m722/722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m92s[0m 128ms/step - accuracy: 0.7295 - loss: 0.6268 - val_accuracy: 0.7967 - val_loss: 0.5685
Epoch 3/5
[1m722/722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m94s[0m 130ms/step - accuracy: 0.7861 - loss: 0.5184 - val_accuracy: 0.7612 - val_loss: 0.6631
Epoch 4/5
[1m722/722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m94s[0m 129ms/step - accuracy: 0.8223 - loss: 0.4328 - val_accuracy: 0.7915 - val_loss: 0.6406
Epoch 5/5
[1m722/722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m94s[0m 130ms/step - accuracy: 0.8527 - loss: 0.3682 - val_accuracy: 0.7848 - val_loss: 0.7018
[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms/step - accuracy: 0.7808 - loss: 0.7332
[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 25ms/step
LSTM

## 2.  __CNN__ 

In [20]:
#define cnn model
cnn_model = Sequential()
cnn_model.add(Embedding(input_dim=vocab_size, output_dim=128, input_length=100))
cnn_model.add(Conv1D(filters=128, kernel_size=5, activation='relu'))
cnn_model.add(GlobalMaxPooling1D())
cnn_model.add(Dense(3, activation='softmax'))  # Assuming 3 classes

cnn_model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
cnn_model.fit(X_train_pad_res, y_train_pad_res, epochs=5, batch_size=64, validation_data=(X_test_pad, y_test_pad))

# Evaluate CNN model
cnn_loss, cnn_accuracy = cnn_model.evaluate(X_test_pad, y_test_pad)
cnn_y_pred = cnn_model.predict(X_test_pad)
cnn_y_pred_classes = np.argmax(cnn_y_pred, axis=1)
cnn_report = classification_report(y_test_pad, cnn_y_pred_classes, target_names=['Normal', 'Hate', 'Offensive'])

print("CNN Model Accuracy:", cnn_accuracy)
print("CNN Model Classification Report:\n", cnn_report)

Epoch 1/5




[1m722/722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 40ms/step - accuracy: 0.6153 - loss: 0.8245 - val_accuracy: 0.7933 - val_loss: 0.6373
Epoch 2/5
[1m722/722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 38ms/step - accuracy: 0.7835 - loss: 0.5260 - val_accuracy: 0.8365 - val_loss: 0.4741
Epoch 3/5
[1m722/722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 39ms/step - accuracy: 0.8956 - loss: 0.3002 - val_accuracy: 0.8094 - val_loss: 0.5693
Epoch 4/5
[1m722/722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 41ms/step - accuracy: 0.9539 - loss: 0.1479 - val_accuracy: 0.7602 - val_loss: 0.8164
Epoch 5/5
[1m722/722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 42ms/step - accuracy: 0.9765 - loss: 0.0826 - val_accuracy: 0.7660 - val_loss: 0.9246
[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.7680 - loss: 0.9435
[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step
CNN Model A

## 3.  __Bidirectional LSTM__ model

In [21]:
#Define Bidirectional LSTM model
bi_lstm_model = Sequential()
bi_lstm_model.add(Embedding(input_dim=vocab_size, output_dim=128, input_length=100))
bi_lstm_model.add(SpatialDropout1D(0.2))
bi_lstm_model.add(Bidirectional(LSTM(100, dropout=0.2, recurrent_dropout=0.2)))
bi_lstm_model.add(Dense(3, activation='softmax'))  # Assuming 3 classes

bi_lstm_model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
bi_lstm_model.fit(X_train_pad_res, y_train_pad_res, epochs=5, batch_size=64, validation_data=(X_test_pad, y_test_pad))

# Evaluate Bidirectional LSTM model
bi_lstm_loss, bi_lstm_accuracy = bi_lstm_model.evaluate(X_test_pad, y_test_pad)
bi_lstm_y_pred = bi_lstm_model.predict(X_test_pad)
bi_lstm_y_pred_classes = np.argmax(bi_lstm_y_pred, axis=1)
bi_lstm_report = classification_report(y_test_pad, bi_lstm_y_pred_classes, target_names=['Normal', 'Hate', 'Offensive'])

print("Bidirectional LSTM Model Accuracy:", bi_lstm_accuracy)
print("Bidirectional LSTM Model Classification Report:\n", bi_lstm_report)

Epoch 1/5




[1m722/722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m98s[0m 129ms/step - accuracy: 0.5908 - loss: 0.8569 - val_accuracy: 0.8153 - val_loss: 0.5187
Epoch 2/5
[1m722/722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m91s[0m 127ms/step - accuracy: 0.7268 - loss: 0.6249 - val_accuracy: 0.8099 - val_loss: 0.5290
Epoch 3/5
[1m722/722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 132ms/step - accuracy: 0.7787 - loss: 0.5268 - val_accuracy: 0.7917 - val_loss: 0.6089
Epoch 4/5
[1m722/722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m92s[0m 128ms/step - accuracy: 0.8214 - loss: 0.4371 - val_accuracy: 0.7899 - val_loss: 0.6147
Epoch 5/5
[1m722/722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m93s[0m 128ms/step - accuracy: 0.8511 - loss: 0.3710 - val_accuracy: 0.7804 - val_loss: 0.6762
[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 21ms/step - accuracy: 0.7827 - loss: 0.6993
[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 24ms/step
Bidi