#  __Deep Learning Models__



## Import necessary libraries

In [None]:

import numpy as np
import pandas as pd
from imblearn.over_sampling import RandomOverSampler
from imblearn.over_sampling import SMOTE
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Dense, LSTM, Embedding, SpatialDropout1D, Conv1D, GlobalMaxPooling1D, Bidirectional
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

In [None]:
# Load the cleaned dataset
df = pd.read_csv('../Data-Preprocessing/cleaned_data.csv')

# Empty text cells are read back from CSV as NaN. Replace them with '' before
# casting, otherwise astype(str) turns each one into the literal word "nan",
# which the tokenizer would then learn as a real token.
df['tweet'] = df['tweet'].fillna('').astype(str)

## Tokenize the data for Deep Learning models

In [None]:
# Learn the word -> integer index mapping from the tweet column, then encode
# each tweet as a list of those indices.
tweets = df['tweet']
tokenizer = Tokenizer()
tokenizer.fit_on_texts(tweets)
X_seq = tokenizer.texts_to_sequences(tweets)

# Bring every sequence to a fixed length of 100 so the result is one 2-D array.
X_pad = pad_sequences(X_seq, maxlen=100)

# One more than the number of known words: Keras word indices start at 1 and
# index 0 is used for padding, so the embedding needs len(word_index) + 1 rows.
vocab_size = 1 + len(tokenizer.word_index)

# Target labels. Assumes 'class' is the target column -- TODO confirm.
y = df['class']

## Prepare Data for Modeling

In [None]:
# Split the padded sequences into training (80%) and testing (20%) sets.
# Stratify on the label so both splits keep the same class proportions; with
# imbalanced classes an unstratified split can under-represent the rare class
# in the test set.
X_train_pad, X_test_pad, y_train_pad, y_test_pad = train_test_split(
    X_pad, y, test_size=0.2, random_state=42, stratify=y
)

# Address class imbalance on the training split only (the test split is left
# untouched so evaluation reflects the real class distribution).
#
# Random oversampling is used instead of SMOTE: SMOTE creates synthetic rows by
# interpolating between neighbouring rows, which is meaningless for sequences of
# token indices (the midpoint of two word IDs is the ID of an unrelated word).
# RandomOverSampler duplicates real minority-class sequences instead.
oversampler = RandomOverSampler(random_state=42)
X_train_pad_res, y_train_pad_res = oversampler.fit_resample(X_train_pad, y_train_pad)

## 1.  __LSTM__ model

In [None]:
# Define LSTM model: embedding -> spatial dropout -> LSTM -> softmax classifier
lstm_model = Sequential()
lstm_model.add(Embedding(input_dim=vocab_size, output_dim=128, input_length=100))
lstm_model.add(SpatialDropout1D(0.2))
# Plain (single-direction) LSTM. The Bidirectional wrapper is deliberately not
# used here: the bidirectional variant is trained as its own model in the
# "Bidirectional LSTM" section, and wrapping this one too would make the two
# models identical and the comparison between them meaningless.
lstm_model.add(LSTM(100, dropout=0.2, recurrent_dropout=0.2))
lstm_model.add(Dense(3, activation='softmax'))  # Assuming 3 classes

# Sparse categorical cross-entropy takes integer class labels directly.
lstm_model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# Train on the resampled training data; the test split doubles as validation data.
lstm_model.fit(X_train_pad_res, y_train_pad_res, epochs=5, batch_size=64, validation_data=(X_test_pad, y_test_pad))

# Evaluate LSTM model
lstm_loss, lstm_accuracy = lstm_model.evaluate(X_test_pad, y_test_pad)
lstm_y_pred = lstm_model.predict(X_test_pad)
# Pick the most probable class for each test sample.
lstm_y_pred_classes = np.argmax(lstm_y_pred, axis=1)
# NOTE(review): target_names are applied in ascending label order (0, 1, 2);
# confirm this order matches the label encoding in cleaned_data.csv.
lstm_report = classification_report(y_test_pad, lstm_y_pred_classes, target_names=['Normal', 'Hate', 'Offensive'])

print("LSTM Model Accuracy:", lstm_accuracy)
print("LSTM Model Classification Report:\n", lstm_report)

## 2.  __CNN__ model

In [None]:
# CNN text classifier: embedding -> 1-D convolution -> global max pooling ->
# softmax over the classes.
cnn_model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=128, input_length=100),
    Conv1D(filters=128, kernel_size=5, activation='relu'),
    GlobalMaxPooling1D(),
    Dense(3, activation='softmax'),  # Assuming 3 classes
])

cnn_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit on the resampled training data; the test split is also passed as
# validation data so per-epoch metrics are reported on it.
cnn_model.fit(
    X_train_pad_res,
    y_train_pad_res,
    validation_data=(X_test_pad, y_test_pad),
    batch_size=64,
    epochs=5,
)

# Score the trained CNN on the held-out test split.
cnn_loss, cnn_accuracy = cnn_model.evaluate(X_test_pad, y_test_pad)

# Turn the per-class probabilities into a single predicted label per sample.
cnn_y_pred = cnn_model.predict(X_test_pad)
cnn_y_pred_classes = cnn_y_pred.argmax(axis=1)

# NOTE(review): target_names follow ascending label order (0, 1, 2); confirm
# that this matches how the classes are encoded in the dataset.
cnn_report = classification_report(
    y_test_pad,
    cnn_y_pred_classes,
    target_names=['Normal', 'Hate', 'Offensive'],
)

print("CNN Model Accuracy:", cnn_accuracy)
print("CNN Model Classification Report:\n", cnn_report)

## 3.  __Bidirectional LSTM__ model

In [None]:
# Bidirectional LSTM classifier. The layer stack is listed first and then
# handed to Sequential in one go.
bi_lstm_layers = [
    Embedding(input_dim=vocab_size, output_dim=128, input_length=100),
    SpatialDropout1D(0.2),
    # The wrapped LSTM reads each sequence in both directions.
    Bidirectional(LSTM(100, dropout=0.2, recurrent_dropout=0.2)),
    Dense(3, activation='softmax'),  # Assuming 3 classes
]
bi_lstm_model = Sequential(bi_lstm_layers)

bi_lstm_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train on the resampled training data, validating against the test split
# after every epoch.
bi_lstm_model.fit(
    X_train_pad_res,
    y_train_pad_res,
    epochs=5,
    batch_size=64,
    validation_data=(X_test_pad, y_test_pad),
)

# Evaluate Bidirectional LSTM model on the test split.
bi_lstm_loss, bi_lstm_accuracy = bi_lstm_model.evaluate(X_test_pad, y_test_pad)

# Class probabilities -> index of the most likely class for each sample.
bi_lstm_y_pred = bi_lstm_model.predict(X_test_pad)
bi_lstm_y_pred_classes = bi_lstm_y_pred.argmax(axis=1)

# NOTE(review): verify that the order of target_names matches the integer
# label encoding (names are assigned to labels in ascending order).
bi_lstm_report = classification_report(
    y_test_pad,
    bi_lstm_y_pred_classes,
    target_names=['Normal', 'Hate', 'Offensive'],
)

print("Bidirectional LSTM Model Accuracy:", bi_lstm_accuracy)
print("Bidirectional LSTM Model Classification Report:\n", bi_lstm_report)