In [22]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, Conv1D, GlobalMaxPooling1D, Dropout

# Load the dataset
data = pd.read_csv('HateSpeechDetection.csv')

# Preprocess the data
texts = data['Text'].values
labels = data['Label'].values

# Encode labels
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)

# Tokenize the text
tokenizer = Tokenizer(num_words=15000)
tokenizer.fit_on_texts(texts)
X = tokenizer.texts_to_sequences(texts)
X = pad_sequences(X, maxlen=200)

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.2, random_state=42)


In [24]:
from tensorflow.keras.layers import SimpleRNN
from tensorflow.keras.layers import Dense, Embedding, GlobalMaxPooling1D, Dropout, Input
from tensorflow.keras.callbacks import EarlyStopping

# Build the model
model = Sequential()
model.add(Input(shape=(200,)))
model.add(Embedding(input_dim=15000, output_dim=200, input_length=200))
model.add(SimpleRNN(256, return_sequences=True))
model.add(GlobalMaxPooling1D())
model.add(Dense(512,activation='sigmoid'))
model.add(Dropout(0.4))
model.add(Dense(128,activation='relu'))
model.add(Dense(1, activation='sigmoid'))


# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

early_stopping = EarlyStopping(monitor='loss', patience=3, restore_best_weights=True)
model.fit(X_train, y_train, epochs=10, batch_size=64, callbacks=[early_stopping])


Epoch 1/10
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 190ms/step - accuracy: 0.5699 - loss: 0.7068
Epoch 2/10
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 189ms/step - accuracy: 0.6002 - loss: 0.6750
Epoch 3/10
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 196ms/step - accuracy: 0.5962 - loss: 0.6767
Epoch 4/10
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 216ms/step - accuracy: 0.5925 - loss: 0.6790
Epoch 5/10
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 189ms/step - accuracy: 0.5925 - loss: 0.6781
Epoch 6/10
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 186ms/step - accuracy: 0.6072 - loss: 0.6614
Epoch 7/10
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 185ms/step - accuracy: 0.7555 - loss: 0.4919
Epoch 8/10
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 196ms/step - accuracy: 0.8089 - loss: 0.4034
Epoch 9/10
[1m2

<keras.src.callbacks.history.History at 0x24e41b2a610>

In [26]:
from sklearn.metrics import classification_report
y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype("int32")
y_pred = y_pred.flatten()
print(classification_report(y_test,y_pred))

[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 34ms/step
              precision    recall  f1-score   support

           0       0.86      0.74      0.74      2094
           1       0.70      0.68      0.72      1426

    accuracy                           0.78      3520
   macro avg       0.82      0.70      0.80      3520
weighted avg       0.76      0.72      0.68      3520


In [30]:
from sklearn.metrics import roc_auc_score

roc_auc=roc_auc_score(y_test,y_pred)
print('ROC-AUC SCORE:',roc_auc)

ROC-AUC SCORE: 0.7376412403835979
