# Importing the libaries

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import f1_score
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Bidirectional, Conv1D, GlobalMaxPooling1D, LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
import keras_tuner as kt

# Loading the dataset and tokenization

In [2]:
# Load the dataset
df = pd.read_csv('final.csv')
# Ensure all entries in 'tweet' column are strings and handle NaNs
df['tweet'] = df['tweet'].astype(str).fillna('')

# Tokenize and pad the sequences
max_features = 2000
max_length = 100

tokenizer = Tokenizer(num_words=max_features)
tokenizer.fit_on_texts(df['tweet'])
X = tokenizer.texts_to_sequences(df['tweet'])
X = pad_sequences(X, maxlen=max_length)

# Encode the labels
encoder = LabelEncoder()
y = encoder.fit_transform(df['class'])

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# LSTM(Long Short-Term Memory) model

In [3]:

# Define the model
def build_model(hp):
    model = Sequential()
    model.add(Embedding(input_dim=max_features, output_dim=hp.Int('embedding_output_dim', min_value=32, max_value=128, step=32), input_length=max_length))
    model.add(LSTM(hp.Int('lstm_units', min_value=32, max_value=128, step=32)))
    model.add(Dropout(hp.Float('dropout', min_value=0.2, max_value=0.5, step=0.1)))
    model.add(Dense(3, activation='softmax'))  # Assuming 3 classes: Normal, Offensive, Hate

    model.compile(optimizer=Adam(learning_rate=hp.Choice('learning_rate', values=[1e-3, 1e-4])),
                  loss='sparse_categorical_crossentropy',
                  metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])
    return model

# Hyperparameter tuning
tuner = kt.Hyperband(build_model,
                     objective='val_sparse_categorical_accuracy',
                     max_epochs=10,
                     factor=3,
                     directory='my_dir',
                     project_name='text_classification')

stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

tuner.search(X_train, y_train, epochs=10, validation_split=0.2, callbacks=[stop_early])

# Get the optimal hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

# Build the model with the optimal hyperparameters
model = build_model(best_hps)

# Train the model
history = model.fit(X_train, y_train, epochs=10, validation_split=0.2)

# Evaluate the model
y_pred = np.argmax(model.predict(X_test), axis=-1)
f1 = f1_score(y_test, y_pred, average='weighted')
print(f'F1 Score: {f1}')


Reloading Tuner from my_dir\text_classification\tuner0.json




Epoch 1/10
[1m496/496[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 94ms/step - loss: 0.5651 - sparse_categorical_accuracy: 0.8017 - val_loss: 0.3000 - val_sparse_categorical_accuracy: 0.9009
Epoch 2/10
[1m496/496[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 94ms/step - loss: 0.2866 - sparse_categorical_accuracy: 0.9014 - val_loss: 0.2845 - val_sparse_categorical_accuracy: 0.8938
Epoch 3/10
[1m496/496[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 93ms/step - loss: 0.2248 - sparse_categorical_accuracy: 0.9242 - val_loss: 0.2854 - val_sparse_categorical_accuracy: 0.9024
Epoch 4/10
[1m496/496[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 93ms/step - loss: 0.2057 - sparse_categorical_accuracy: 0.9271 - val_loss: 0.3055 - val_sparse_categorical_accuracy: 0.9014
Epoch 5/10
[1m496/496[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 92ms/step - loss: 0.1787 - sparse_categorical_accuracy: 0.9361 - val_loss: 0.3218 - val_sparse_categorical_accurac

# Bidirectional

In [4]:

# Define the model
def build_model(hp):
    model = Sequential()
    model.add(Embedding(input_dim=max_features, output_dim=hp.Int('embedding_output_dim', min_value=32, max_value=128, step=32), input_length=max_length))
    model.add(Bidirectional(LSTM(hp.Int('lstm_units', min_value=32, max_value=128, step=32))))
    model.add(Dropout(hp.Float('dropout', min_value=0.2, max_value=0.5, step=0.1)))
    model.add(Dense(3, activation='softmax'))  # Assuming 3 classes: Normal, Offensive, Hate

    model.compile(optimizer=Adam(learning_rate=hp.Choice('learning_rate', values=[1e-3, 1e-4])),
                  loss='sparse_categorical_crossentropy',
                  metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])
    return model

# Hyperparameter tuning
tuner = kt.Hyperband(build_model,
                     objective='val_sparse_categorical_accuracy',
                     max_epochs=10,
                     factor=3,
                     directory='my_dir',
                     project_name='text_classification')

stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

tuner.search(X_train, y_train, epochs=10, validation_split=0.2, callbacks=[stop_early])

# Get the optimal hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

# Build the model with the optimal hyperparameters
model = build_model(best_hps)

# Train the model
history = model.fit(X_train, y_train, epochs=10, validation_split=0.2)

# Evaluate the model
y_pred = np.argmax(model.predict(X_test), axis=-1)
f1 = f1_score(y_test, y_pred, average='weighted')
print(f'F1 Score: {f1}')


Reloading Tuner from my_dir\text_classification\tuner0.json




Epoch 1/10
[1m496/496[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 108ms/step - loss: 0.5674 - sparse_categorical_accuracy: 0.7978 - val_loss: 0.3005 - val_sparse_categorical_accuracy: 0.9024
Epoch 2/10
[1m496/496[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 70ms/step - loss: 0.2928 - sparse_categorical_accuracy: 0.9005 - val_loss: 0.2859 - val_sparse_categorical_accuracy: 0.9017
Epoch 3/10
[1m496/496[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 50ms/step - loss: 0.2333 - sparse_categorical_accuracy: 0.9163 - val_loss: 0.2829 - val_sparse_categorical_accuracy: 0.9017
Epoch 4/10
[1m496/496[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 48ms/step - loss: 0.2017 - sparse_categorical_accuracy: 0.9298 - val_loss: 0.2939 - val_sparse_categorical_accuracy: 0.8959
Epoch 5/10
[1m496/496[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 50ms/step - loss: 0.1811 - sparse_categorical_accuracy: 0.9348 - val_loss: 0.3361 - val_sparse_categorical_accura

# Convolutional Neural Network (CNN) model


In [5]:

# Load the dataset
df = pd.read_csv('final.csv')



# Define the model
def build_model(hp):
    model = Sequential()
    model.add(Embedding(input_dim=max_features, output_dim=hp.Int('embedding_output_dim', min_value=32, max_value=128, step=32), input_length=max_length))
    model.add(Conv1D(filters=hp.Int('filters', min_value=32, max_value=128, step=32), kernel_size=hp.Choice('kernel_size', values=[3, 5, 7]), activation='relu'))
    model.add(GlobalMaxPooling1D())
    model.add(Dropout(hp.Float('dropout', min_value=0.2, max_value=0.5, step=0.1)))
    model.add(Dense(3, activation='softmax'))  # Assuming 3 classes: Normal, Offensive, Hate

    model.compile(optimizer=Adam(learning_rate=hp.Choice('learning_rate', values=[1e-3, 1e-4])),
                  loss='sparse_categorical_crossentropy',
                  metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])
    return model

# Hyperparameter tuning
tuner = kt.Hyperband(build_model,
                     objective='val_sparse_categorical_accuracy',
                     max_epochs=10,
                     factor=3,
                     directory='my_dir',
                     project_name='text_classification')

stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

tuner.search(X_train, y_train, epochs=10, validation_split=0.2, callbacks=[stop_early])

# Get the optimal hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

# Build the model with the optimal hyperparameters
model = build_model(best_hps)

# Train the model
history = model.fit(X_train, y_train, epochs=10, validation_split=0.2)

# Evaluate the model
y_pred = np.argmax(model.predict(X_test), axis=-1)
f1 = f1_score(y_test, y_pred, average='weighted')
print(f'F1 Score: {f1}')


Reloading Tuner from my_dir\text_classification\tuner0.json
Epoch 1/10




[1m496/496[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - loss: 0.5933 - sparse_categorical_accuracy: 0.7996 - val_loss: 0.2888 - val_sparse_categorical_accuracy: 0.9097
Epoch 2/10
[1m496/496[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 0.2890 - sparse_categorical_accuracy: 0.9025 - val_loss: 0.2637 - val_sparse_categorical_accuracy: 0.9067
Epoch 3/10
[1m496/496[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 0.2460 - sparse_categorical_accuracy: 0.9160 - val_loss: 0.2753 - val_sparse_categorical_accuracy: 0.9027
Epoch 4/10
[1m496/496[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 0.2124 - sparse_categorical_accuracy: 0.9274 - val_loss: 0.2923 - val_sparse_categorical_accuracy: 0.9009
Epoch 5/10
[1m496/496[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 0.1847 - sparse_categorical_accuracy: 0.9362 - val_loss: 0.3089 - val_sparse_categorical_accuracy: 0.8974
Epoch 6/10
