In [1]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense, Dropout, Input
from tensorflow.keras.optimizers import Adam
from scikeras.wrappers import KerasClassifier
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Input, Embedding, LSTM, GlobalMaxPool1D, Dense, Dropout
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
import numpy as np

In [2]:
data = pd.read_csv("labeled_data.csv")
data.head()

Unnamed: 0.1,Unnamed: 0,count,hate_speech,offensive_language,neither,class,tweet
0,0,3,0,0,3,1,!!! RT @mayasolovely: As a woman you shouldn't...
1,1,3,0,3,0,0,!!!!! RT @mleew17: boy dats cold...tyga dwn ba...
2,2,3,0,3,0,0,!!!!!!! RT @UrKindOfBrand Dawg!!!! RT @80sbaby...
3,3,3,0,2,1,0,!!!!!!!!! RT @C_G_Anderson: @viva_based she lo...
4,4,6,0,6,0,0,!!!!!!!!!!!!! RT @ShenikaRoberts: The shit you...


In [3]:
data.dtypes

Unnamed: 0             int64
count                  int64
hate_speech            int64
offensive_language     int64
neither                int64
class                  int64
tweet                 object
dtype: object

In [4]:
tokenizer = Tokenizer(num_words=10000) 
tokenizer.fit_on_texts(data['tweet'])

X = tokenizer.texts_to_sequences(data['tweet'])
X = pad_sequences(X, padding='post', maxlen=100) 
y = data['class']


In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
def create_model(learning_rate=0.001, dropout_rate=0.0, units=128):
    model = Sequential([
        Input(shape=(100,)), 
        Embedding(input_dim=10000, output_dim=128), 
        LSTM(units),  
        Dropout(dropout_rate),
        Dense(1, activation='sigmoid')  
    ])
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='binary_crossentropy',  
                  metrics=["accuracy"])
    return model

model = KerasClassifier(model=create_model, verbose=0, epochs=5, batch_size=32)


In [7]:
from skopt import BayesSearchCV
from skopt.space import Real, Integer

# Define the search space
search_spaces = {
    'model__learning_rate': Real(1e-4, 1e-2, prior='log-uniform'),  
    'model__dropout_rate': Real(0.0, 0.5),                         
    'model__units': Integer(64, 256),                          
    'batch_size': Integer(32, 64)                           
}


model = KerasClassifier(model=create_model, verbose=0, epochs=3)


bayes_search = BayesSearchCV(
    estimator=model,
    search_spaces=search_spaces,
    n_iter=10,         
    n_jobs=-1,         
    cv=3,            
    verbose=1,
    random_state=42   
)

bayes_result = bayes_search.fit(X_train, y_train)

print("Best parameters (Bayesian Search):", bayes_result.best_params_)
print("Best score (Bayesian Search):", bayes_result.best_score_)


Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Best parameters (Bayesian Search): OrderedDict({'batch_size': 45, 'model__dropout_rate': 0.36386287158866254, 'model__learning_rate': 0.007340675018434775, 'model__units': 125})
Best score (Bayesian Search): 0.8321396163374937
