In [1]:
import pandas as pd
import tensorflow as tf
import numpy as np
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from skmultilearn.adapt import MLkNN
import sklearn.metrics as metrics
from sklearn.metrics import hamming_loss, accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import multilabel_confusion_matrix
import os

In [2]:
d2= pd.read_csv('train_en_l1.csv')
d2
d2 = d2.rename(columns={'key' : 'unique_id', 'sentence' : 'text'})
d2.to_csv('updated_train_en_l1.csv', index=False)
# d2

# Convert annotator columns to numeric without replacing NaNs
d2[['en_a1', 'en_a2', 'en_a3', 'en_a4', 'en_a5', 'en_a6']] = d2[
    ['en_a1', 'en_a2', 'en_a3', 'en_a4', 'en_a5', 'en_a6']
].apply(pd.to_numeric, errors='coerce')  # NaNs are retained

# Compute 'label' based on majority voting while ignoring NaNs
d2['label'] = (d2[['en_a1', 'en_a2', 'en_a3', 'en_a4', 'en_a5', 'en_a6']].mean(axis=1, skipna=True) >= 0.5).astype(int)
d2


Unnamed: 0,text,unique_id,en_a1,en_a2,en_a3,en_a4,en_a5,en_a6,label
0,"This is our history . For Shia and Sunni ,...",question_1,,,,,,0.0,0
1,"If this is ""grilling"" then wht wud u call t...",question_1,,,,1.0,,,1
2,look at <handle replaced>madam. Locals attac...,question_1,,,,,0.0,,0
3,our beloved yogi ji has given money home and...,question_1,,,,,1.0,,1
4,"""I gonna kill that bitch"" Bitvj imma kill you...",question_1,,,,,,1.0,1
...,...,...,...,...,...,...,...,...,...
6526,“mama let’s research” bitch suck my dick 😭,question_1,,,,,0.0,,0
6527,😂😂😂😂 I WAS BY MYSELF AND IT WAS HER AND HER BI...,question_1,,,,,0.0,,0
6528,😂😂😂😂😂😂😂😂😂 i just seen a comment and bitch IM C...,question_1,,,,,0.0,,0
6529,"😎""Boys Only Luxury!!!!!!!!!!!!!!!!!!!!!!!!!!!""...",question_1,0.0,1.0,1.0,0.0,,,1


In [3]:
# Create binary label ('hate' or 'not_hate')
def determine_binary_label(label):
    return 'hate' if label == 1 else 'not_hate'

d2['binary_label'] = d2['label'].apply(determine_binary_label)

# # Reorder columns
d2 = d2[['unique_id', 'text', 'binary_label', 'label']]

In [4]:
d2.to_csv('updated_train_en_l1.csv', index=False)

In [5]:
## Word Pre-Processing ##
import nltk
nltk.download('stopwords')
import string
import re
wpt = nltk.WordPunctTokenizer()
stop_words_init = nltk.corpus.stopwords.words('english')
stop_words = [i for i in stop_words_init if i not in ('not','and','for')]
print(stop_words)
## Function to normalize text for pre-processing ##
def normalize_text(text):
    text = text.lower()
    text = re.sub(r'\[.*?\]', ' ', text)
    text = re.sub(r'https?://\S+|www\.\S+', ' ', text)
    text = re.sub(r'<.*?>+', ' ', text)
    text = re.sub(r'[%s]' % re.escape(string.punctuation), ' ', text)
    text = re.sub(r'\n', ' ', text)
    text = re.sub(r'\w*\d\w*', ' ', text)
    text = re.sub(r'\s+', ' ', text).strip()
    return text
## Apply the written function ##
d2.loc[:, 'text'] = d2['text'].apply(lambda x: normalize_text(x))
processed_list = []
for j in d2['text']:
    process = j.replace('...','')
    processed_list.append(process)

df_processed = pd.DataFrame(processed_list)
df_processed.columns = ['text']
df_processed.head(n=5)


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\krmri\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


['a', 'about', 'above', 'after', 'again', 'against', 'ain', 'all', 'am', 'an', 'any', 'are', 'aren', "aren't", 'as', 'at', 'be', 'because', 'been', 'before', 'being', 'below', 'between', 'both', 'but', 'by', 'can', 'couldn', "couldn't", 'd', 'did', 'didn', "didn't", 'do', 'does', 'doesn', "doesn't", 'doing', 'don', "don't", 'down', 'during', 'each', 'few', 'from', 'further', 'had', 'hadn', "hadn't", 'has', 'hasn', "hasn't", 'have', 'haven', "haven't", 'having', 'he', "he'd", "he'll", 'her', 'here', 'hers', 'herself', "he's", 'him', 'himself', 'his', 'how', 'i', "i'd", 'if', "i'll", "i'm", 'in', 'into', 'is', 'isn', "isn't", 'it', "it'd", "it'll", "it's", 'its', 'itself', "i've", 'just', 'll', 'm', 'ma', 'me', 'mightn', "mightn't", 'more', 'most', 'mustn', "mustn't", 'my', 'myself', 'needn', "needn't", 'no', 'nor', 'now', 'o', 'of', 'off', 'on', 'once', 'only', 'or', 'other', 'our', 'ours', 'ourselves', 'out', 'over', 'own', 're', 's', 'same', 'shan', "shan't", 'she', "she'd", "she'll",

Unnamed: 0,text
0,this is our history for shia and sunni don t l...
1,if this is grilling then wht wud u call th stu...
2,look at madam locals attacked no religion angl...
3,our beloved yogi ji has given money home and f...
4,i gonna kill that bitch bitvj imma kill you we...


In [6]:
X = list(df_processed['text'])
y = d2[['label']].values
y

array([[0],
       [1],
       [0],
       ...,
       [0],
       [1],
       [0]], shape=(6531, 1))

In [7]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.utils import pad_sequences
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import (
    LSTM, Activation, Dropout, Dense, Flatten,
    Bidirectional, GRU, concatenate, SpatialDropout1D,
    GlobalMaxPooling1D, GlobalAveragePooling1D, Conv1D,
    Embedding, Input, Concatenate
)
from tensorflow.keras.optimizers import Adam, SGD, RMSprop
from tensorflow.keras.losses import MeanSquaredError

import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

######## Textual Features for Embedding ###################
max_len = 100
max_features = 4479

# Tokenization
tokenizer = Tokenizer(num_words=max_features)
tokenizer.fit_on_texts(X)
X = tokenizer.texts_to_sequences(X)

# Padding
X = pad_sequences(X, padding='post', maxlen=max_len)

print(X)  # Check the processed sequences

[[  12    6   68 ...    0    0    0]
 [  34   12    6 ...    0    0    0]
 [ 158   64  239 ...    0    0    0]
 ...
 [  10   41  387 ...    0    0    0]
 [1590 1170   69 ...    0    0    0]
 [ 461  294   27 ...    0    0    0]]


In [8]:
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)
y

  y = column_or_1d(y, warn=True)


array([0, 1, 0, ..., 0, 1, 0], shape=(6531,))

In [9]:
from keras.utils import to_categorical
y = to_categorical(y, num_classes=2)
y

array([[1., 0.],
       [0., 1.],
       [1., 0.],
       ...,
       [1., 0.],
       [0., 1.],
       [1., 0.]], shape=(6531, 2))

In [10]:
import numpy as np
import json

# Load GloVe embeddings from JSON
with open('glove_embeddings.json', encoding="utf8") as f:
    embeddings_list = json.load(f)

# Convert the list of vectors to a dictionary with word indices as keys
embeddings_dictionary = {str(i): vector for i, vector in enumerate(embeddings_list)}

# Define tokenizer 
vocab_size = len(tokenizer.word_index) + 1  # Vocabulary size
word_index = tokenizer.word_index
num_words = min(max_features, vocab_size)  # Limit vocab to max_features

# Get embedding dimension (from first vector in list)
embed_size = len(embeddings_list[0]) if embeddings_list else 0

# Initialize embedding matrix
embedding_matrix = np.zeros((num_words, embed_size))

# Fill embedding matrix with corresponding word vectors
for word, index in word_index.items():
    if index >= max_features:
        continue
    embedding_vector = embeddings_dictionary.get(word) or embeddings_dictionary.get(str(index))
    if embedding_vector is not None:
        embedding_matrix[index] = np.asarray(embedding_vector, dtype=np.float32)

print("Embedding matrix shape:", embedding_matrix.shape)

Embedding matrix shape: (4479, 50)


In [11]:
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import (
    Input, Embedding, SpatialDropout1D, Conv1D,
    Bidirectional, LSTM, GRU, Dense, Dropout,
    GlobalAveragePooling1D
)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import classification_report, f1_score, precision_score, recall_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Configure GPU for optimal performance
def configure_gpu():
    gpus = tf.config.list_physical_devices('GPU')
    if gpus:
        try:
            # Enable memory growth for each GPU
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)
            logical_gpus = tf.config.list_logical_devices('GPU')
            print(f"{len(gpus)} Physical GPUs, {len(logical_gpus)} Logical GPUs")
            # Use mixed precision for better performance
            policy = tf.keras.mixed_precision.Policy('mixed_float16')
            tf.keras.mixed_precision.set_global_policy(policy)
            print('Mixed precision enabled')
        except RuntimeError as e:
            print(e)

configure_gpu()

# Model Definition - GRU with Attention
def create_gru_attention_model(max_len, max_features, embedding_matrix, embed_size=300):
    """
    Creates an enhanced GRU model with hierarchical attention mechanism
    """
    # Input layer
    input_layer = Input(shape=(max_len,))
    
    # Embedding layer with pretrained weights
    embedding_layer = Embedding(
        input_dim=max_features,
        output_dim=embed_size,
        weights=[embedding_matrix],
        input_length=max_len,
        trainable=True  # Make embeddings trainable for fine-tuning
    )(input_layer)
    
    # Spatial Dropout with higher rate
    spatial_dropout = SpatialDropout1D(0.3)(embedding_layer)
    
    # Multiple GRU layers with different window sizes
    gru_layer1 = Bidirectional(
        GRU(
            units=128,
            return_sequences=True,
            dropout=0.2,
            recurrent_dropout=0.2,
            kernel_regularizer=tf.keras.regularizers.l2(1e-5)
        )
    )(spatial_dropout)
    
    gru_layer2 = Bidirectional(
        GRU(
            units=64,
            return_sequences=True,
            dropout=0.2,
            recurrent_dropout=0.2
        )
    )(gru_layer1)
    
    # Multi-head self-attention (simplified version)
    attention_layer = tf.keras.layers.MultiHeadAttention(
        num_heads=8,
        key_dim=16
    )(gru_layer2, gru_layer2)
    
    # Skip connection
    concat_layer = tf.keras.layers.Concatenate()([gru_layer2, attention_layer])
    
    # Feature extraction with pooling operations
    avg_pool = GlobalAveragePooling1D()(concat_layer)
    max_pool = tf.keras.layers.GlobalMaxPooling1D()(concat_layer)
    
    # Combine pooled features
    concat_pools = tf.keras.layers.Concatenate()([avg_pool, max_pool])
    
    # Deep MLP layers with batch normalization and more dropout
    x = Dense(256, activation='relu')(concat_pools)
    x = tf.keras.layers.BatchNormalization()(x)
    x = Dropout(0.3)(x)
    
    x = Dense(128, activation='relu')(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = Dropout(0.2)(x)
    
    # Output layer
    output_layer = Dense(2, activation='softmax', dtype='float32')(x)
    
    # Create model
    model = Model(inputs=input_layer, outputs=output_layer)
    
    return model

# Custom macroF1 Score Metric
class MacroF1Score(tf.keras.metrics.Metric):
    def __init__(self, num_classes = 2, name='macro_f1_score', **kwargs):
        super(MacroF1Score, self).__init__(name=name, **kwargs)
        self.num_classes = num_classes
        self.tp = self.add_weight(name='tp', initializer='zeros')
        self.fp = self.add_weight(name='fp', initializer='zeros')
        self.fn = self.add_weight(name='fn', initializer='zeros')
        self.count = self.add_weight(name='count', initializer='zeros')

    def update_state(self, y_true, y_pred, sample_weight=None):
        # Convert probabilities to predicted class indices
        y_pred = tf.argmax(y_pred, axis=-1)
        
        # Convert one-hot encoded y_true to class indices if needed
        if len(y_true.shape) > 1 and y_true.shape[-1] > 1:
            y_true = tf.argmax(y_true, axis=-1)
        
        # Initialize confusion matrix
        conf_matrix = tf.math.confusion_matrix(
            y_true,
            y_pred,
            num_classes=self.num_classes,
            dtype=tf.float32
        )
        
        # Calculate TP, FP, FN for each class
        diag = tf.linalg.diag_part(conf_matrix)
        row_sum = tf.reduce_sum(conf_matrix, axis=1)
        col_sum = tf.reduce_sum(conf_matrix, axis=0)
        
        tp = diag
        fp = col_sum - diag
        fn = row_sum - diag
        
        # Update the state variables
        self.tp.assign_add(tf.reduce_sum(tp))
        self.fp.assign_add(tf.reduce_sum(fp))
        self.fn.assign_add(tf.reduce_sum(fn))
        self.count.assign_add(tf.cast(tf.shape(y_true)[0], tf.float32))

    def result(self):
        # Calculate precision and recall
        precision = self.tp / (self.tp + self.fp + tf.keras.backend.epsilon())
        recall = self.tp / (self.tp + self.fn + tf.keras.backend.epsilon())
        
        # Calculate F1 score
        f1 = 2 * (precision * recall) / (precision + recall + tf.keras.backend.epsilon())
        
        # Return macro F1 (average of per-class F1 scores)
        return f1

    def reset_states(self):
        self.tp.assign(0.)
        self.fp.assign(0.)
        self.fn.assign(0.)
        self.count.assign(0.)
            
# Model Training
def train_and_validate_model(model, X_train, y_train, X_val, y_val, batch_size=32, epochs=15, model_dir='models_en_task1_m2'):
    """
    Trains the GRU-Attention model with early stopping and model checkpointing
    Returns the best model and training history
    """
    # Create directory for saving models if it doesn't exist
    os.makedirs(model_dir, exist_ok=True)
    
    # Callbacks
    early_stopping = EarlyStopping(
        monitor='macro_f1_score',
        patience=2,
        restore_best_weights=True,
        mode='max',
        verbose=1
    )
    
    model_checkpoint = ModelCheckpoint(
        os.path.join(model_dir, 'best_model_en_task1_m2.h5'),  # Save entire model
        monitor='macro_f1_score',
        mode='max',
        save_best_only=True,
        verbose=1
    )
    
    # Compile model with Adam optimizer (as per paper)
    model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='categorical_crossentropy',
        metrics=['accuracy', MacroF1Score(num_classes=2)]
    )
    
    # Train the model
    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        batch_size=batch_size,
        epochs=epochs,
        callbacks=[early_stopping, model_checkpoint],
        verbose=1
    )
    
    # Load the best model found during training
    best_model = load_model(os.path.join(model_dir, 'best_model_en_task1_m2.h5'), 
                          custom_objects={'MacroF1Score': MacroF1Score})
    
    return history, best_model

# Plot Training History
def plot_training_history(history, plot_dir='plots_nlp_project_en_task1_m2'):
    """
    Plots training history (accuracy and loss curves)
    Saves plots to specified directory
    """
    os.makedirs(plot_dir, exist_ok=True)
    
    # Plot training history
    plt.figure(figsize=(12, 5))
    
    # Plot accuracy
    plt.subplot(1, 2, 1)
    plt.plot(history.history['accuracy'], label='Train Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.title('Model Accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend()
    
    # Plot loss
    plt.subplot(1, 2, 2)
    plt.plot(history.history['loss'], label='Train Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title('Model Loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend()
    
    plt.tight_layout()
    plt.savefig(os.path.join(plot_dir, 'training_history_en_task1_m2.png'))
    plt.close()

# Validation Evaluation
def evaluate_validation(model, X_val, y_val, plot_dir='plots_nlp_project_en_task1_m2'):
    """
    Evaluates the model on validation data and saves metrics and plots
    """
    os.makedirs(plot_dir, exist_ok=True)
    
    # Predict probabilities
    y_pred_proba = model.predict(X_val, batch_size=32)
    
    # Convert to class labels
    y_pred = np.argmax(y_pred_proba, axis=1)
    y_true = np.argmax(y_val, axis=1)
    
    # Calculate metrics
    precision = precision_score(y_true, y_pred, average='weighted')
    recall = recall_score(y_true, y_pred, average='weighted')
    weighted_f1 = f1_score(y_true, y_pred, average='weighted')
    macro_f1 = f1_score(y_true, y_pred, average='macro')

    
    # Classification report
    report = classification_report(y_true, y_pred, target_names=['not_hate', 'hate'])
    
    # Confusion matrix
    conf_matrix = confusion_matrix(y_true, y_pred)
    
    # Plot confusion matrix
    plt.figure(figsize=(8, 6))
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
                xticklabels=['Not Hate', 'Hate'],
                yticklabels=['Not Hate', 'Hate'])
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title('Confusion Matrix (Validation)')
    plt.savefig(os.path.join(plot_dir, 'confusion_matrix_val_en_task1_m2.png'))
    plt.close()
    
    return {
        'precision': precision,
        'recall': recall,
        'f1_score_weighted': weighted_f1,
        'f1_score_macro': macro_f1,
        'classification_report': report,
        'confusion_matrix': conf_matrix
    }


# Main Execution for Training and Validation
if __name__ == "__main__":
    # Split into train (80%) and validation (20%)
    
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    
    print(f"Training samples: {len(X_train)}")
    print(f"Validation samples: {len(X_val)}")
    
    # Create model - using GRU with Attention instead of CNN-BiLSTM
    embed_size = embedding_matrix.shape[1]
    model = create_gru_attention_model(max_len, max_features, embedding_matrix, embed_size)
    
    # Print model summary
    model.summary()
    
    # Train model
    history, trained_model = train_and_validate_model(
        model, X_train, y_train, X_val, y_val,
        batch_size=32,
        epochs=15  
    )
    
    # Plot training history
    plot_training_history(history)
    
    # Evaluate on validation set
    val_results = evaluate_validation(trained_model, X_val, y_val)
    
    print("\nValidation Results:")
    print(f"Precision: {val_results['precision']:.4f}")
    print(f"Recall: {val_results['recall']:.4f}")
    print(f"weighted F1 Score: {val_results['f1_score_weighted']:.4f}")
    print(f"macro F1 Score: {val_results['f1_score_macro']:.4f}")
    print("\nClassification Report:")
    print(val_results['classification_report'])

Training samples: 5224
Validation samples: 1307




Epoch 1/15
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 201ms/step - accuracy: 0.5997 - loss: 0.8197 - macro_f1_score: 0.5997 
Epoch 1: macro_f1_score improved from -inf to 0.67094, saving model to models_en_task1_m2\best_model_en_task1_m2.h5




[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 220ms/step - accuracy: 0.6002 - loss: 0.8190 - macro_f1_score: 0.6002 - val_accuracy: 0.8080 - val_loss: 0.4896 - val_macro_f1_score: 0.8080
Epoch 2/15
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 183ms/step - accuracy: 0.7533 - loss: 0.5743 - macro_f1_score: 0.7533 
Epoch 2: macro_f1_score improved from 0.67094 to 0.76053, saving model to models_en_task1_m2\best_model_en_task1_m2.h5




[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 194ms/step - accuracy: 0.7534 - loss: 0.5743 - macro_f1_score: 0.7534 - val_accuracy: 0.8080 - val_loss: 0.5596 - val_macro_f1_score: 0.8080
Epoch 3/15
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 193ms/step - accuracy: 0.7755 - loss: 0.5475 - macro_f1_score: 0.7755 
Epoch 3: macro_f1_score improved from 0.76053 to 0.77680, saving model to models_en_task1_m2\best_model_en_task1_m2.h5




[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 204ms/step - accuracy: 0.7755 - loss: 0.5475 - macro_f1_score: 0.7755 - val_accuracy: 0.8125 - val_loss: 0.5243 - val_macro_f1_score: 0.8125
Epoch 4/15
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 191ms/step - accuracy: 0.7758 - loss: 0.5276 - macro_f1_score: 0.7758 
Epoch 4: macro_f1_score improved from 0.77680 to 0.77967, saving model to models_en_task1_m2\best_model_en_task1_m2.h5




[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 204ms/step - accuracy: 0.7758 - loss: 0.5276 - macro_f1_score: 0.7758 - val_accuracy: 0.8064 - val_loss: 0.4959 - val_macro_f1_score: 0.8064
Epoch 5/15
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 205ms/step - accuracy: 0.7910 - loss: 0.4725 - macro_f1_score: 0.7910 
Epoch 5: macro_f1_score improved from 0.77967 to 0.79537, saving model to models_en_task1_m2\best_model_en_task1_m2.h5




[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 218ms/step - accuracy: 0.7910 - loss: 0.4724 - macro_f1_score: 0.7910 - val_accuracy: 0.8240 - val_loss: 0.4067 - val_macro_f1_score: 0.8240
Epoch 6/15
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 195ms/step - accuracy: 0.8158 - loss: 0.4029 - macro_f1_score: 0.8158 
Epoch 6: macro_f1_score improved from 0.79537 to 0.81528, saving model to models_en_task1_m2\best_model_en_task1_m2.h5




[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 208ms/step - accuracy: 0.8158 - loss: 0.4030 - macro_f1_score: 0.8158 - val_accuracy: 0.8164 - val_loss: 0.4214 - val_macro_f1_score: 0.8164
Epoch 7/15
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 230ms/step - accuracy: 0.8372 - loss: 0.3781 - macro_f1_score: 0.8372 
Epoch 7: macro_f1_score improved from 0.81528 to 0.83576, saving model to models_en_task1_m2\best_model_en_task1_m2.h5




[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 245ms/step - accuracy: 0.8371 - loss: 0.3781 - macro_f1_score: 0.8371 - val_accuracy: 0.8240 - val_loss: 0.4582 - val_macro_f1_score: 0.8240
Epoch 8/15
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 271ms/step - accuracy: 0.8576 - loss: 0.3461 - macro_f1_score: 0.8576 
Epoch 8: macro_f1_score improved from 0.83576 to 0.85337, saving model to models_en_task1_m2\best_model_en_task1_m2.h5




[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 296ms/step - accuracy: 0.8575 - loss: 0.3461 - macro_f1_score: 0.8575 - val_accuracy: 0.7835 - val_loss: 0.4762 - val_macro_f1_score: 0.7835
Epoch 9/15
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 367ms/step - accuracy: 0.8672 - loss: 0.3142 - macro_f1_score: 0.8672 
Epoch 9: macro_f1_score improved from 0.85337 to 0.86390, saving model to models_en_task1_m2\best_model_en_task1_m2.h5




[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 391ms/step - accuracy: 0.8672 - loss: 0.3142 - macro_f1_score: 0.8672 - val_accuracy: 0.8141 - val_loss: 0.4763 - val_macro_f1_score: 0.8141
Epoch 10/15
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 364ms/step - accuracy: 0.8748 - loss: 0.2927 - macro_f1_score: 0.8748 
Epoch 10: macro_f1_score improved from 0.86390 to 0.87883, saving model to models_en_task1_m2\best_model_en_task1_m2.h5




[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 388ms/step - accuracy: 0.8748 - loss: 0.2927 - macro_f1_score: 0.8748 - val_accuracy: 0.7934 - val_loss: 0.4612 - val_macro_f1_score: 0.7934
Epoch 11/15
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 401ms/step - accuracy: 0.9006 - loss: 0.2567 - macro_f1_score: 0.9006 
Epoch 11: macro_f1_score improved from 0.87883 to 0.89433, saving model to models_en_task1_m2\best_model_en_task1_m2.h5




[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 426ms/step - accuracy: 0.9005 - loss: 0.2568 - macro_f1_score: 0.9005 - val_accuracy: 0.8164 - val_loss: 0.6783 - val_macro_f1_score: 0.8164
Epoch 12/15
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 396ms/step - accuracy: 0.9039 - loss: 0.2475 - macro_f1_score: 0.9039 
Epoch 12: macro_f1_score improved from 0.89433 to 0.90295, saving model to models_en_task1_m2\best_model_en_task1_m2.h5




[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 426ms/step - accuracy: 0.9039 - loss: 0.2476 - macro_f1_score: 0.9039 - val_accuracy: 0.8171 - val_loss: 0.7095 - val_macro_f1_score: 0.8171
Epoch 13/15
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 289ms/step - accuracy: 0.9165 - loss: 0.2261 - macro_f1_score: 0.9165  
Epoch 13: macro_f1_score improved from 0.90295 to 0.91137, saving model to models_en_task1_m2\best_model_en_task1_m2.h5




[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 305ms/step - accuracy: 0.9165 - loss: 0.2261 - macro_f1_score: 0.9165 - val_accuracy: 0.7781 - val_loss: 0.5374 - val_macro_f1_score: 0.7781
Epoch 14/15
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 211ms/step - accuracy: 0.9246 - loss: 0.2066 - macro_f1_score: 0.9246 
Epoch 14: macro_f1_score improved from 0.91137 to 0.91750, saving model to models_en_task1_m2\best_model_en_task1_m2.h5




[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 226ms/step - accuracy: 0.9245 - loss: 0.2066 - macro_f1_score: 0.9245 - val_accuracy: 0.8141 - val_loss: 0.6896 - val_macro_f1_score: 0.8141
Epoch 15/15
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 207ms/step - accuracy: 0.9270 - loss: 0.2007 - macro_f1_score: 0.9270 
Epoch 15: macro_f1_score improved from 0.91750 to 0.93051, saving model to models_en_task1_m2\best_model_en_task1_m2.h5




[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 223ms/step - accuracy: 0.9270 - loss: 0.2007 - macro_f1_score: 0.9270 - val_accuracy: 0.8057 - val_loss: 0.6777 - val_macro_f1_score: 0.8057
Restoring model weights from the end of the best epoch: 15.




[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 90ms/step

Validation Results:
Precision: 0.7948
Recall: 0.8057
weighted F1 Score: 0.7994
macro F1 Score: 0.6672

Classification Report:
              precision    recall  f1-score   support

    not_hate       0.87      0.90      0.88      1056
        hate       0.49      0.42      0.45       251

    accuracy                           0.81      1307
   macro avg       0.68      0.66      0.67      1307
weighted avg       0.79      0.81      0.80      1307



In [12]:
d2= pd.read_csv('test_en_l1.csv')
d2
d2 = d2.rename(columns={'key' : 'unique_id', 'sentence' : 'text'})
d2.to_csv('updated_test_en_l1.csv', index=False)
# d2

# Convert annotator columns to numeric without replacing NaNs
d2[['en_a1', 'en_a2', 'en_a3', 'en_a4', 'en_a5', 'en_a6']] = d2[
    ['en_a1', 'en_a2', 'en_a3', 'en_a4', 'en_a5', 'en_a6']
].apply(pd.to_numeric, errors='coerce')  # NaNs are retained

# Compute 'label' based on majority voting while ignoring NaNs
d2['label'] = (d2[['en_a1', 'en_a2', 'en_a3', 'en_a4', 'en_a5', 'en_a6']].mean(axis=1, skipna=True) >= 0.5).astype(int)
d2

Unnamed: 0,text,unique_id,en_a1,en_a2,en_a3,en_a4,en_a5,en_a6,label
0,"""who's this tweet about"" bitch it's about you ...",question_1,,0.0,,,0.0,1.0,0
1,#AlkaLamba should’ve used her slippers to slap...,question_1,0.0,0.0,,0.0,,,0
2,#CoronaVillains #COVIDIDIOT #COVID2019 is a...,question_1,,,1.0,,1.0,1.0,1
3,"(Acts 29:19994) LIKE the female ""governors"" of...",question_1,,1.0,1.0,,1.0,,1
4,***Let us celebrate “secular” as a beautiful w...,question_1,,0.0,1.0,0.0,,,0
...,...,...,...,...,...,...,...,...,...
1102,you should be tired of not having any kind of ...,question_1,0.0,,1.0,,,1.0,1
1103,“The limits of tyrants are prescribed by the e...,question_1,0.0,,0.0,,0.0,,0
1104,♋️🦀 like wtf my symbol is 69 and I’m a crab an...,question_1,,,0.0,,0.0,1.0,0
1105,"🇺🇸: marry a millionaire guy, divorce get half ...",question_1,,,1.0,,1.0,1.0,1


In [13]:
d2.loc[:, 'binary_label'] = d2['label'].apply(determine_binary_label)

# # Reorder columns
d2 = d2[['unique_id', 'text', 'binary_label', 'label']]

d2.to_csv('updated_test_en_l1.csv', index=False)

d2.loc[:, 'text'] = d2['text'].apply(lambda x: normalize_text(x))
processed_list = []
for j in d2['text']:
    process = j.replace('...','')
    processed_list.append(process)

df_processed = pd.DataFrame(processed_list)
df_processed.columns = ['text']
df_processed.head(n=5)

X = list(df_processed['text'])
y = d2[['label']].values

X = tokenizer.texts_to_sequences(X)

# Padding
X = pad_sequences(X, padding='post', maxlen=max_len)

y = label_encoder.fit_transform(y.ravel())

y = to_categorical(y, num_classes=2)


In [14]:
test_results = evaluate_validation(trained_model, X, y)

print(r"\Test Results:")
print(f"Precision: {test_results['precision']:.4f}")
print(f"Recall: {test_results['recall']:.4f}")
print(f"weighted F1 Score: {test_results['f1_score_weighted']:.4f}")
print(f"macro F1 Score: {test_results['f1_score_macro']:.4f}")
print("\nClassification Report:")
print(test_results['classification_report'])

[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 60ms/step
\Test Results:
Precision: 0.7915
Recall: 0.8067
weighted F1 Score: 0.7967
macro F1 Score: 0.6773

Classification Report:
              precision    recall  f1-score   support

    not_hate       0.86      0.91      0.88       877
        hate       0.55      0.42      0.47       230

    accuracy                           0.81      1107
   macro avg       0.70      0.66      0.68      1107
weighted avg       0.79      0.81      0.80      1107

