# Ensemble Model for Cyberbullying Detection

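This notebook combines the predictions of pre-trained LSTM and CNN classifiers (simple average, weighted average, and element-wise maximum) and compares each ensemble against the individual models for cyberbullying detection.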

In [None]:
import sys
sys.path.append('..')

import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import pickle

from src.models import LSTMClassifier, SimpleCNN
from src.utils import get_predictions, calculate_metrics
from src.train import clean_text, build_vocab, TextDataset
from torch.utils.data import DataLoader

In [None]:
# Target columns predicted for every comment.
label_cols = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

# Load the raw comments and add a cleaned copy of the text column.
df = pd.read_csv("../data/raw/train.csv")
df["clean_text"] = df["comment_text"].apply(clean_text)

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    df["clean_text"].values,
    df[label_cols].values,
    random_state=42,
    test_size=0.2,
)

# The vocabulary is built from the training split only.
# NOTE(review): presumably this must match the vocabulary used when the saved
# models were trained — confirm against the training script.
vocab = build_vocab(X_train, max_vocab=10000)

# Only the held-out split is wrapped in a loader; nothing is trained here.
test_dataset = TextDataset(X_test, y_test, vocab, max_len=100)
test_loader = DataLoader(dataset=test_dataset, batch_size=64)

print(f"Test samples: {len(X_test)}")

## Load Pre-trained Models

In [None]:
# Prefer the GPU when one is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# The architectures are rebuilt here before the saved weights are loaded into
# them, so these hyperparameters have to agree with the saved checkpoints.
lstm_model = LSTMClassifier(
    vocab_size=len(vocab),
    embedding_dim=100,
    hidden_dim=128,
    output_dim=len(label_cols),
    n_layers=2,
    dropout=0.3
).to(device)

cnn_model = SimpleCNN(
    vocab_size=len(vocab),
    embedding_dim=100,
    n_filters=100,
    filter_sizes=[3, 4, 5],
    output_dim=len(label_cols),
    dropout=0.5
).to(device)

# Loading stays best-effort (the notebook keeps running), but a missing file
# is now reported separately from any other failure instead of a bare
# `except:` labelling every error -- including KeyboardInterrupt -- as
# "not found".
try:
    lstm_model.load_state_dict(torch.load('../outputs/lstm_model.pt', map_location=device))
    cnn_model.load_state_dict(torch.load('../outputs/cnn_model.pt', map_location=device))
except FileNotFoundError:
    print("Warning: Pre-trained models not found. Train LSTM and CNN models first.")
except Exception as exc:
    # e.g. a checkpoint whose parameter names/shapes do not fit the model above.
    print(f"Warning: Pre-trained models could not be loaded ({type(exc).__name__}: {exc})")
else:
    print("Models loaded successfully")

## Get Individual Predictions

In [None]:
def get_probabilities(model, iterator, device):
    """Collect sigmoid probabilities and labels for every batch in *iterator*.

    The model is switched to evaluation mode and run without gradient
    tracking. Each batch must unpack into ``(text, labels)``; only ``text``
    is moved to ``device``.

    Args:
        model: Module called as ``model(text)``; its output is passed
            through a sigmoid.
        iterator: Iterable yielding ``(text, labels)`` batches.
        device: Device the input batch is moved to before the forward pass.

    Returns:
        A ``(probabilities, labels)`` tuple of NumPy arrays, with the rows of
        all batches stacked in iteration order.
    """
    model.eval()
    prob_rows, label_rows = [], []

    with torch.no_grad():
        for text, labels in iterator:
            logits = model(text.to(device))
            # Bring results back to the CPU so they can become NumPy rows.
            prob_rows += list(torch.sigmoid(logits).cpu().numpy())
            label_rows += list(labels.numpy())

    return np.array(prob_rows), np.array(label_rows)

In [None]:
# Score the same test batches with both models. The labels returned by the
# second call are discarded; the loader is built without shuffling, so the
# row order of both passes should agree.
lstm_probs, y_true = get_probabilities(model=lstm_model, iterator=test_loader, device=device)
cnn_probs, _ = get_probabilities(model=cnn_model, iterator=test_loader, device=device)

print("\n".join([
    f"LSTM predictions shape: {lstm_probs.shape}",
    f"CNN predictions shape: {cnn_probs.shape}",
]))

## Ensemble Methods

In [None]:
# Every ensemble below turns probabilities into 0/1 predictions with the same
# 0.5 cut-off.

# Average ensemble: unweighted mean of the two models' probabilities.
ensemble_avg_probs = 0.5 * (lstm_probs + cnn_probs)
ensemble_avg_preds = np.greater(ensemble_avg_probs, 0.5).astype(float)

# Weighted ensemble (tune weights based on validation performance).
lstm_weight, cnn_weight = 0.6, 0.4
ensemble_weighted_probs = lstm_weight * lstm_probs + cnn_weight * cnn_probs
ensemble_weighted_preds = np.greater(ensemble_weighted_probs, 0.5).astype(float)

# Max ensemble: keep whichever model is more confident, element by element.
ensemble_max_probs = np.maximum(lstm_probs, cnn_probs)
ensemble_max_preds = np.greater(ensemble_max_probs, 0.5).astype(float)

## Evaluation

In [None]:
# Binarise the individual models with the same 0.5 cut-off as the ensembles.
lstm_preds = np.greater(lstm_probs, 0.5).astype(float)
cnn_preds = np.greater(cnn_probs, 0.5).astype(float)

# One metrics result per candidate; later cells read these by name.
metrics_lstm = calculate_metrics(y_true, lstm_preds, label_cols)
metrics_cnn = calculate_metrics(y_true, cnn_preds, label_cols)
metrics_avg = calculate_metrics(y_true, ensemble_avg_preds, label_cols)
metrics_weighted = calculate_metrics(y_true, ensemble_weighted_preds, label_cols)
metrics_max = calculate_metrics(y_true, ensemble_max_preds, label_cols)

# Print one "<name> F1" line per candidate, in a fixed order.
print("Model Comparison:")
print("\n".join([
    f"{title} F1: {scores['overall']['f1']:.4f}"
    for title, scores in (
        ("LSTM", metrics_lstm),
        ("CNN", metrics_cnn),
        ("Ensemble (Avg)", metrics_avg),
        ("Ensemble (Weighted)", metrics_weighted),
        ("Ensemble (Max)", metrics_max),
    )
]))

In [None]:
# Bar labels and the matching overall F1 values, in the same order.
models = ['LSTM', 'CNN', 'Ensemble\n(Avg)', 'Ensemble\n(Weighted)', 'Ensemble\n(Max)']
f1_scores = [
    result['overall']['f1']
    for result in (metrics_lstm, metrics_cnn, metrics_avg, metrics_weighted, metrics_max)
]

plt.figure(figsize=(10, 5))
plt.bar(models, f1_scores)
plt.ylabel('F1 Score')
plt.title('Model Comparison - Overall F1 Score')
# Zoom the y-axis to a narrow band around the scores so small gaps are visible.
plt.ylim(min(f1_scores) - 0.01, max(f1_scores) + 0.01)
plt.grid(axis='y', alpha=0.3)
plt.tight_layout()
plt.savefig('../outputs/ensemble_comparison.png', bbox_inches='tight', dpi=150)
plt.show()

In [None]:
# Per-label comparison: one F1 list per model, ordered like label_cols.
# The "Ensemble" series is the average ensemble.
lstm_f1, cnn_f1, ensemble_f1 = (
    [scores[label]['f1'] for label in label_cols]
    for scores in (metrics_lstm, metrics_cnn, metrics_avg)
)

# Three bars per label, each a quarter of a unit wide, centred on the tick.
width = 0.25
x = np.arange(len(label_cols))

plt.figure(figsize=(12, 5))
plt.bar(x - width, lstm_f1, width, label='LSTM')
plt.bar(x, cnn_f1, width, label='CNN')
plt.bar(x + width, ensemble_f1, width, label='Ensemble')

plt.title('Per-label F1 Score Comparison')
plt.xlabel('Labels')
plt.ylabel('F1 Score')
plt.xticks(x, label_cols, rotation=45)
plt.grid(axis='y', alpha=0.3)
plt.legend()
plt.tight_layout()
plt.savefig('../outputs/ensemble_per_label.png', bbox_inches='tight', dpi=150)
plt.show()

## Save Best Ensemble

In [None]:
# Choose the ensemble by its measured overall F1 instead of hard-coding the
# name, so the "Best ensemble" message and the saved file match the numbers
# computed in the evaluation cell. 'weighted' is listed first so it still
# wins ties (max() returns the first maximal key).
# NOTE(review): this selection uses the same split the scores are reported
# on, so the reported best score is optimistic — consider a validation split.
ensemble_candidates = {
    'weighted': (metrics_weighted, ensemble_weighted_preds),
    'avg': (metrics_avg, ensemble_avg_preds),
    'max': (metrics_max, ensemble_max_preds),
}
best_ensemble = max(
    ensemble_candidates,
    key=lambda name: ensemble_candidates[name][0]['overall']['f1'],
)
np.save('../outputs/ensemble_predictions.npy', ensemble_candidates[best_ensemble][1])
print(f"Best ensemble ({best_ensemble}) saved")