# Setup

In [1]:
# Install into the running kernel's environment; scikit-learn and memory_profiler
# are needed by the import cell and by `%load_ext memory_profiler` below.
%pip install torch transformers pandas scikit-learn memory_profiler

Note: you may need to restart the kernel to use updated packages.


In [None]:
%load_ext memory_profiler

# Soft Voting

## Load Dataset

In [2]:
import os

# Project root: the relative 'Corpus/...' and 'BERT models/...' paths used in
# later cells are resolved against this directory.
# Set the THESIS_DEV_DIR environment variable to run on another machine;
# the original location remains the default.
new_directory = os.environ.get(
    'THESIS_DEV_DIR',
    '/Users/levan/ATENEO MASTERAL/Thesis/Development',
)

# Change the current working directory
os.chdir(new_directory)

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, BertForSequenceClassification, AutoConfig
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import numpy as np

# CSV holding the labelled corpus (path is relative to the working directory)
file_path = 'Corpus/test_data_b.csv'
df = pd.read_csv(file_path)

# 70/30 train/test partition; the fixed random_state keeps the split repeatable
train_df, test_df = train_test_split(df, random_state=42, test_size=0.3)

# Pull the 'text' and 'label' columns out of each partition as plain lists
train_texts, train_labels = (train_df[column].tolist() for column in ('text', 'label'))
test_texts, test_labels = (test_df[column].tolist() for column in ('text', 'label'))

## Load Models and Tokenizers

In [4]:
def load_model_and_tokenizer(model_path, tokenizer_path, base_model):
    """Build a 2-label BERT classifier, load saved weights into it, and return it with its tokenizer.

    Args:
        model_path: Path to the saved ``state_dict`` file read with ``torch.load``.
        tokenizer_path: Location passed to ``BertTokenizer.from_pretrained``.
        base_model: Model name/path whose configuration defines the architecture
            (loaded via ``AutoConfig.from_pretrained`` with ``num_labels=2``).

    Returns:
        A ``(model, tokenizer)`` tuple; the model is switched to eval mode.

    Warns:
        RuntimeWarning: If the checkpoint does not provide every parameter of the
            model (those parameters keep their random initialisation), or if it
            contains keys the model does not use. Loading stays non-strict, so
            neither case raises.
    """
    import warnings

    tokenizer = BertTokenizer.from_pretrained(tokenizer_path)
    config = AutoConfig.from_pretrained(base_model, num_labels=2)
    model = BertForSequenceClassification(config)

    # Deserialize straight to CPU: load_state_dict copies values into the model's
    # existing parameters, so staging the checkpoint on an accelerator first only
    # adds a device round trip. Callers move the model to the compute device later.
    # NOTE(review): torch.load unpickles the file -- only load checkpoints from a
    # trusted source (consider weights_only=True if the installed torch supports it).
    state_dict = torch.load(model_path, map_location="cpu")

    # strict=False tolerates key mismatches, so surface them instead of silently
    # evaluating a partially initialised model.
    load_result = model.load_state_dict(state_dict, strict=False)
    if load_result.missing_keys:
        warnings.warn(
            f"{model_path}: {len(load_result.missing_keys)} model parameter(s) were not found "
            f"in the checkpoint and keep their random initialisation: {load_result.missing_keys}",
            RuntimeWarning,
            stacklevel=2,
        )
    if load_result.unexpected_keys:
        warnings.warn(
            f"{model_path}: {len(load_result.unexpected_keys)} checkpoint key(s) were ignored "
            f"because the model has no matching parameter: {load_result.unexpected_keys}",
            RuntimeWarning,
            stacklevel=2,
        )

    model.eval()
    return model, tokenizer

# Checkpoint file, tokenizer directory and base architecture for each ensemble
# member -- adjust the paths to match your local machine
model_info = {
    'HateBERT': dict(
        model_path='BERT models/HateBERT-finetuned.bin',
        tokenizer_path='BERT models/HateBERT-finetuned-tokenizer',
        base_model='GroNLP/hateBERT',
    ),
    'fBERT': dict(
        model_path='BERT models/fBERT-finetuned.bin',
        tokenizer_path='BERT models/fBERT-finetuned-tokenizer',
        base_model='diptanu/fBERT',
    ),
    'mBERT': dict(
        model_path='BERT models/mbert-base-uncased-finetuned.bin',
        tokenizer_path='BERT models/mbert-base-uncased-finetuned-tokenizer',
        base_model='google-bert/bert-base-multilingual-uncased',
    ),
}

# Load each entry once; the result maps model name -> (model, tokenizer)
models_and_tokenizers = {
    model_name: load_model_and_tokenizer(**paths)
    for model_name, paths in model_info.items()
}

## Apply Tokenization

In [5]:
def texts_to_dataloader(texts, tokenizer, batch_size=32, max_length=512):
    """Tokenize ``texts`` and wrap the encoded tensors in a ``DataLoader``.

    Args:
        texts: Sequence of raw strings to encode.
        tokenizer: Callable invoked with ``truncation=True``, ``padding=True``,
            ``max_length`` and ``return_tensors="pt"``; its result must expose
            ``'input_ids'`` and ``'attention_mask'`` tensors.
        batch_size: Number of examples per batch.
        max_length: Truncation length handed to the tokenizer. Defaults to 512,
            the value that used to be hard-coded.

    Returns:
        A ``DataLoader`` yielding ``(input_ids, attention_mask)`` batches. No
        shuffling is requested, so batches follow the order of ``texts``. The
        tensors are not moved to any device here; the caller transfers each
        batch when it runs the model.
    """
    encodings = tokenizer(
        texts,
        truncation=True,
        padding=True,
        max_length=max_length,
        return_tensors="pt",
    )

    # Earlier revisions copied the whole encoded set to the accelerator at this
    # point and then discarded the copies (the dataset was always built from the
    # tokenizer output directly). That transfer was pure overhead and is gone.
    dataset = TensorDataset(encodings['input_ids'], encodings['attention_mask'])
    return DataLoader(dataset, batch_size=batch_size)

## Perform Soft Voting and Prediction

In [6]:
%%memit

def soft_voting_predict(models_and_tokenizers, texts):
    """Classify ``texts`` by averaging the class probabilities of several models.

    Each model scores every text with its own tokenizer; the softmax
    probabilities are averaged across models and the class with the highest
    mean probability is chosen.

    Args:
        models_and_tokenizers: Mapping of name -> ``(model, tokenizer)``. Each
            model is moved to the compute device (MPS when available, else CPU)
            and left there.
        texts: Sequence of raw strings to classify.

    Returns:
        A 1-D NumPy array with one predicted class index per text, in input
        order. An empty array is returned when ``texts`` is empty.

    Raises:
        ValueError: If ``models_and_tokenizers`` is empty.
    """
    # Fail early with a clear message instead of the opaque `None / 0` TypeError
    # that an empty ensemble used to produce at the averaging step.
    if not models_and_tokenizers:
        raise ValueError("soft_voting_predict requires at least one (model, tokenizer) pair")

    # Nothing to classify: skip tokenization and inference entirely.
    if len(texts) == 0:
        return np.empty(0, dtype=np.intp)

    device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
    sum_probs = None

    # Loop through each model and its corresponding tokenizer
    for model, tokenizer in models_and_tokenizers.values():
        # Explicitly move each model to the compute device
        model.to(device)

        dataloader = texts_to_dataloader(texts, tokenizer)
        model_probs_list = []
        with torch.no_grad():
            for input_ids, attention_mask in dataloader:
                outputs = model(
                    input_ids=input_ids.to(device),
                    attention_mask=attention_mask.to(device),
                )
                probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
                # Bring results back to host memory so they can be accumulated with NumPy
                model_probs_list.append(probs.cpu().numpy())

        model_probs = np.concatenate(model_probs_list)
        # Accumulate without mutating the first model's probability array in place
        sum_probs = model_probs if sum_probs is None else sum_probs + model_probs

    # Average the probabilities
    avg_probs = sum_probs / len(models_and_tokenizers)

    # Perform voting based on highest average probabilities
    return np.argmax(avg_probs, axis=-1)

# Run soft-voting inference over the held-out test texts; the result holds
# one predicted class index per entry of test_texts
final_predictions = soft_voting_predict(models_and_tokenizers, test_texts)


peak memory: 2307.09 MiB, increment: -0.31 MiB


## Evaluate Model

In [7]:
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix

# Confusion matrix of true test labels versus ensemble predictions
conf_matrix = confusion_matrix(test_labels, final_predictions)

# Accuracy takes no averaging mode; the other three scores share average='binary'
# (adjust 'binary' as needed)
accuracy = accuracy_score(test_labels, final_predictions)  # Use the original test_labels list
precision, recall, f1 = (
    metric(test_labels, final_predictions, average='binary')
    for metric in (precision_score, recall_score, f1_score)
)

# Print the metrics, one per line, followed by the confusion matrix
print(
    f"Precision: {precision}",
    f"Recall: {recall}",
    f"Ensemble accuracy: {accuracy}",
    f"F1 Score: {f1}",
    "Confusion Matrix:",
    conf_matrix,
    sep="\n",
)

Precision: 0.8535117056856187
Recall: 0.8523714094856379
Ensemble accuracy: 0.8655256723716381
F1 Score: 0.8529411764705882
Confusion Matrix:
[[1556  219]
 [ 221 1276]]


# Validate on Data C

## Load Dataset

In [8]:
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Read the validation corpus (path is relative to the working directory)
validation_file_path = 'Corpus/data_c.csv'
validation_df = pd.read_csv(validation_file_path)

# Labels are kept as the column's underlying array; texts become a plain list,
# which is what gets passed on to the tokenizers
validation_labels = validation_df['label'].values
validation_texts = validation_df['text'].tolist()



## Perform Soft Voting

In [9]:
%%memit
# Run soft-voting inference over the validation texts with the same loaded
# models; the result holds one predicted class index per entry of validation_texts
validation_predictions = soft_voting_predict(models_and_tokenizers, validation_texts)

peak memory: 1867.45 MiB, increment: 0.00 MiB


## Evaluate Model

In [10]:
# Precision, recall and F1 all use average='binary'; accuracy has no averaging mode
precision, recall, f1 = [
    score_fn(validation_labels, validation_predictions, average='binary')
    for score_fn in (precision_score, recall_score, f1_score)
]
accuracy = accuracy_score(validation_labels, validation_predictions)

# Confusion matrix of true validation labels versus ensemble predictions
conf_matrix = confusion_matrix(validation_labels, validation_predictions)

# Print the metrics, one per line, followed by the confusion matrix
print("\n".join([
    f"Precision: {precision}",
    f"Recall: {recall}",
    f"Validation accuracy: {accuracy}",
    f"F1 Score: {f1}",
    "Confusion Matrix:",
    str(conf_matrix),
]))

Precision: 0.8567567567567568
Recall: 0.844582593250444
Validation accuracy: 0.8622112211221122
F1 Score: 0.8506261180679785
Confusion Matrix:
[[1139  159]
 [ 175  951]]
