**Authors**: Céline Hirsch, Sandra Frey, Sina Röllin

**Deep Learning Project**: Inclusiveness in Sarcasm Detection

# Final Model Text

In [53]:
from functions_text_model import *
import os
import json
import random
from transformers import BertTokenizer
from torch.utils.data import Dataset, DataLoader
import json
from transformers import BertForSequenceClassification, BertTokenizer
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
from torch.utils.data import random_split
from sklearn.model_selection import ParameterGrid

First we have to load the datasets, which have already been prepared in `text-data-preparation.ipynb`. The different datasets are the mixed, the female and the male datasets. All of these datasets were split into training, validation and testing subsets.

In [72]:
# Load the data from the JSON files
with open('data/mixed_train_set.json') as file:
    mixed_train = json.load(file)

with open('data/mixed_val_set.json') as file:
    mixed_val = json.load(file)

with open('data/mixed_test_set.json') as file:
    mixed_test = json.load(file)

with open('data/M_train_set.json') as file:
    M_train = json.load(file)

with open('data/M_val_set.json') as file:
    M_val = json.load(file)

with open('data/M_test_set.json') as file:
    M_test = json.load(file)

with open('data/F_train_set.json') as file:
    F_train = json.load(file)

with open('data/F_val_set.json') as file:
    F_val = json.load(file)

with open('data/F_test_set.json') as file:
    F_test = json.load(file)

# Convert the data to a list of dictionaries
mixed_train_data = list(mixed_train.values())
mixed_val_data = list(mixed_val.values())
mixed_test_data = list(mixed_test.values())

M_train_data = list(M_train.values())
M_val_data = list(M_val.values())
M_test_data = list(M_test.values())

F_train_data = list(F_train.values())
F_val_data = list(F_val.values())
F_test_data = list(F_test.values())

Let's define some classes and functions needed in the training pipeline.

In [55]:
# Load the BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)


# Function to encode the text
def encode_text(text):
    encoded_dict = tokenizer.encode_plus(
                        text,                      # Input text
                        add_special_tokens = True, # Add '[CLS]' and '[SEP]'
                        max_length = 64,           # Pad & truncate all sentences
                        truncation = True,
                        padding = 'max_length',
                        return_attention_mask = True,   # Construct attention masks
                        return_tensors = 'pt',     # Return pytorch tensors
                   )
    return encoded_dict['input_ids'], encoded_dict['attention_mask']


# PyTorch Dataset
class SarcasmDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        item = self.data[idx]
        utterance = item['utterance']
        context = item['context']
        utterance_and_context = ' '.join([sentence for sentence in context] + [utterance]) # Combining the utterance and its context into one string.
        sarcasm = int(item['sarcasm'])
        input_ids, attention_mask = encode_text(utterance_and_context)
        return input_ids.flatten(), attention_mask.flatten(), sarcasm
    

# Set seeds
def set_seed(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)

We will now do the hyperparameter tuning for the 3 different models: 
- one model will be trained on the mixed data 
- one model will be trained on utterances from female speakers only
- one model will be trained on utterances from male speakers only

The performance of each of these models is then evaluated on the validation dataset. 

In [19]:
# HYPERPARAMETER TUNING MIXED MODEL

# Set seed
set_seed(42)
    
# Set device
device = torch.device("cpu")

dropout_prob = 0

# Define the hyperparameters to tune
param_grid = {
    'lr': [1e-3, 1e-4, 1e-5]
    'num_epochs': [20],
    'batch_size': [8, 16, 32, 64]
    'weight_decay': [0.05, 0.1]
    'dropout_prob': [0, 0.1]
}

# Create a parameter grid
grid = ParameterGrid(param_grid)

# Initialize a list to store the results
results = []

# Define the datasets
train_dataset = SarcasmDataset(mixed_train_data)
val_dataset = SarcasmDataset(mixed_val_data)

# For each combination of hyperparameters
for params in grid:

    # Create the DataLoaders
    train_dataloader = DataLoader(train_dataset, batch_size=params['batch_size'])
    val_dataloader = DataLoader(val_dataset, batch_size=params['batch_size'])

    # Create a new model
    model = BertForSequenceClassification.from_pretrained(
        "prajjwal1/bert-tiny",
        num_labels = 2,
        output_attentions = False,
        output_hidden_states = False,
    )

    model.classifier = nn.Sequential(
        nn.Dropout(params['dropout_prob']),
        nn.Linear(in_features=128, out_features=64, bias=True),
        nn.Tanh(),
        nn.Linear(in_features=64, out_features=16, bias=True),
        nn.Tanh(),
        nn.Linear(in_features=16, out_features=2, bias=True)
    )

    model.to(device)

    # Create a new optimizer with the current learning rate
    optimizer = AdamW(model.parameters(), lr=params['lr'], weight_decay=params['weight_decay'])

    # Create the optimizer  
    criterion = nn.CrossEntropyLoss()

    # Define metrics
    metrics = {'ACC': acc, 'F1-weighted': f1}

    # Initialize lists to store losses and metrics
    train_loss_log, test_loss_log = [], []
    metrics_names = list(metrics.keys())
    train_metrics_log = [[] for _ in range(len(metrics))]
    test_metrics_log = [[] for _ in range(len(metrics))]

    # Train and evaluate the model for the current number of epochs
    for epoch in range(params['num_epochs']):
        print(f"Epoch {epoch + 1}")
        print('learning rate:', params['lr'], 'batch size:', params['batch_size'], 'num_epochs:', params['num_epochs'])
        train_loss, train_metrics = train_epoch(model, optimizer, criterion, metrics, train_dataloader, device)
        val_loss, val_metrics = evaluate(model, criterion, metrics, val_dataloader, device)

        # Log the losses and metrics
        train_loss_log.append(train_loss)
        test_loss_log.append(val_loss)
        train_metrics_log = update_metrics_log(metrics_names, train_metrics_log, train_metrics)
        test_metrics_log = update_metrics_log(metrics_names, test_metrics_log, val_metrics)

    # Store the results
    results.append({
        'lr': params['lr'],
        'batch_size': params['batch_size'],
        'num_epochs': params['num_epochs'],
        'weight_decay': params['weight_decay'],
        'dropout_prob': params['dropout_prob'],
        'eval_loss': val_loss,
        'eval_metrics': val_metrics
    })

    # Plot and save the training and testing metrics
    plot_filename = 'plot-test' #f'hyperparameter_tuning/plot_lr_{params["lr"]}_bs_{params["batch_size"]}_wd_{params["weight_decay"]}_dp_{params["dropout_prob"]}.png'
    plot_training_hyperparameters(train_loss_log, test_loss_log, metrics_names, train_metrics_log, test_metrics_log, plot_filename)

# Save the results to a JSON file
results_path = 'hyperparameter_tuning/text_hyperparameter_mixed.json'
os.makedirs(os.path.dirname(results_path), exist_ok=True)

with open(results_path, 'w') as f:
    json.dump(results, f, indent=4)

print(results)

train Loss: 0.0787,  ACC: 0.9840, F1-weighted: 0.8912


100%|██████████| 6/6 [00:00<00:00, 26.20it/s]


eval Loss: 0.8833,  ACC: 0.7500, F1-weighted: 0.5591
[{'lr': 0.0001, 'batch_size': 16, 'num_epochs': 20, 'weight_decay': 0.05, 'dropout_prob': 0, 'eval_loss': 0.883301788320144, 'eval_metrics': {'ACC': 0.75, 'F1-weighted': 0.5590762860328077}}]


In [66]:
# HYPERPARAMETER TUNING MALE MODEL

set_seed(42)
    
# Set device
device = torch.device("cpu")

dropout_prob = 0

# Define the hyperparameters to tune
param_grid = {
    'lr': [1e-4],
    'num_epochs': [20],
    'batch_size': [8, 16, 32, 64],
    'weight_decay': [0.05, 0.1],
    'dropout_prob': [0, 0.1]
}

# Create a parameter grid
grid = ParameterGrid(param_grid)

# Initialize a list to store the results
results = []

# Define the datasets
train_dataset = SarcasmDataset(M_train_data)
val_dataset = SarcasmDataset(M_val_data)

# For each combination of hyperparameters
for params in grid:

    # Create the DataLoaders
    train_dataloader = DataLoader(train_dataset, batch_size=params['batch_size'])
    val_dataloader = DataLoader(val_dataset, batch_size=params['batch_size'])

    # Create a new model
    model = BertForSequenceClassification.from_pretrained(
        "prajjwal1/bert-tiny",
        num_labels = 2,
        output_attentions = False,
        output_hidden_states = False,
    )

    model.classifier = nn.Sequential(
        nn.Dropout(params['dropout_prob']),
        nn.Linear(in_features=128, out_features=64, bias=True),
        nn.Tanh(),
        nn.Linear(in_features=64, out_features=16, bias=True),
        nn.Tanh(),
        nn.Linear(in_features=16, out_features=2, bias=True)
    )

    model.to(device)

    # Create a new optimizer with the current learning rate
    optimizer = AdamW(model.parameters(), lr=params['lr'], weight_decay=params['weight_decay'])

    # Create the optimizer  
    criterion = nn.CrossEntropyLoss()

    # Define metrics
    metrics = {'ACC': acc, 'F1-weighted': f1}

    # Initialize lists to store losses and metrics
    train_loss_log, test_loss_log = [], []
    metrics_names = list(metrics.keys())
    train_metrics_log = [[] for _ in range(len(metrics))]
    test_metrics_log = [[] for _ in range(len(metrics))]

    # Train and evaluate the model for the current number of epochs
    for epoch in range(params['num_epochs']):
        print(f"Epoch {epoch + 1}")
        print('learning rate:', params['lr'], 'batch size:', params['batch_size'], 'num_epochs:', params['num_epochs'])
        train_loss, train_metrics = train_epoch(model, optimizer, criterion, metrics, train_dataloader, device)
        val_loss, val_metrics = evaluate(model, criterion, metrics, val_dataloader, device)

        # Log the losses and metrics
        train_loss_log.append(train_loss)
        test_loss_log.append(val_loss)
        train_metrics_log = update_metrics_log(metrics_names, train_metrics_log, train_metrics)
        test_metrics_log = update_metrics_log(metrics_names, test_metrics_log, val_metrics)

    # Store the results
    results.append({
        'lr': params['lr'],
        'batch_size': params['batch_size'],
        'num_epochs': params['num_epochs'],
        'weight_decay': params['weight_decay'],
        'dropout_prob': params['dropout_prob'],
        'eval_loss': val_loss,
        'eval_metrics': val_metrics
    })

    # Plot and save the training and testing metrics
    plot_filename = f'hyperparameter_tuning/M_plot_lr_{params["lr"]}_bs_{params["batch_size"]}_wd_{params["weight_decay"]}_dp_{params["dropout_prob"]}.png'
    plot_training_hyperparameters(train_loss_log, test_loss_log, metrics_names, train_metrics_log, test_metrics_log, plot_filename)

# Save the results to a JSON file
results_path = 'hyperparameter_tuning/text_hyperparameter_M.json'
os.makedirs(os.path.dirname(results_path), exist_ok=True)

with open(results_path, 'w') as f:
    json.dump(results, f, indent=4)

print(results)

train Loss: 0.5340,  ACC: 0.7685, F1-weighted: 0.5081


100%|██████████| 1/1 [00:00<00:00, 10.70it/s]


eval Loss: 0.6841,  ACC: 0.5870, F1-weighted: 0.5852
[{'lr': 0.0001, 'batch_size': 8, 'num_epochs': 20, 'weight_decay': 0.05, 'dropout_prob': 0, 'eval_loss': 0.6725867787996928, 'eval_metrics': {'ACC': 0.5833333333333334, 'F1-weighted': 0.5095571095571095}}, {'lr': 0.0001, 'batch_size': 8, 'num_epochs': 20, 'weight_decay': 0.1, 'dropout_prob': 0, 'eval_loss': 0.6949547131856283, 'eval_metrics': {'ACC': 0.4791666666666667, 'F1-weighted': 0.41111111111111115}}, {'lr': 0.0001, 'batch_size': 8, 'num_epochs': 20, 'weight_decay': 0.05, 'dropout_prob': 0.1, 'eval_loss': 0.6696785887082418, 'eval_metrics': {'ACC': 0.6180555555555556, 'F1-weighted': 0.37917637917637914}}, {'lr': 0.0001, 'batch_size': 8, 'num_epochs': 20, 'weight_decay': 0.1, 'dropout_prob': 0.1, 'eval_loss': 0.7038969000180563, 'eval_metrics': {'ACC': 0.5972222222222222, 'F1-weighted': 0.44983164983164986}}, {'lr': 0.0001, 'batch_size': 16, 'num_epochs': 20, 'weight_decay': 0.05, 'dropout_prob': 0, 'eval_loss': 1.15345803896586

In [78]:
# HYPERPARAMETER TUNING FEMALE MODEL

set_seed(42)
    
# Set device
device = torch.device("cpu")

dropout_prob = 0

# Define the hyperparameters to tune
param_grid = {
    'lr': [1e-4],
    'num_epochs': [10],
    'batch_size': [8, 16, 32, 64],
    'weight_decay': [0.05, 0.1],
    'dropout_prob': [0, 0.1]
}

# Create a parameter grid
grid = ParameterGrid(param_grid)

# Initialize a list to store the results
results = []

# Define the datasets
train_dataset = SarcasmDataset(F_train_data)
val_dataset = SarcasmDataset(F_val_data)

# For each combination of hyperparameters
for params in grid:

    # Create the DataLoaders
    train_dataloader = DataLoader(train_dataset, batch_size=params['batch_size'], worker_init_fn=lambda worker_id: set_seed(42))
    val_dataloader = DataLoader(val_dataset, batch_size=params['batch_size'])

    # Create a new model
    model = BertForSequenceClassification.from_pretrained(
        "prajjwal1/bert-tiny",
        num_labels = 2,
        output_attentions = False,
        output_hidden_states = False,
    )

    model.classifier = nn.Sequential(
        nn.Dropout(params['dropout_prob']),
        nn.Linear(in_features=128, out_features=64, bias=True),
        nn.Tanh(),
        nn.Linear(in_features=64, out_features=16, bias=True),
        nn.Tanh(),
        nn.Linear(in_features=16, out_features=2, bias=True)
    )

    model.to(device)

    # Create a new optimizer with the current learning rate
    optimizer = AdamW(model.parameters(), lr=params['lr'], weight_decay=params['weight_decay'])

    # Create the optimizer  
    criterion = nn.CrossEntropyLoss()

    # Define metrics
    metrics = {'ACC': acc, 'F1-weighted': f1}

    # Initialize lists to store losses and metrics
    train_loss_log, test_loss_log = [], []
    metrics_names = list(metrics.keys())
    train_metrics_log = [[] for _ in range(len(metrics))]
    test_metrics_log = [[] for _ in range(len(metrics))]

    # Train and evaluate the model for the current number of epochs
    for epoch in range(params['num_epochs']):
        print(f"Epoch {epoch + 1}")
        print('learning rate:', params['lr'], 'batch size:', params['batch_size'], 'num_epochs:', params['num_epochs'])
        train_loss, train_metrics = train_epoch(model, optimizer, criterion, metrics, train_dataloader, device)
        val_loss, val_metrics = evaluate(model, criterion, metrics, val_dataloader, device)

        # Log the losses and metrics
        train_loss_log.append(train_loss)
        test_loss_log.append(val_loss)
        train_metrics_log = update_metrics_log(metrics_names, train_metrics_log, train_metrics)
        test_metrics_log = update_metrics_log(metrics_names, test_metrics_log, val_metrics)

    # Store the results
    results.append({
        'lr': params['lr'],
        'batch_size': params['batch_size'],
        'num_epochs': params['num_epochs'],
        'weight_decay': params['weight_decay'],
        'dropout_prob': params['dropout_prob'],
        'eval_loss': val_loss,
        'eval_metrics': val_metrics
    })

    # Plot and save the training and testing metrics
    plot_filename = f'hyperparameter_tuning/F_plot_lr_{params["lr"]}_bs_{params["batch_size"]}_wd_{params["weight_decay"]}_dp_{params["dropout_prob"]}.png'
    plot_training_hyperparameters(train_loss_log, test_loss_log, metrics_names, train_metrics_log, test_metrics_log, plot_filename)

# Save the results to a JSON file
results_path = 'hyperparameter_tuning/text_hyperparameter_F.json'
os.makedirs(os.path.dirname(results_path), exist_ok=True)

with open(results_path, 'w') as f:
    json.dump(results, f, indent=4)

print(results)

train Loss: 0.6328,  ACC: 0.6302, F1-weighted: 0.4904


100%|██████████| 1/1 [00:00<00:00,  8.00it/s]


eval Loss: 0.8266,  ACC: 0.4074, F1-weighted: 0.2895
[{'lr': 0.0001, 'batch_size': 8, 'num_epochs': 10, 'weight_decay': 0.05, 'dropout_prob': 0, 'eval_loss': 0.5999684780836105, 'eval_metrics': {'ACC': 0.65625, 'F1-weighted': 0.5086580086580086}}, {'lr': 0.0001, 'batch_size': 8, 'num_epochs': 10, 'weight_decay': 0.1, 'dropout_prob': 0, 'eval_loss': 0.6194020807743073, 'eval_metrics': {'ACC': 0.6875, 'F1-weighted': 0.5181818181818182}}, {'lr': 0.0001, 'batch_size': 8, 'num_epochs': 10, 'weight_decay': 0.05, 'dropout_prob': 0.1, 'eval_loss': 0.5742475241422653, 'eval_metrics': {'ACC': 0.75, 'F1-weighted': 0.5461538461538462}}, {'lr': 0.0001, 'batch_size': 8, 'num_epochs': 10, 'weight_decay': 0.1, 'dropout_prob': 0.1, 'eval_loss': 0.6570921838283539, 'eval_metrics': {'ACC': 0.625, 'F1-weighted': 0.48484848484848486}}, {'lr': 0.0001, 'batch_size': 16, 'num_epochs': 10, 'weight_decay': 0.05, 'dropout_prob': 0, 'eval_loss': 0.7052564173936844, 'eval_metrics': {'ACC': 0.5340909090909092, 'F1-

Now that we trained the different models with different hyperparameters and evaluated them on the validation sets, we can choose which hyperparameters give us the best model.

We then train those models again with the number of epochs we chose from the graphs. That way we can save the models to access them again later.

In [83]:
# TRAIN MIXED MODEL WITH CHOSEN HYPERPARAMETERS

set_seed(42)
    
# Set device
device = torch.device("cpu")

# Set hyperparameters
lr = 0.0001
num_epochs = 20
batch_size = 16
weight_decay = 0.1
dropout_prob = 0

# Define the datasets
train_dataset = SarcasmDataset(mixed_train_data)
val_dataset = SarcasmDataset(mixed_val_data)

# Create the DataLoaders
train_dataloader = DataLoader(train_dataset, batch_size=batch_size)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size)

# Create a new model
model = BertForSequenceClassification.from_pretrained(
    "prajjwal1/bert-tiny",
    num_labels = 2,
    output_attentions = False,
    output_hidden_states = False,
)

model.classifier = nn.Sequential(
    nn.Dropout(dropout_prob),
    nn.Linear(in_features=128, out_features=64, bias=True),
    nn.Tanh(),
    nn.Linear(in_features=64, out_features=16, bias=True),
    nn.Tanh(),
    nn.Linear(in_features=16, out_features=2, bias=True)
)

model.to(device)

# Create a new optimizer with the current learning rate
optimizer = AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)

# Create the optimizer  
criterion = nn.CrossEntropyLoss()

# Define metrics
metrics = {'ACC': acc, 'F1-weighted': f1}

# Initialize lists to store losses and metrics
train_loss_log, test_loss_log = [], []
metrics_names = list(metrics.keys())
train_metrics_log = [[] for _ in range(len(metrics))]
test_metrics_log = [[] for _ in range(len(metrics))]

# Train and evaluate the model for the current number of epochs
for epoch in range(num_epochs):
    print(f"Epoch {epoch + 1}")
    train_loss, train_metrics = train_epoch(model, optimizer, criterion, metrics, train_dataloader, device)
    val_loss, val_metrics = evaluate(model, criterion, metrics, val_dataloader, device)

    # Log the losses and metrics
    train_loss_log.append(train_loss)
    test_loss_log.append(val_loss)
    train_metrics_log = update_metrics_log(metrics_names, train_metrics_log, train_metrics)
    test_metrics_log = update_metrics_log(metrics_names, test_metrics_log, val_metrics)

# Save model
torch.save(model.state_dict(), 'models/mixed_model_text.pth')

train Loss: 0.0725,  ACC: 0.9853, F1-weighted: 0.9031


100%|██████████| 6/6 [00:00<00:00, 10.00it/s]

eval Loss: 0.8306,  ACC: 0.7708, F1-weighted: 0.5797





In [84]:
# TRAIN MALE MODEL WITH CHOSEN HYPERPARAMETERS

set_seed(42)
    
# Set device
device = torch.device("cpu")

# Set hyperparameters
lr = 0.0001
num_epochs = 20
batch_size = 8
weight_decay = 0.1
dropout_prob = 0

# Define the datasets
train_dataset = SarcasmDataset(M_train_data)
val_dataset = SarcasmDataset(M_val_data)

# Create the DataLoaders
train_dataloader = DataLoader(train_dataset, batch_size=batch_size)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size)

# Create a new model
model = BertForSequenceClassification.from_pretrained(
    "prajjwal1/bert-tiny",
    num_labels = 2,
    output_attentions = False,
    output_hidden_states = False,
)

model.classifier = nn.Sequential(
    nn.Dropout(dropout_prob),
    nn.Linear(in_features=128, out_features=64, bias=True),
    nn.Tanh(),
    nn.Linear(in_features=64, out_features=16, bias=True),
    nn.Tanh(),
    nn.Linear(in_features=16, out_features=2, bias=True)
)

model.to(device)

# Create a new optimizer with the current learning rate
optimizer = AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)

# Create the optimizer  
criterion = nn.CrossEntropyLoss()

# Define metrics
metrics = {'ACC': acc, 'F1-weighted': f1}

# Initialize lists to store losses and metrics
train_loss_log, test_loss_log = [], []
metrics_names = list(metrics.keys())
train_metrics_log = [[] for _ in range(len(metrics))]
test_metrics_log = [[] for _ in range(len(metrics))]

# Train and evaluate the model for the current number of epochs
for epoch in range(num_epochs):
    print(f"Epoch {epoch + 1}")
    train_loss, train_metrics = train_epoch(model, optimizer, criterion, metrics, train_dataloader, device)
    val_loss, val_metrics = evaluate(model, criterion, metrics, val_dataloader, device)

    # Log the losses and metrics
    train_loss_log.append(train_loss)
    test_loss_log.append(val_loss)
    train_metrics_log = update_metrics_log(metrics_names, train_metrics_log, train_metrics)
    test_metrics_log = update_metrics_log(metrics_names, test_metrics_log, val_metrics)

# Save model
torch.save(model.state_dict(), 'models/M_model_text.pth')

train Loss: 0.6388,  ACC: 0.6222, F1-weighted: 0.4687


100%|██████████| 6/6 [00:00<00:00, 39.27it/s]

eval Loss: 0.6557,  ACC: 0.6250, F1-weighted: 0.5232





In [85]:
# TRAIN FEMALE MODEL WITH CHOSEN HYPERPARAMETERS

set_seed(42)
    
# Set device
device = torch.device("cpu")

# Set hyperparameters
lr = 0.0001
num_epochs = 9
batch_size = 8
weight_decay = 0.05
dropout_prob = 0.1

# Define the datasets
train_dataset = SarcasmDataset(F_train_data)
val_dataset = SarcasmDataset(F_val_data)

# Create the DataLoaders
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, worker_init_fn=lambda worker_id: set_seed(42))
val_dataloader = DataLoader(val_dataset, batch_size=batch_size)

# Create a new model
model = BertForSequenceClassification.from_pretrained(
    "prajjwal1/bert-tiny",
    num_labels = 2,
    output_attentions = False,
    output_hidden_states = False,
)

model.classifier = nn.Sequential(
    nn.Dropout(dropout_prob),
    nn.Linear(in_features=128, out_features=64, bias=True),
    nn.Tanh(),
    nn.Linear(in_features=64, out_features=16, bias=True),
    nn.Tanh(),
    nn.Linear(in_features=16, out_features=2, bias=True)
)

model.to(device)

# Create a new optimizer with the current learning rate
optimizer = AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)

# Create the optimizer  
criterion = nn.CrossEntropyLoss()

# Define metrics
metrics = {'ACC': acc, 'F1-weighted': f1}

# Initialize lists to store losses and metrics
train_loss_log, test_loss_log = [], []
metrics_names = list(metrics.keys())
train_metrics_log = [[] for _ in range(len(metrics))]
test_metrics_log = [[] for _ in range(len(metrics))]

# Train and evaluate the model for the current number of epochs
for epoch in range(num_epochs):
    print(f"Epoch {epoch + 1}")
    train_loss, train_metrics = train_epoch(model, optimizer, criterion, metrics, train_dataloader, device)
    val_loss, val_metrics = evaluate(model, criterion, metrics, val_dataloader, device)

    # Log the losses and metrics
    train_loss_log.append(train_loss)
    test_loss_log.append(val_loss)
    train_metrics_log = update_metrics_log(metrics_names, train_metrics_log, train_metrics)
    test_metrics_log = update_metrics_log(metrics_names, test_metrics_log, val_metrics)

# Save model
torch.save(model.state_dict(), 'models/F_model_text.pth')

train Loss: 0.5139,  ACC: 0.8289, F1-weighted: 0.6225


100%|██████████| 4/4 [00:00<00:00, 44.89it/s]

eval Loss: 0.5974,  ACC: 0.7500, F1-weighted: 0.6643





Now that we have chosen the best hyperparameters for each model and saved the model weights, we will evaluate the model performances on the untouched test sets.

In [29]:
# Test mixed model performance on test set

set_seed(42)
    
# Set device
device = torch.device("cpu")

# Set hyperparameters
batch_size = 16
dropout_prob = 0

# Define the dataset
test_dataset = SarcasmDataset(mixed_test_data)

# Create the DataLoader
test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

# Load the model
mixed_model = BertForSequenceClassification.from_pretrained(
    "prajjwal1/bert-tiny",
    num_labels=2,
    output_attentions=False,
    output_hidden_states=False,
)

mixed_model.classifier = nn.Sequential(
    nn.Dropout(dropout_prob),
    nn.Linear(in_features=128, out_features=64, bias=True),
    nn.Tanh(),
    nn.Linear(in_features=64, out_features=16, bias=True),
    nn.Tanh(),
    nn.Linear(in_features=16, out_features=2, bias=True)
    )

# Load the weights
state_dict = torch.load("models/mixed_model_text.pth")
mixed_model.load_state_dict(state_dict)

mixed_model.eval()
mixed_model.to(device)

# Define the loss criterion
criterion = nn.CrossEntropyLoss()

# Define the metrics
metrics = {'ACC': acc, 'F1-weighted': f1}

# Evaluate the model
test_loss, test_metrics = evaluate(mixed_model, criterion, metrics, test_dataloader, device)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 6/6 [00:00<00:00, 30.01it/s]

eval Loss: 0.5574,  ACC: 0.8507, F1-weighted: 0.7190





In [77]:
# Test male model performance on test set

set_seed(42)
    
# Set device
device = torch.device("cpu")

# Set hyperparameters
batch_size = 32
dropout_prob = 0.1

# Define the dataset
test_dataset = SarcasmDataset(M_test_data)

# Create the DataLoader
test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

# Load the model
model = BertForSequenceClassification.from_pretrained(
    "prajjwal1/bert-tiny",
    num_labels=2,
    output_attentions=False,
    output_hidden_states=False,
)

model.classifier = nn.Sequential(
    nn.Dropout(dropout_prob),
    nn.Linear(in_features=128, out_features=64, bias=True),
    nn.Tanh(),
    nn.Linear(in_features=64, out_features=16, bias=True),
    nn.Tanh(),
    nn.Linear(in_features=16, out_features=2, bias=True)
    )

# Load the weights
state_dict = torch.load("models/M_model_text.pth")
model.load_state_dict(state_dict)

model.eval()
model.to(device)

# Define the loss criterion
criterion = nn.CrossEntropyLoss()

# Define the metrics
metrics = {'ACC': acc, 'F1-weighted': f1}

# Evaluate the model
test_loss, test_metrics = evaluate(model, criterion, metrics, test_dataloader, device)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 2/2 [00:00<00:00, 14.43it/s]

eval Loss: 0.6473,  ACC: 0.7240, F1-weighted: 0.5045





In [82]:
# Test female model performance on test set

set_seed(42)
    
# Set device
device = torch.device("cpu")

# Set hyperparameters
batch_size = 16
dropout_prob = 0.1

# Define the dataset
test_dataset = SarcasmDataset(F_test_data)

# Create the DataLoader
test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

# Load the model
model = BertForSequenceClassification.from_pretrained(
    "prajjwal1/bert-tiny",
    num_labels=2,
    output_attentions=False,
    output_hidden_states=False,
)

model.classifier = nn.Sequential(
    nn.Dropout(dropout_prob),
    nn.Linear(in_features=128, out_features=64, bias=True),
    nn.Tanh(),
    nn.Linear(in_features=64, out_features=16, bias=True),
    nn.Tanh(),
    nn.Linear(in_features=16, out_features=2, bias=True)
    )

# Load the weights
state_dict = torch.load("models/F_model_text.pth")
model.load_state_dict(state_dict)

model.eval()
model.to(device)

# Define the loss criterion
criterion = nn.CrossEntropyLoss()

# Define the metrics
metrics = {'ACC': acc, 'F1-weighted': f1}

# Evaluate the model
test_loss, test_metrics = evaluate(model, criterion, metrics, test_dataloader, device)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 2/2 [00:00<00:00, 39.86it/s]

eval Loss: 0.6024,  ACC: 0.7188, F1-weighted: 0.6522



