This file contains the program used to hyperparameter tune our hyperparameters with pytorch:
Learning Rate
Number of Epochs
Batch Size


In [1]:
from functions_text_model import *
import os

In [2]:
current_directory = os.getcwd()
print("Current directory:", current_directory)

contents = os.listdir(current_directory)
print("Contents of current directory:", contents)

#### Change the Dataset here when neccessary!
json_file_path = os.path.join(current_directory, 'data', 'mixed_data.json')
print(json_file_path)

Current directory: c:\Users\celin\OneDrive\Documents\EPFL\MA2\Deep learning\Inclusivity-in-Sarcasm-Detection
Contents of current directory: ['.git', '.gitignore', 'audio_model-spectrogram.ipynb', 'data', 'functions_audio_model.py', 'functions_text_model.py', 'functions_video_model.py', 'hubert_base_online_tokenizer.ipynb', 'hyperparameter_tuning.ipynb', 'hyperparameter_tuning_text_context.ipynb', 'models', 'README.md', 'requirements.txt', 'results', 'results.json', 'results_hyperparameter_text_context_mixed.json', 'results_tinybert.json', 'results_tinybert_linear_added.json', 'text-data-preparation.ipynb', 'text-model-evaluation.ipynb', 'text-model-training.ipynb', 'text-model-training_context.ipynb', 'Tryouts', 'video-model-training.ipynb', 'vocab.json', 'z-old-text-model-BERT-training.ipynb', 'z-old-text-model.ipynb', '__pycache__']
c:\Users\celin\OneDrive\Documents\EPFL\MA2\Deep learning\Inclusivity-in-Sarcasm-Detection\data\mixed_data.json


In [3]:
from transformers import BertTokenizer
from torch.utils.data import Dataset, DataLoader
import json
from transformers import BertForSequenceClassification, BertTokenizer
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
from torch.utils.data import random_split
from sklearn.model_selection import ParameterGrid


# Load the BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

# Function to encode the text
def encode_text(text):
    encoded_dict = tokenizer.encode_plus(
                        text,                      # Input text
                        add_special_tokens = True, # Add '[CLS]' and '[SEP]'
                        max_length = 64,           # Pad & truncate all sentences
                        truncation = True,
                        padding = 'max_length',
                        return_attention_mask = True,   # Construct attention masks
                        return_tensors = 'pt',     # Return pytorch tensors
                   )
    return encoded_dict['input_ids'], encoded_dict['attention_mask']

# PyTorch Dataset
class SarcasmDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        item = self.data[idx]
        utterance = item['utterance']
        context = item['context']
        utterance_and_context = ' '.join([sentence for sentence in context] + [utterance]) # Combining the utterance and its context into one string.
        sarcasm = int(item['sarcasm'])
        input_ids, attention_mask = encode_text(utterance_and_context)
        return input_ids.flatten(), attention_mask.flatten(), sarcasm
    
# Create the DataLoader
# Load the data from the JSON file
with open(json_file_path) as f:
    data = json.load(f)

# Convert the data to a list of dictionaries
data = list(data.values())

dataset = SarcasmDataset(data)

  from .autonotebook import tqdm as notebook_tqdm


In [12]:
torch.manual_seed(42)
    
# Set device
device = torch.device("cpu")

dropout_prob = 0
# Define the hyperparameters to tune
param_grid = {
    'lr': [1e-3, 1e-4, 1e-5],
    'num_epochs': [7],
    'batch_size': [8, 16, 32, 64],
    'weight_decay': [0.05],
    'dropout_prob': [0]
}

# Create a parameter grid
grid = ParameterGrid(param_grid)

# Initialize a list to store the results
results = []

# Define the size of the training set and the test set
train_size = int(0.8 * len(dataset))  # 80% of the data for training
test_size = len(dataset) - train_size  # 20% of the data for testing

# Split the dataset
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])


# For each combination of hyperparameters
for params in grid:

    # Create the DataLoaders
    train_dataloader = DataLoader(train_dataset, batch_size=params['batch_size'], shuffle=True)
    test_dataloader = DataLoader(test_dataset, batch_size=params['batch_size'], shuffle=True)

    # Create a new model
    model = BertForSequenceClassification.from_pretrained(
        "prajjwal1/bert-tiny",
        num_labels = 2,
        output_attentions = False,
        output_hidden_states = False,
    )

    model.classifier = nn.Sequential(
        nn.Dropout(params['dropout_prob']),
        nn.Linear(in_features=128, out_features=64, bias=True),
        nn.Tanh(),
        nn.Linear(in_features=64, out_features=16, bias=True),
        nn.Tanh(),
        nn.Linear(in_features=16, out_features=2, bias=True)
    )

    model.to(device)

    # Create a new optimizer with the current learning rate
    optimizer = AdamW(model.parameters(), lr=params['lr'], weight_decay=params['weight_decay'])

    # Create the optimizer  
    criterion = nn.CrossEntropyLoss()

    # Define metrics
    metrics = {'ACC': acc, 'F1-weighted': f1}

    # Train and evaluate the model for the current number of epochs
    for epoch in range(params['num_epochs']):
        print(f"Epoch {epoch + 1}")
        print('learning rate:', params['lr'], 'batch size:', params['batch_size'], 'num_epochs:', params['num_epochs'])
        train_loss, train_metrics = train_epoch(model, optimizer, criterion, metrics, train_dataloader, device)
        eval_loss, eval_metrics = evaluate(model, criterion, metrics, test_dataloader, device)

    # Store the results
    results.append({
        'lr': params['lr'],
        'batch_size': params['batch_size'],
        'num_epochs': params['num_epochs'],
        'eval_loss': eval_loss,
        'eval_metrics': eval_metrics
    })

# Save the results to a JSON file
#### Change Filename here when needed!
with open('results/results_hyperparameter_text_context_mixed.json', 'w') as f:
    json.dump(results, f, indent=4)

print(results)

train Loss: 0.6848,  ACC: 0.5815, F1-weighted: 0.4975


100%|██████████| 2/2 [00:00<00:00,  6.55it/s]

eval Loss: 0.6881,  ACC: 0.5433, F1-weighted: 0.4684
[{'lr': 0.001, 'batch_size': 8, 'num_epochs': 7, 'eval_loss': 0.8265854511409998, 'eval_metrics': {'ACC': 0.6138392857142857, 'F1-weighted': 0.5397935397935398}}, {'lr': 0.0001, 'batch_size': 8, 'num_epochs': 7, 'eval_loss': 0.6539842803031206, 'eval_metrics': {'ACC': 0.7410714285714286, 'F1-weighted': 0.6933032245532246}}, {'lr': 1e-05, 'batch_size': 8, 'num_epochs': 7, 'eval_loss': 0.6643021740019321, 'eval_metrics': {'ACC': 0.6774553571428571, 'F1-weighted': 0.6097031440781442}}, {'lr': 0.001, 'batch_size': 16, 'num_epochs': 7, 'eval_loss': 1.209587313234806, 'eval_metrics': {'ACC': 0.590625, 'F1-weighted': 0.5817139355742297}}, {'lr': 0.0001, 'batch_size': 16, 'num_epochs': 7, 'eval_loss': 0.5911248326301575, 'eval_metrics': {'ACC': 0.678125, 'F1-weighted': 0.6661327438900969}}, {'lr': 1e-05, 'batch_size': 16, 'num_epochs': 7, 'eval_loss': 0.6588964387774467, 'eval_metrics': {'ACC': 0.6864583333333334, 'F1-weighted': 0.6671410278


