In [85]:
import torch.nn as nn
import torch
from torch import optim
from sklearn.model_selection import train_test_split
from torch import optim
from sklearn.metrics import accuracy_score
import pickle

In [106]:
# Per-modality checkpoint stems. The training cells below expand each one to
# f'model/{stem}_fold_{fold+1}.pt' when they save the best weights of a fold.
text_model_file = "text/text_model"
visual_model_file = "visual/visual_model"
audio_model_file = "audio/audio_model"

In [102]:
class SimpleNN(nn.Module):
    """Feed-forward network: Linear -> ReLU -> Linear.

    Args:
        input_size: Number of features in each input vector.
        hidden_size: Width of the single hidden layer.
        output_size: Number of values produced per input (the training cells
            use them as class logits for ``nn.CrossEntropyLoss``).
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # The attribute names double as state_dict key prefixes
        # ("hidden.*", "output.*"), so they must stay as they are for saved
        # checkpoints to remain loadable.
        self.hidden = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.output = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Return the un-normalised outputs of the network for ``x``."""
        return self.output(self.relu(self.hidden(x)))

In [88]:
import json


def _load_json(path):
    """Parse the JSON document stored at ``path`` and return the result.

    The file is opened in a ``with`` block so the handle is closed as soon as
    parsing finishes, instead of being left for the garbage collector.
    """
    with open(path, 'r') as handle:
        return json.load(handle)


# Pre-computed per-modality features plus the annotation file. The feature
# objects are passed to EmbeddingDataset as its `embedding_dict` argument and
# `label_data` as its `label_dict` argument.
text_data = _load_json('text_features_clip.json')
visual_data = _load_json('video_embeddings_clip.json')
audio_data = _load_json('audio_features_wav2vec2_bert.json')
label_data = _load_json('sarcasm_data.json')

In [97]:
import torch
from torch.utils.data import Dataset, DataLoader

class EmbeddingDataset(Dataset):
    """In-memory dataset that pairs each stored embedding with its sarcasm label."""

    def __init__(self, embedding_dict, label_dict):
        """
        Args:
            embedding_dict: Dictionary mapping IDs to embeddings (anything
                ``torch.tensor`` can convert, e.g. lists or numpy arrays).
            label_dict: Dictionary mapping the same IDs to records whose
                'sarcasm' entry holds the label; it is converted to a
                ``torch.long`` tensor.
        """
        self.ids = list(embedding_dict)

        # Convert everything up front so __getitem__ is a plain list lookup.
        self.embeddings = []
        for sample_id in self.ids:
            self.embeddings.append(
                torch.tensor(embedding_dict[sample_id], dtype=torch.float32)
            )

        self.labels = []
        for sample_id in self.ids:
            self.labels.append(
                torch.tensor(label_dict[sample_id]['sarcasm'], dtype=torch.long)
            )

    def __len__(self):
        """Number of samples, i.e. number of IDs in the embedding dictionary."""
        return len(self.ids)

    def __getitem__(self, index):
        """Return the ``(embedding, label)`` tensor pair stored at ``index``."""
        embedding = self.embeddings[index]
        label = self.labels[index]
        return embedding, label

indices_file = "split_indices.p"
def pickle_loader(filename):
    """Load and return the object pickled in ``filename``.

    The file is read inside a ``with`` block so the handle is closed
    deterministically. ``encoding="latin1"`` is forwarded to ``pickle.load``
    unchanged.

    NOTE(security): unpickling can execute arbitrary code; only use this on
    files from a trusted source.
    """
    with open(filename, 'rb') as handle:
        return pickle.load(handle, encoding="latin1")
# Iterated by the training cells as (train_indices, val_indices) pairs, one
# pair per fold.
split_indices = pickle_loader(indices_file)
# Start with the audio features; the text and visual training cells rebind
# `dataset` to their own modality before training.
dataset = EmbeddingDataset(audio_data, label_data)
# Use the GPU when one is available, otherwise fall back to the CPU so the
# notebook does not crash on the first `.to(device)` on a CUDA-less machine.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [105]:
from sklearn.metrics import precision_recall_fscore_support


def get_dataloader(dataset, indices, batch_size, shuffle):
    """Build a DataLoader restricted to the samples of ``dataset`` at ``indices``.

    Args:
        dataset: Indexable dataset to draw samples from.
        indices: Positions in ``dataset`` that the loader may yield.
        batch_size: Number of samples per batch.
        shuffle: Whether the loader reshuffles the selected samples each epoch.

    Returns:
        A ``DataLoader`` over a ``torch.utils.data.Subset`` of ``dataset``.
    """
    return DataLoader(
        torch.utils.data.Subset(dataset, indices),
        batch_size=batch_size,
        shuffle=shuffle,
    )

# Train one audio model per fold in `split_indices`, keeping the checkpoint
# with the best validation accuracy for each fold.
# NOTE(review): no random seed is set in the visible cells, so weight
# initialisation and batch order differ between runs -- confirm this is intended.
for fold, (train_indices, val_indices) in enumerate(split_indices):
    print(f"Starting fold {fold+1}")
    
    train_loader = get_dataloader(dataset, train_indices, batch_size=2, shuffle=True)
    val_loader = get_dataloader(dataset, val_indices, batch_size=1, shuffle=False)
    
    # 1024 input features for the audio embeddings, 2 output classes.
    model = SimpleNN(1024, 256, 2).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
    criterion = nn.CrossEntropyLoss()
    
    best_acc = 0
    best_epoch = 0
    early_stop = 20  # stop once more than this many epochs pass without a new best
    epochs = 0
    # Reset per fold: otherwise a fold whose accuracy never beats `best_acc`
    # would report the previous fold's metrics (or raise NameError on fold 1).
    precision = recall = f1 = 0.0
    
    while True:
        # ---- one training pass over the fold's training split ----
        model.train()
        total_loss = 0
        for embeddings, labels in train_loader:
            optimizer.zero_grad()
            embeddings, labels = embeddings.to(device), labels.to(device)

            outputs = model(embeddings)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        # ---- evaluate on the fold's validation split ----
        model.eval()
        y_true, y_pred = [], []
        with torch.no_grad():
            for embeddings, labels in val_loader:
                embeddings, labels = embeddings.to(device), labels.to(device)
                outputs = model(embeddings)
                predictions = torch.argmax(outputs, dim=1)
                y_true.extend(labels.cpu().numpy())
                y_pred.extend(predictions.cpu().numpy())
        
        accuracy = accuracy_score(y_true, y_pred)
        if accuracy > best_acc:
            precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average='binary')
            print(f'Fold {fold+1}, Epoch {epochs}, Loss: {total_loss}, Accuracy: {accuracy}')
            best_acc = accuracy
            best_epoch = epochs
            # Save CPU copies of the weights without touching the live model.
            # `model.cpu()` would move the module itself off `device` in place,
            # forcing a device check and transfer back on the next batch.
            cpu_state = model.state_dict()
            for name, tensor in list(cpu_state.items()):
                cpu_state[name] = tensor.cpu()
            torch.save(cpu_state, f'model/{audio_model_file}_fold_{fold+1}.pt')
        
        if epochs - best_epoch > early_stop:
            break
        epochs += 1

    print(f"Fold {fold+1} complete. Best accuracy: {best_acc} at epoch {best_epoch}")
    print(f"Precision: {precision}, Recall: {recall}, F1: {f1}")

Starting fold 1
Fold 1, Epoch 0, Loss: 174.33255933225155, Accuracy: 0.6594202898550725
Fold 1, Epoch 2, Loss: 148.10255958419293, Accuracy: 0.6811594202898551
Fold 1, Epoch 7, Loss: 109.96931778441649, Accuracy: 0.717391304347826
Fold 1 complete. Best accuracy: 0.717391304347826 at epoch 7
Precision: 0.7032967032967034, Recall: 0.8421052631578947, F1: 0.7664670658682634
Starting fold 2
Fold 2, Epoch 0, Loss: 179.1945088505745, Accuracy: 0.6739130434782609
Fold 2, Epoch 1, Loss: 166.75129691511393, Accuracy: 0.6811594202898551
Fold 2, Epoch 2, Loss: 156.2831411100924, Accuracy: 0.7463768115942029
Fold 2, Epoch 5, Loss: 139.67985944915563, Accuracy: 0.7536231884057971
Fold 2 complete. Best accuracy: 0.7536231884057971 at epoch 5
Precision: 0.7216494845360825, Recall: 0.9090909090909091, F1: 0.8045977011494252
Starting fold 3
Fold 3, Epoch 0, Loss: 177.5731375068426, Accuracy: 0.7007299270072993
Fold 3, Epoch 19, Loss: 64.26443411647074, Accuracy: 0.708029197080292
Fold 3, Epoch 21, Loss

In [109]:
# Rebind the shared `dataset` name to the text features for this cell's training run.
dataset = EmbeddingDataset(embedding_dict=text_data, label_dict=label_data)
from sklearn.metrics import precision_recall_fscore_support


def get_dataloader(dataset, indices, batch_size, shuffle):
    """Build a DataLoader restricted to the samples of ``dataset`` at ``indices``.

    Args:
        dataset: Indexable dataset to draw samples from.
        indices: Positions in ``dataset`` that the loader may yield.
        batch_size: Number of samples per batch.
        shuffle: Whether the loader reshuffles the selected samples each epoch.

    Returns:
        A ``DataLoader`` over a ``torch.utils.data.Subset`` of ``dataset``.
    """
    return DataLoader(
        torch.utils.data.Subset(dataset, indices),
        batch_size=batch_size,
        shuffle=shuffle,
    )

# Train one text model per fold in `split_indices`, keeping the checkpoint
# with the best validation accuracy for each fold.
# NOTE(review): no random seed is set in the visible cells, so weight
# initialisation and batch order differ between runs -- confirm this is intended.
for fold, (train_indices, val_indices) in enumerate(split_indices):
    print(f"Starting fold {fold+1}")
    
    train_loader = get_dataloader(dataset, train_indices, batch_size=2, shuffle=True)
    val_loader = get_dataloader(dataset, val_indices, batch_size=1, shuffle=False)
    
    # 512 input features for the text embeddings, 2 output classes.
    model = SimpleNN(512, 256, 2).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
    criterion = nn.CrossEntropyLoss()
    
    best_acc = 0
    best_epoch = 0
    early_stop = 20  # stop once more than this many epochs pass without a new best
    epochs = 0
    # Reset per fold: otherwise a fold whose accuracy never beats `best_acc`
    # would report the previous fold's metrics (or raise NameError on fold 1).
    precision = recall = f1 = 0.0
    
    while True:
        # ---- one training pass over the fold's training split ----
        model.train()
        total_loss = 0
        for embeddings, labels in train_loader:
            optimizer.zero_grad()
            embeddings, labels = embeddings.to(device), labels.to(device)

            outputs = model(embeddings)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        # ---- evaluate on the fold's validation split ----
        model.eval()
        y_true, y_pred = [], []
        with torch.no_grad():
            for embeddings, labels in val_loader:
                embeddings, labels = embeddings.to(device), labels.to(device)
                outputs = model(embeddings)
                predictions = torch.argmax(outputs, dim=1)
                y_true.extend(labels.cpu().numpy())
                y_pred.extend(predictions.cpu().numpy())
        
        accuracy = accuracy_score(y_true, y_pred)
        if accuracy > best_acc:
            precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average='binary')
            print(f'Fold {fold+1}, Epoch {epochs}, Loss: {total_loss}, Accuracy: {accuracy}')
            best_acc = accuracy
            best_epoch = epochs
            # Save CPU copies of the weights without touching the live model.
            # `model.cpu()` would move the module itself off `device` in place,
            # forcing a device check and transfer back on the next batch.
            cpu_state = model.state_dict()
            for name, tensor in list(cpu_state.items()):
                cpu_state[name] = tensor.cpu()
            torch.save(cpu_state, f'model/{text_model_file}_fold_{fold+1}.pt')
        
        if epochs - best_epoch > early_stop:
            break
        epochs += 1

    print(f"Fold {fold+1} complete. Best accuracy: {best_acc} at epoch {best_epoch}")
    print(f"Precision: {precision}, Recall: {recall}, F1: {f1}")

Starting fold 1
Fold 1, Epoch 0, Loss: 186.1306072473526, Accuracy: 0.5507246376811594
Fold 1, Epoch 1, Loss: 149.9215216189623, Accuracy: 0.5797101449275363
Fold 1, Epoch 2, Loss: 112.1975835878402, Accuracy: 0.6014492753623188
Fold 1, Epoch 6, Loss: 19.448124623376316, Accuracy: 0.6086956521739131
Fold 1, Epoch 7, Loss: 12.696646797327048, Accuracy: 0.6304347826086957
Fold 1 complete. Best accuracy: 0.6304347826086957 at epoch 7
Precision: 0.6060606060606061, Recall: 0.6153846153846154, F1: 0.6106870229007635
Starting fold 2
Fold 2, Epoch 0, Loss: 188.8181961774826, Accuracy: 0.6231884057971014
Fold 2, Epoch 6, Loss: 20.320686713377654, Accuracy: 0.6376811594202898
Fold 2, Epoch 9, Loss: 9.732707844786091, Accuracy: 0.644927536231884
Fold 2, Epoch 12, Loss: 6.11457789492033, Accuracy: 0.6666666666666666
Fold 2 complete. Best accuracy: 0.6666666666666666 at epoch 12
Precision: 0.6527777777777778, Recall: 0.6911764705882353, F1: 0.6714285714285714
Starting fold 3
Fold 3, Epoch 0, Loss:

In [110]:
# Rebind the shared `dataset` name to the visual features for this cell's training run.
dataset = EmbeddingDataset(embedding_dict=visual_data, label_dict=label_data)
from sklearn.metrics import precision_recall_fscore_support


def get_dataloader(dataset, indices, batch_size, shuffle):
    """Build a DataLoader restricted to the samples of ``dataset`` at ``indices``.

    Args:
        dataset: Indexable dataset to draw samples from.
        indices: Positions in ``dataset`` that the loader may yield.
        batch_size: Number of samples per batch.
        shuffle: Whether the loader reshuffles the selected samples each epoch.

    Returns:
        A ``DataLoader`` over a ``torch.utils.data.Subset`` of ``dataset``.
    """
    return DataLoader(
        torch.utils.data.Subset(dataset, indices),
        batch_size=batch_size,
        shuffle=shuffle,
    )

# Train one visual model per fold in `split_indices`, keeping the checkpoint
# with the best validation accuracy for each fold.
# NOTE(review): no random seed is set in the visible cells, so weight
# initialisation and batch order differ between runs -- confirm this is intended.
for fold, (train_indices, val_indices) in enumerate(split_indices):
    print(f"Starting fold {fold+1}")
    
    train_loader = get_dataloader(dataset, train_indices, batch_size=2, shuffle=True)
    val_loader = get_dataloader(dataset, val_indices, batch_size=1, shuffle=False)
    
    # 512 input features for the visual embeddings, 2 output classes.
    model = SimpleNN(512, 256, 2).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
    criterion = nn.CrossEntropyLoss()
    
    best_acc = 0
    best_epoch = 0
    early_stop = 20  # stop once more than this many epochs pass without a new best
    epochs = 0
    # Reset per fold: otherwise a fold whose accuracy never beats `best_acc`
    # would report the previous fold's metrics (or raise NameError on fold 1).
    precision = recall = f1 = 0.0
    
    while True:
        # ---- one training pass over the fold's training split ----
        model.train()
        total_loss = 0
        for embeddings, labels in train_loader:
            optimizer.zero_grad()
            embeddings, labels = embeddings.to(device), labels.to(device)

            outputs = model(embeddings)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        # ---- evaluate on the fold's validation split ----
        model.eval()
        y_true, y_pred = [], []
        with torch.no_grad():
            for embeddings, labels in val_loader:
                embeddings, labels = embeddings.to(device), labels.to(device)
                outputs = model(embeddings)
                predictions = torch.argmax(outputs, dim=1)
                y_true.extend(labels.cpu().numpy())
                y_pred.extend(predictions.cpu().numpy())
        
        accuracy = accuracy_score(y_true, y_pred)
        if accuracy > best_acc:
            precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average='binary')
            print(f'Fold {fold+1}, Epoch {epochs}, Loss: {total_loss}, Accuracy: {accuracy}')
            best_acc = accuracy
            best_epoch = epochs
            # Save CPU copies of the weights without touching the live model.
            # `model.cpu()` would move the module itself off `device` in place,
            # forcing a device check and transfer back on the next batch.
            cpu_state = model.state_dict()
            for name, tensor in list(cpu_state.items()):
                cpu_state[name] = tensor.cpu()
            torch.save(cpu_state, f'model/{visual_model_file}_fold_{fold+1}.pt')
        
        if epochs - best_epoch > early_stop:
            break
        epochs += 1

    print(f"Fold {fold+1} complete. Best accuracy: {best_acc} at epoch {best_epoch}")
    print(f"Precision: {precision}, Recall: {recall}, F1: {f1}")

Starting fold 1
Fold 1, Epoch 0, Loss: 167.65489553660154, Accuracy: 0.6884057971014492
Fold 1, Epoch 1, Loss: 143.3527531567961, Accuracy: 0.7028985507246377
Fold 1 complete. Best accuracy: 0.7028985507246377 at epoch 1
Precision: 0.7160493827160493, Recall: 0.7631578947368421, F1: 0.7388535031847134
Starting fold 2
Fold 2, Epoch 0, Loss: 171.23269251734018, Accuracy: 0.7101449275362319
Fold 2, Epoch 2, Loss: 130.5143204294145, Accuracy: 0.7246376811594203
Fold 2, Epoch 3, Loss: 128.88086378760636, Accuracy: 0.7536231884057971
Fold 2 complete. Best accuracy: 0.7536231884057971 at epoch 3
Precision: 0.8028169014084507, Recall: 0.7402597402597403, F1: 0.7702702702702703
Starting fold 3
Fold 3, Epoch 0, Loss: 164.1997620910406, Accuracy: 0.7372262773722628
Fold 3 complete. Best accuracy: 0.7372262773722628 at epoch 0
Precision: 0.7301587301587301, Recall: 0.7076923076923077, F1: 0.7187500000000001
Starting fold 4
Fold 4, Epoch 0, Loss: 170.82658325880766, Accuracy: 0.6934306569343066
Fol