In [None]:
from transformers import AutoModel, AutoTokenizer 
import torch
import pickle 
import numpy as np
import pandas as pd 
import re
from tqdm import tqdm
import seaborn as sns
from sklearn.cluster import DBSCAN, KMeans
from sklearn.metrics import silhouette_score
from torch import nn
import os
import torch.nn.functional as F
import torch.optim as optim
from sklearn.metrics import classification_report
from Attention_Augmented_Conv2d.attention_augmented_conv import AugmentedConv
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device('cuda') if use_cuda else torch.device('cpu')
import math

### Model

In [None]:
class simple_attention(nn.Module):
    """Attention-augmented CNN followed by a bidirectional GRU and an MLP head.

    The forward pass returns the raw output of the last linear layer (one
    value per sample, no sigmoid applied), so it is meant to be paired with a
    logit-based loss.
    """

    def __init__(self):
        super().__init__()

        # Layers are declared in the order forward() applies them.
        self.conv_att = AugmentedConv(
            in_channels=4, out_channels=256, kernel_size=2,
            dk=3, dv=3, Nh=3, relative=False, stride=2,
        )
        # NOTE(review): the three pooling layers are 3-D pools whose first
        # pooled axis uses stride 2. Together with the in_channels of the
        # layers that follow (128, 32) this looks intended to halve the
        # channel axis of a 4-D feature map — confirm against the tensor
        # AugmentedConv actually returns.
        self.pooling1 = nn.AvgPool3d(kernel_size=(1, 1, 1), stride=(2, 1, 1))
        self.conv1 = nn.Conv2d(in_channels=128, out_channels=64, kernel_size=2, stride=2)
        self.pooling2 = nn.AvgPool3d(kernel_size=(1, 1, 1), stride=(2, 1, 2))
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=16, kernel_size=1, stride=2)
        self.max_pool = nn.MaxPool3d(kernel_size=(2, 3, 2), stride=(2, 3, 2))
        # Bidirectional with hidden_size=128 -> 256 features per time step.
        self.bgru = nn.GRU(
            input_size=192, hidden_size=128, num_layers=1,
            batch_first=True, bidirectional=True,
        )
        self.fc1 = nn.Linear(256, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 32)
        self.dropout = nn.Dropout(0.4)
        self.fc4 = nn.Linear(32, 1)

    def forward(self, input1):
        """Compute one raw score per sample.

        Args:
            input1: input batch for the attention-augmented convolution
                (assumes 4 input channels, as declared in ``conv_att`` —
                TODO confirm the expected spatial size).

        Returns:
            Tensor of shape ``(batch, 1, 1)`` holding un-normalised scores.
        """
        # Convolutional feature extractor.
        feats = self.conv_att(input1)
        feats = self.pooling1(feats)
        feats = self.conv1(feats)
        feats = self.pooling2(feats)
        feats = self.conv2(feats)
        feats = self.max_pool(feats)

        # Each sample becomes a length-1 sequence of 192 features; the reshape
        # fails if the pooled map does not hold exactly 192 values per sample.
        sequence = feats.reshape(feats.size(0), 1, 192)
        gru_out, _ = self.bgru(sequence)

        # MLP head; dropout follows the second and third hidden layers.
        hidden = F.relu(self.fc1(gru_out))
        hidden = self.dropout(F.relu(self.fc2(hidden)))
        hidden = self.dropout(F.relu(self.fc3(hidden)))
        return self.fc4(hidden)

In [None]:
# Build the network and move its parameters to the selected device.
# The bare name on the last line keeps the model summary as the cell output.
mymodel = simple_attention().to(device)
mymodel

In [None]:
# Training data lives under <parent of the working directory>/data/new_approach/train/sarcasm_word/.
# Paths are built with os.path.join so they are valid on every OS, not only
# Windows; the trailing '' component keeps the trailing separator that the
# previous string-formatted paths had.
ROOT_DIR = os.path.dirname(os.path.abspath('.'))
data_dir = os.path.join(ROOT_DIR, 'data', 'new_approach', 'train', 'sarcasm_word', '')
features_dir = os.path.join(data_dir, 'features', '')
label_dir = os.path.join(data_dir, 'labels', '')

In [None]:
def sort_number(elem):
    """Return the integer that precedes the ``_.pt`` suffix in a file name.

    Used as a sort key so that files are ordered numerically
    (``2_.pt`` before ``10_.pt``) rather than lexicographically.

    Args:
        elem: file name containing ``<digits>_.pt``.

    Returns:
        The digits immediately before ``_.pt`` as an ``int``.

    Raises:
        ValueError: if the name contains no ``<digits>_.pt`` part.
    """
    # Raw string avoids the invalid-escape warning for ``\d``. ``\d+`` (instead
    # of ``\d*``) stops an empty digit run from matching and reaching int('').
    # The ``.`` is deliberately left as a wildcard so every name that was
    # accepted before is still accepted.
    match = re.search(r'(\d+)(_.pt)', elem)
    if match is None:
        raise ValueError("no '<number>_.pt' index found in file name: {!r}".format(elem))
    return int(match.group(1))

### Loader

In [None]:
class MyDataset(torch.utils.data.Dataset):
    """Dataset that reads one feature file and one label file per sample.

    Both directories are listed once at construction time and ordered with
    ``sort_number``; sample ``idx`` pairs the ``idx``-th feature file with the
    ``idx``-th label file, so the two directories are expected to contain
    matching numeric indices.
    """

    def __init__(self, root_feat, root_label):
        """List and numerically sort the files of both directories.

        Args:
            root_feat: directory holding the serialized feature tensors.
            root_label: directory holding the serialized labels.
        """
        self.root_feat = root_feat
        self.root_label = root_label

        feature_names = os.listdir(root_feat)
        label_names = os.listdir(root_label)
        self.files = sorted(feature_names, key=sort_number)
        self.labels = sorted(label_names, key=sort_number)

    def __len__(self):
        """Number of samples, taken from the feature directory."""
        return len(self.files)

    def __getitem__(self, idx):
        """Load and return ``(features, label)`` for sample ``idx``."""
        # torch.load unpickles the file content: only point this at trusted data.
        feature_path = os.path.join(self.root_feat, self.files[idx])
        sample = torch.load(feature_path)
        label_path = os.path.join(self.root_label, self.labels[idx])
        label = torch.load(label_path)
        return sample, label

In [None]:
from sklearn.metrics import accuracy_score
def get_accuracy(output, actual):
    """Return the accuracy of the model on the input data and actual ground truth.

    The raw scores are passed through a sigmoid and thresholded at 0.5. Both
    tensors are flattened before comparison, so any layout works as long as
    they hold the same number of elements (e.g. ``(batch, 1, 1)`` scores
    against ``(batch, 1)`` labels).

    Args:
        output: raw (pre-sigmoid) model scores, one per sample.
        actual: ground-truth labels (0/1), one per sample.

    Returns:
        Fraction of correctly classified samples as a Python ``float``.

    Raises:
        ValueError: if ``output`` and ``actual`` hold different numbers of
            elements.
    """
    prob = torch.sigmoid(output)

    # Comparison tensors carry no autograd history; flatten and move to the
    # CPU so the two operands always share shape and device.
    pred = (prob > 0.50).reshape(-1).cpu()
    truth = actual.detach().reshape(-1).cpu()

    if pred.numel() != truth.numel():
        raise ValueError(
            'output has {} elements but actual has {}'.format(pred.numel(), truth.numel())
        )

    matches = pred.float() == truth.float()
    return float(matches.float().mean().item())

In [None]:
def normalize_pred(pred):
    """Flatten a sequence of tensors into a single 1-D NumPy array.

    Each tensor is detached and copied to the CPU first, so tensors that live
    on the GPU or still track gradients are accepted. Every chunk is flattened
    before concatenation, which also allows scalar (0-d) tensors and chunks of
    differing shapes (e.g. a smaller last batch).

    Args:
        pred: iterable of tensors, typically one per batch.

    Returns:
        1-D ``numpy.ndarray`` with all values in iteration order; an empty
        array when ``pred`` is empty.
    """
    flat_chunks = [chunk.detach().cpu().numpy().ravel() for chunk in pred]
    if not flat_chunks:
        # np.concatenate refuses an empty list.
        return np.array([])
    return np.concatenate(flat_chunks)

In [None]:
# Training split: samples are drawn in random order, five per batch,
# and loaded in the main process (num_workers=0).
dataset = MyDataset(root_feat=features_dir, root_label=label_dir)
trainloader = torch.utils.data.DataLoader(
    dataset,
    batch_size=5,
    shuffle=True,
    num_workers=0,
    pin_memory=True,
)

In [None]:
# Validation split, same layout as the training split.
# Built with os.path.join so the paths are valid on every OS; the trailing ''
# component keeps the trailing separator of the previous string-built paths.
# NOTE: this rebinds data_dir / features_dir / label_dir to the validation
# split. Re-running the cell that builds the training `dataset` after this one
# would read validation files instead.
data_dir = os.path.join(ROOT_DIR, 'data', 'new_approach', 'validation', 'sarcasm_word', '')
features_dir = os.path.join(data_dir, 'features', '')
label_dir = os.path.join(data_dir, 'labels', '')
dataset_val = MyDataset(features_dir, label_dir)

In [None]:
def cyclical_lr(stepsize, min_lr=2e-5, max_lr=1e-3):
    """Build a triangular cyclical learning-rate schedule.

    Args:
        stepsize: number of iterations in half a cycle (min -> max or back).
        min_lr: learning rate at the start and end of every cycle.
        max_lr: learning rate at the middle of every cycle.

    Returns:
        A function mapping the iteration index to the learning rate, rising
        linearly from ``min_lr`` to ``max_lr`` over ``stepsize`` iterations
        and falling back over the next ``stepsize``.
    """

    def scaler(cycle):
        # Amplitude factor per cycle; constant 1 gives the plain triangular
        # policy. Adapt this for a decaying variant.
        return 1.

    def relative(it, stepsize):
        # Position inside the current cycle, in [0, 1]: 0 at the cycle
        # boundaries, 1 at the peak.
        cycle_index = math.floor(1 + it / (2 * stepsize))
        distance_from_peak = abs(it / stepsize - 2 * cycle_index + 1)
        return max(0, (1 - distance_from_peak)) * scaler(cycle_index)

    def lr_lambda(it):
        # Interpolate between the two bounds.
        return min_lr + (max_lr - min_lr) * relative(it, stepsize)

    return lr_lambda

In [None]:
# Training hyper-parameters.
total_epochs = 10
batch_size = 5

# The model returns raw scores, so the sigmoid is folded into the loss.
criterion = nn.BCEWithLogitsLoss()

# LambdaLR sets lr = <optimizer's initial lr> * lr_lambda(step), while
# cyclical_lr returns the absolute learning rate. The optimizer's base lr
# therefore has to be 1.0; with a base lr of 0.001 the effective rate would be
# 1000x smaller than the configured [min_lr, max_lr] range.
optimizer = optim.SGD(mymodel.parameters(), lr=1.0, momentum=0.9)

# Triangular cyclical schedule, stepped once per batch: 5000 batches up,
# 5000 batches down, between 2e-5 and 1e-4.
# (torch.optim.lr_scheduler.CyclicLR is the built-in alternative.)
clr = cyclical_lr(5000, min_lr=2e-5, max_lr=1e-4)
scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, [clr])

In [None]:
# Resume from a previously saved checkpoint. map_location remaps the stored
# tensors onto the current device, so a checkpoint written on a GPU machine
# also loads on a CPU-only one.
# torch.load unpickles the file: only load checkpoints from a trusted source.
mymodel.load_state_dict(torch.load("../Code/model_pytorch/model_0.711.pt", map_location=device))

In [None]:
# Per-epoch history, one entry per epoch.
accuracy_epoch = []
loss_epoch = []
accuracy_val_epoch = []
loss_val_epoch = []
best_val = 0

# A shuffling DataLoader reshuffles on every pass, so one loader serves all
# epochs. Validation is not shuffled: the metric is a mean over batches, and a
# fixed batch composition keeps it comparable between epochs.
trainloader = torch.utils.data.DataLoader(dataset, shuffle=True, batch_size=batch_size, num_workers=0, pin_memory=True)
valoader = torch.utils.data.DataLoader(dataset_val, shuffle=False, batch_size=batch_size, num_workers=0, pin_memory=True)

for step in range(total_epochs):
    # ---- training pass ----
    mymodel.train()  # enable dropout
    accuracy_step = []
    loss_step = []
    for i, data in enumerate(trainloader):
        input_embeddings = data[0].to(device)
        labels = torch.unsqueeze(data[1].to(device), -1)

        optimizer.zero_grad()
        outputs = mymodel(torch.squeeze(input_embeddings, 1))
        # The extra unsqueeze gives the targets the same rank as the outputs.
        loss = criterion(outputs, torch.unsqueeze(labels, -1))
        loss.backward()
        optimizer.step()
        # The cyclical schedule is defined per batch, so step it here.
        scheduler.step()

        accuracy = get_accuracy(outputs, labels)
        # Store a plain float: keeping the loss tensor would also keep its
        # autograd graph alive for the whole epoch.
        batch_loss = loss.item()
        accuracy_step.append(accuracy)
        loss_step.append(batch_loss)

        print('Epoch {}, Step: {} / {}, Loss: {}, Accuracy: {}'.format(step, i, len(trainloader), batch_loss, accuracy), end = '\r')

    mean_accuracy = np.mean(accuracy_step)
    accuracy_epoch.append(mean_accuracy)
    loss_epoch.append(torch.mean(torch.tensor(loss_step)))
    print("Accuracy epoch {}: {}".format(step, mean_accuracy), end = '\r')

    # ---- validation pass ----
    mymodel.eval()  # disable dropout so the validation metrics are deterministic
    with torch.no_grad():
        accuracy_step = []
        loss_step = []
        for i, data in enumerate(valoader):
            input_embeddings = data[0].to(device)
            labels = torch.unsqueeze(data[1].to(device), -1)

            outputs = mymodel(torch.squeeze(input_embeddings, 1))

            loss_val = criterion(outputs, torch.unsqueeze(labels, -1))
            accuracy = get_accuracy(outputs, labels)
            accuracy_step.append(accuracy)
            loss_step.append(loss_val.item())

    mean_accuracy = np.mean(accuracy_step)
    accuracy_val_epoch.append(mean_accuracy)
    loss_val_epoch.append(torch.mean(torch.tensor(loss_step)))

    # Keep a checkpoint whenever validation accuracy improves; the rounded
    # accuracy is embedded in the file name.
    if mean_accuracy > best_val:
        best_val = mean_accuracy
        torch.save(mymodel.state_dict(), '../Code/model_pytorch/model_{}.pt'.format(best_val.round(3)))