In [None]:
from transformers import AutoModel, AutoTokenizer 
import torch
import pickle 
import numpy as np
import pandas as pd 
import re
from tqdm import tqdm
import seaborn as sns
from sklearn.cluster import DBSCAN, KMeans
from sklearn.metrics import silhouette_score
from torch import nn
import os
from sklearn.utils import shuffle
import torch.nn.functional as F
import torch.optim as optim
import time
from sklearn.metrics import classification_report
from Attention_Augmented_Conv2d.attention_augmented_conv import AugmentedConv
use_cuda = torch.cuda.is_available()
device = torch.device('cuda' if use_cuda else 'cpu')
from ark_tweet_pos import CMUTweetTagger
import shlex
run_tagger_cmd = "java -XX:ParallelGCThreads=10 -Xmx500m -jar ark_tweet_pos/ark-tweet-nlp-0.3.2.jar"
import FeaturesText
import wandb
wandb.login()
import matplotlib.pyplot as plt 
from sklearn.metrics import accuracy_score
from torch_lr_finder import LRFinder
from sklearn.model_selection import train_test_split
from torch.optim.lr_scheduler import ReduceLROnPlateau

## Model configuration

In [None]:
class Attention(nn.Module):
    """Soft attention of a set of queries over a context sequence.

    Adapted from IBM's pytorch-seq2seq :class:`Attention`; see their `License
    <https://github.com/IBM/pytorch-seq2seq/blob/master/LICENSE>`__.

    Args:
        dimensions (int): Feature size shared by the query and the context.
        attention_type (str, optional): Scoring function, one of

            * ``'dot'``: :math:`score(H_j, q) = H_j^T q`
            * ``'general'``: :math:`score(H_j, q) = H_j^T W_a q`

    Raises:
        ValueError: If ``attention_type`` is neither ``'dot'`` nor ``'general'``.

    Example:

         >>> attention = Attention(256)
         >>> query = torch.randn(5, 1, 256)
         >>> context = torch.randn(5, 5, 256)
         >>> output, weights = attention(query, context)
         >>> output.size()
         torch.Size([5, 1, 256])
         >>> weights.size()
         torch.Size([5, 1, 5])
    """

    def __init__(self, dimensions, attention_type='general'):
        super().__init__()

        if attention_type != 'dot' and attention_type != 'general':
            raise ValueError('Invalid attention type selected.')
        self.attention_type = attention_type

        # Only the 'general' score needs the learned projection W_a.
        if attention_type == 'general':
            self.linear_in = nn.Linear(dimensions, dimensions, bias=False)

        # Maps the concatenated [attended context ; query] back to `dimensions`.
        self.linear_out = nn.Linear(2 * dimensions, dimensions, bias=False)
        self.softmax = nn.Softmax(dim=-1)
        self.tanh = nn.Tanh()
        self.dropout = nn.Dropout(0.5)

    def forward(self, query, context):
        """Attend over ``context`` with every vector in ``query``.

        Args:
            query (torch.Tensor): ``[batch size, output length, dimensions]``
                queries used to score the context.
            context (torch.Tensor): ``[batch size, query length, dimensions]``
                sequence the attention is applied to.

        Returns:
            tuple: ``(output, weights)`` where

            * **output** ``[batch size, output length, dimensions]`` is
              ``dropout(tanh(linear_out([mix ; query])))``.
            * **weights** ``[batch size, output length, query length]`` are the
              softmax-normalised attention weights (each row sums to 1).
        """
        n_batch, n_out, dim = query.size()
        n_ctx = context.size(1)

        if self.attention_type == "general":
            projected = self.linear_in(query.reshape(n_batch * n_out, dim))
            query = projected.reshape(n_batch, n_out, dim)

        # TODO: Include mask on PADDING_INDEX?

        # (n_batch, n_out, dim) x (n_batch, dim, n_ctx) -> (n_batch, n_out, n_ctx)
        scores = torch.bmm(query, context.transpose(1, 2).contiguous())

        # Normalise the scores over the context positions.
        flat_weights = self.softmax(scores.view(n_batch * n_out, n_ctx))
        attention_weights = flat_weights.view(n_batch, n_out, n_ctx)

        # Weighted sum of the context vectors: (n_batch, n_out, dim)
        mix = torch.bmm(attention_weights, context)

        # Concatenate along the feature axis, then project back to `dim`.
        combined = torch.cat((mix, query), dim=2).view(n_batch * n_out, 2 * dim)
        projected_out = self.linear_out(combined).view(n_batch, n_out, dim)

        return self.dropout(self.tanh(projected_out)), attention_weights


In [None]:
class baseline_sentence(nn.Module):
    """Sentence-level binary classifier: BiGRU -> dot attention -> dense head.

    The bidirectional GRU consumes 2304-dimensional features and emits
    2 x 1152 = 2304 features per step, which are used as the attention query
    over the raw input. The head maps 2304 -> 512 -> 2 logits.
    """

    def __init__(self):
        super().__init__()
        self.bgru = nn.GRU(
            input_size=2304,
            hidden_size=1152,
            num_layers=1,
            batch_first=True,
            bidirectional=True,
        )
        self.attention1 = Attention(2304, 'dot')
        # Registered but not called in forward(); it holds no parameters.
        self.max_pool = nn.MaxPool1d(9)
        self.dense1 = nn.Linear(2304, 512)
        self.dense6 = nn.Linear(512, 2)
        self.drop = nn.Dropout(0.4)

    def forward(self, input1):
        """Return class logits for ``input1``.

        Args:
            input1 (torch.Tensor): batch-first sequence of 2304-dim features.
                The ``squeeze(1)`` below only removes the sequence axis when
                its length is 1 -- assumes one vector per sample; TODO confirm.

        Returns:
            torch.Tensor: unnormalised logits with 2 values on the last axis.
        """
        encoded, _ = self.bgru(input1)
        encoded = self.drop(encoded)

        # GRU states are the query, the raw input is the context.
        attended, _ = self.attention1(encoded, input1)
        features = attended.squeeze(1)

        hidden = F.relu(self.dense1(features))
        hidden = self.drop(hidden)
        return self.dense6(hidden)

In [None]:
class baseline(nn.Module):
    """Channel-reducing classifier: three 1x1 convolutions with stacked attention.

    The convolutions shrink the channel axis 4 -> 3 -> 2 -> 1 while a single
    shared 768-dim dot-attention module re-attends after every step. The head
    maps 768 -> 256 -> 2 logits.
    """

    def __init__(self):
        super().__init__()

        def _pointwise_conv(channels_in, channels_out):
            # kernel_size=1 convolution with Xavier-uniform weights (gain 5/3).
            layer = nn.Conv1d(channels_in, channels_out, kernel_size=1)
            torch.nn.init.xavier_uniform_(layer.weight, gain=5/3)
            return layer

        # Attribute names are kept as-is (despite being Conv1d layers) so that
        # saved state_dict keys keep matching.
        self.conv2d = _pointwise_conv(4, 3)
        self.conv2d2 = _pointwise_conv(3, 2)
        self.conv2d3 = _pointwise_conv(2, 1)
        # bgru, normalization2, normalization3 and drop2 are registered but not
        # called in forward().
        self.bgru = nn.GRU(
            input_size=768,
            hidden_size=384,
            num_layers=1,
            batch_first=True,
            bidirectional=True,
        )
        self.attention = Attention(768, attention_type='dot')
        self.normalization = nn.BatchNorm1d(num_features=3)
        self.normalization2 = nn.BatchNorm1d(num_features=2)
        self.normalization3 = nn.BatchNorm1d(num_features=1)
        self.drop = nn.Dropout(0.4)
        self.drop2 = nn.Dropout(0.4)
        self.drop3 = nn.Dropout(0.5)
        self.dense7 = nn.Linear(768, 256)
        self.dense8 = nn.Linear(256, 2)

    def forward(self, input1):
        """Return class logits for ``input1``.

        Args:
            input1 (torch.Tensor): ``(batch, 4, 768)`` -- 4 input channels for
                the first convolution, 768 features for the attention module.

        Returns:
            torch.Tensor: ``(batch, 2)`` unnormalised logits.
        """
        # Stage 1: 4 -> 3 channels, batch-normalised, attends over the raw input.
        reduced = F.relu(self.conv2d(input1))
        reduced = self.normalization(self.drop3(reduced))
        attended_1, _ = self.attention(reduced, input1)

        # Stage 2: 3 -> 2 channels, attends over the previous attention output.
        reduced = self.drop(F.relu(self.conv2d2(reduced)))
        attended_2, _ = self.attention(reduced, attended_1)

        # Stage 3: 2 -> 1 channel, giving a single 768-dim vector per sample.
        reduced = self.drop(F.relu(self.conv2d3(reduced)))
        attended_3, _ = self.attention(reduced, attended_2)

        features = self.drop(attended_3.squeeze(1))
        hidden = self.drop3(F.relu(self.dense7(features)))
        return self.dense8(hidden)

In [None]:
def custom_undersampling(ground_truth, batch_last):
    """Return a shuffled, class-balanced random subset of the data.

    Both classes (label ``1`` = irony, label ``0`` = not irony) are randomly
    undersampled to the size of the smaller one, then the kept samples are
    shuffled together.

    Args:
        ground_truth (torch.Tensor): one label per sample; any shape with
            exactly one element per sample is accepted, e.g. ``(N,)`` or
            ``(N, 1)``.
        batch_last (torch.Tensor): features, indexed by sample on axis 0.

    Returns:
        tuple: ``(X, y)`` -- the selected rows of ``batch_last`` and the
        matching entries of ``ground_truth`` (trailing label axes are kept).

    Raises:
        ValueError: if the number of labels differs from the number of samples.
    """
    # Flatten so that nonzero() yields one index per sample even for (N, 1)
    # labels; on a 2-D tensor nonzero().flatten() interleaves row/column indices.
    labels = ground_truth.reshape(-1)
    if labels.shape[0] != batch_last.shape[0]:
        raise ValueError(
            "ground_truth must hold exactly one label per sample "
            "(got {} labels for {} samples)".format(labels.shape[0], batch_last.shape[0])
        )

    idx_irony = torch.nonzero(labels == 1.0, as_tuple=False).flatten()
    idx_not_irony = torch.nonzero(labels == 0.0, as_tuple=False).flatten()

    # Balance to the minority class, whichever one it is.
    n_per_class = min(idx_irony.numel(), idx_not_irony.numel())

    def _pick(indices):
        # Random subset of `indices` of size n_per_class.
        order = torch.randperm(indices.numel(), device=indices.device)
        return indices[order[:n_per_class]]

    keep = torch.cat([_pick(idx_irony), _pick(idx_not_irony)], dim=0)
    # Shuffle features and labels with one shared permutation.
    keep = keep[torch.randperm(keep.numel(), device=keep.device)]

    return batch_last[keep], ground_truth[keep]

In [None]:
def ramloader_sentence_train(batch_size, ground_truth, batch_last, drop_last=True):
    """Yield class-balanced training mini-batches from in-memory tensors.

    The data is first re-balanced with ``custom_undersampling`` (a fresh random
    subset on every call), then sliced into consecutive batches.

    Args:
        batch_size (int): number of samples per batch; must be positive.
        ground_truth (torch.Tensor): labels, one per sample on axis 0.
        batch_last (torch.Tensor): features, one row per sample on axis 0.
        drop_last (bool, optional): when ``True`` (default, the historical
            behaviour) a trailing batch smaller than ``batch_size`` is
            skipped; when ``False`` it is yielded as well.

    Yields:
        tuple: ``(features, labels)`` for one mini-batch.

    Raises:
        ValueError: if ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    batch_last, ground_truth = custom_undersampling(ground_truth, batch_last)

    n_samples = ground_truth.shape[0]
    stop = n_samples - n_samples % batch_size if drop_last else n_samples
    # Plain slices: no per-batch index list, and indexing never raises
    # StopIteration, so the old try/except was dead code.
    for start in range(0, stop, batch_size):
        end = start + batch_size
        yield batch_last[start:end], ground_truth[start:end]

In [None]:
def ramloader_sentence(batch_size, ground_truth, batch_last, drop_last=True):
    """Yield consecutive mini-batches from in-memory tensors, in order.

    Args:
        batch_size (int): number of samples per batch; must be positive.
        ground_truth (torch.Tensor): labels, one per sample on axis 0.
        batch_last (torch.Tensor): features, one row per sample on axis 0.
        drop_last (bool, optional): when ``True`` (default, the historical
            behaviour) a trailing batch smaller than ``batch_size`` is
            skipped, so up to ``batch_size - 1`` samples are never yielded;
            pass ``False`` to get every sample (e.g. for evaluation).

    Yields:
        tuple: ``(features, labels)`` for one mini-batch.

    Raises:
        ValueError: if ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    n_samples = ground_truth.shape[0]
    stop = n_samples - n_samples % batch_size if drop_last else n_samples
    # Plain slices: no per-batch index list, and indexing never raises
    # StopIteration, so the old try/except was dead code.
    for start in range(0, stop, batch_size):
        end = start + batch_size
        yield batch_last[start:end], ground_truth[start:end]

In [None]:
def ramloader_light_train(batch_size, ground_truth, batch_last, drop_last=True):
    """Yield class-balanced training mini-batches with feature axis 2 squeezed.

    The data is first re-balanced with ``custom_undersampling`` (a fresh random
    subset on every call), then sliced into consecutive batches. Axis 2 of each
    feature batch is removed when it has size 1 (``torch.squeeze(..., 2)``).

    Args:
        batch_size (int): number of samples per batch; must be positive.
        ground_truth (torch.Tensor): labels, one per sample on axis 0.
        batch_last (torch.Tensor): features with at least 3 axes, one entry
            per sample on axis 0.
        drop_last (bool, optional): when ``True`` (default, the historical
            behaviour) a trailing batch smaller than ``batch_size`` is
            skipped; when ``False`` it is yielded as well.

    Yields:
        tuple: ``(features, labels)`` for one mini-batch.

    Raises:
        ValueError: if ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    batch_last, ground_truth = custom_undersampling(ground_truth, batch_last)

    n_samples = ground_truth.shape[0]
    stop = n_samples - n_samples % batch_size if drop_last else n_samples
    # Plain slices: no per-batch index list, and indexing never raises
    # StopIteration, so the old try/except was dead code.
    for start in range(0, stop, batch_size):
        end = start + batch_size
        yield torch.squeeze(batch_last[start:end], 2), ground_truth[start:end]

In [None]:
def ramloader_light(batch_size, ground_truth, batch_last, drop_last=True):
    """Yield consecutive mini-batches, in order, with feature axis 2 squeezed.

    Axis 2 of each feature batch is removed when it has size 1
    (``torch.squeeze(..., 2)``); otherwise the batch is returned unchanged.

    Args:
        batch_size (int): number of samples per batch; must be positive.
        ground_truth (torch.Tensor): labels, one per sample on axis 0.
        batch_last (torch.Tensor): features with at least 3 axes, one entry
            per sample on axis 0.
        drop_last (bool, optional): when ``True`` (default, the historical
            behaviour) a trailing batch smaller than ``batch_size`` is
            skipped, so up to ``batch_size - 1`` samples are never yielded;
            pass ``False`` to get every sample (e.g. for evaluation).

    Yields:
        tuple: ``(features, labels)`` for one mini-batch.

    Raises:
        ValueError: if ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    n_samples = ground_truth.shape[0]
    stop = n_samples - n_samples % batch_size if drop_last else n_samples
    # Plain slices: no per-batch index list, and indexing never raises
    # StopIteration, so the old try/except was dead code.
    for start in range(0, stop, batch_size):
        end = start + batch_size
        yield torch.squeeze(batch_last[start:end], 2), ground_truth[start:end]

In [None]:
# Load the pre-computed DeepMoji sentence features and labels for the irony
# training set (paths are relative to the notebook's working directory).
emoji_train = np.load('../Code/Deep_moji_feature/train/irony/sentence_emoji_train.npy')
emoji_y = np.load('../Code/Deep_moji_feature/train/irony/y_emoji_train.npy')

In [None]:
# Convert the numpy arrays to tensors: float features, integer (long) labels.
# NOTE: rebinds the same names, so this cell is not idempotent with the load above.
emoji_train = torch.tensor(emoji_train, dtype=torch.float)
emoji_y = torch.tensor(emoji_y, dtype=torch.long)

In [None]:
# Insert a singleton axis at position 1.
# NOTE: rebinds emoji_train in place -- re-running this cell adds another axis.
emoji_train = torch.unsqueeze(emoji_train,1)

In [None]:
# Load the saved "last layer" tensors and labels for the irony training set.
# The commented-out lines load the sarcasm variants / other layers instead.
# batch_initial = torch.load('../data/new_approach/train/sarcasm/init_layer.pt')
# batch_middle = torch.load( '../data/new_approach/train/sarcasm/middle_layer.pt')
batch_last = torch.load('../data/new_approach/train/irony/last_layer.pt')
ground_truth = torch.load('../data/new_approach/train/irony/y_train.pt')
# pos_tensor = torch.load('../data/new_approach/train/sarcasm/pos_tensor.pt')

In [None]:
# batch_val = torch.load('../data/new_approach/train/irony_validation/last_layer.pt')
# ground_val = torch.load('../data/new_approach/train/irony_validation/y_train.pt')

In [None]:
# Instantiate the sentence-level classifier and move it to the selected device
# (CUDA when available, otherwise CPU -- see `device` in the imports cell).
mymodel = baseline_sentence()
mymodel.to(device)

### Find the best learning rate

In [None]:
class SentenceEmb(torch.utils.data.Dataset):
    """Map-style dataset over pre-computed sentence features and labels."""

    def __init__(self, x_train, y_train):
        """
        Args:
            x_train (torch.Tensor): features with at least 3 axes; axis 2 is
                squeezed away when it has size 1.
            y_train (torch.Tensor): labels, one entry per sample on axis 0.
        """
        self.x = x_train.squeeze(2)
        self.y = y_train

    def __len__(self):
        """Number of samples, taken from the label tensor."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return ``(features, label)`` for sample ``idx``.

        The label has all singleton axes removed and is cast to ``long``.
        """
        features = self.x[idx, :]
        label = self.y[idx].squeeze().long()
        return features, label

In [None]:
# DataLoader over the training split, used by the learning-rate range test.
# NOTE(review): X_train / y_train are only assigned in the "Splitting data"
# cell further down, so this cell fails on a fresh Restart & Run All.
training_set = SentenceEmb(X_train, y_train)
params = dict(batch_size=64, shuffle=True, num_workers=0)
trainloader = torch.utils.data.DataLoader(training_set, **params)

In [None]:
# DataLoader over the held-out split, used as val_loader in the range test.
# NOTE(review): X_test / y_test are only assigned in the "Splitting data"
# cell further down, so this cell fails on a fresh Restart & Run All.
validation_set = SentenceEmb(X_test, y_test)
params = dict(batch_size=64, shuffle=True, num_workers=0)
testloader = torch.utils.data.DataLoader(validation_set, **params)

In [None]:
# Learning-rate range test: sweep the LR exponentially from 2e-5 up to 1e-1
# over 500 iterations while recording the training loss.
criterion = nn.CrossEntropyLoss()
learning_rate = 2e-5
# `mymodel2` was never defined in this notebook (leftover kernel state); the
# range test now runs on `mymodel`, the model that is actually trained below.
optimizer = torch.optim.AdamW(mymodel.parameters(), lr=learning_rate, weight_decay=1e-8)
# Use the notebook-wide `device` instead of a hard-coded "cuda" so the cell
# also runs on CPU-only machines.
lr_finder = LRFinder(mymodel, optimizer, criterion, device=device)
lr_finder.range_test(trainloader, end_lr=1e-1, num_iter=500, step_mode="exp")
# Restore the model and optimizer to the state they had before the sweep.
lr_finder.reset()

In [None]:
# Plot the loss recorded by the range test against the learning rate.
lr_finder.plot()

In [None]:
# Restore model and optimizer before running a second range test.
lr_finder.reset()

In [None]:
# Second sweep (1000 iterations), this time also passing the held-out loader.
# NOTE(review): there is no lr_finder.reset() after this sweep in the cells shown.
lr_finder.range_test(trainloader, val_loader=testloader, end_lr=1e-1, num_iter=1000, step_mode="exp")

In [None]:
# Plot the loss curve of the second range test.
lr_finder.plot()

In [None]:
# Initialise the model from a previously saved checkpoint.
# map_location lets a checkpoint saved on GPU load on a CPU-only machine too.
mymodel.load_state_dict(torch.load('../Code/model_pytorch/model_0.7834.pt', map_location=device))

### PCA (dimensionality reduction)

In [None]:
# NOTE(review): this import belongs in the imports cell at the top of the notebook.
from sklearn.decomposition import PCA
# One PCA per slice of batch_last. The first keeps enough components to explain
# 95% of the variance; the others keep a fixed 344 components.
# NOTE(review): the results are concatenated later, which only works if the
# 95%-variance PCA also ends up with exactly 344 components -- confirm.
transformer = PCA(0.95)
transformer2 = PCA(344)
transformer3 = PCA(344)
transformer4 = PCA(344)

In [None]:
# Fit an independent PCA on each of the four slices along axis 1 of batch_last
# and re-insert a singleton axis at position 1 so they can be concatenated.
# Assumes batch_last[:, i] becomes 2-D (samples x features) once axis 1 is
# squeezed, as PCA requires -- TODO confirm the stored tensor layout.
feat1 = torch.unsqueeze(torch.tensor(transformer.fit_transform(torch.squeeze(batch_last[:, 0],1))), 1)
feat2 = torch.unsqueeze(torch.tensor(transformer2.fit_transform(torch.squeeze(batch_last[:, 1],1))),1)
feat3 = torch.unsqueeze(torch.tensor(transformer3.fit_transform(torch.squeeze(batch_last[:, 2],1))),1)
feat4 = torch.unsqueeze(torch.tensor(transformer4.fit_transform(torch.squeeze(batch_last[:, 3],1))),1)

In [None]:
# Stack the four reduced slices along axis 1 and add a singleton axis at
# position 2. Requires all four to have the same number of components.
# NOTE(review): `train` is not referenced by any later cell shown here.
train = torch.unsqueeze(torch.cat((feat1,feat2,feat3,feat4), 1), 2)

### Splitting data

In [None]:
# Stratified 95% / 5% train/validation split of the DeepMoji features.
# A fixed random_state makes the split reproducible: without it the validation
# set changes on every run, so a checkpoint reloaded later would be evaluated
# on samples it may have been trained on.
X_train, X_test, y_train, y_test = train_test_split(emoji_train, emoji_y, stratify = emoji_y, test_size = 0.05, shuffle = True, random_state = 42)

## Training phase

In [None]:
# Cross-entropy over the model's two raw logits (no softmax layer in the model).
criterion = nn.CrossEntropyLoss()

In [None]:
# AdamW over all model parameters, with decoupled weight decay.
learning_rate = 2e-5
optimizer = torch.optim.AdamW(mymodel.parameters(), lr=learning_rate, weight_decay=0.01)
# Multiply the LR by 0.9 once the monitored value (the validation loss passed
# to scheduler.step in the training loop) has not improved for 5 epochs.
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.9, patience=5, verbose=True)

In [None]:
# Per-epoch training / validation history, plotted further down.
accuracy_epoch = []
loss_epoch = []
accuracy_validation = []
loss_validation = []
best_val = 0
# Nominal number of steps per epoch shown in the progress line (computed once;
# the true count is lower because the loader undersamples the majority class).
stepsize = int(y_train.shape[0]//32)
for epoch in range(100):  # loop over the dataset multiple times
    # A fresh class-balanced subset is drawn on every epoch.
    trainloader = ramloader_light_train(32, y_train, X_train)
    accuracy_step = []
    loss_step = []
    # Validation below switches to eval mode, so re-enable dropout here.
    mymodel.train()
    for i, data in enumerate(trainloader):

        layer_high = data[0].to(device)
        labels = data[1].to(device)
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = mymodel(layer_high)
        loss = criterion(outputs,  torch.squeeze(labels).long())
        loss.backward()
        optimizer.step()

        # argmax of the logits equals argmax of their softmax, so the
        # (dim-less, deprecated) F.softmax call is not needed.
        _, predicted = torch.max(outputs,1)
        y_actual = torch.squeeze(labels).cpu()
        acc = accuracy_score(y_actual, predicted.cpu())
        accuracy_step.append(acc)
        loss_step.append(loss.item())
        print('Epoch {}, Step {}/{}, Loss: {}, Accuracy: {}'.format(epoch,i,stepsize, loss.item(), acc), end = '\r')

    mean_accuracy = np.mean(accuracy_step)
    accuracy_epoch.append(mean_accuracy)
    loss_epoch.append(np.mean(loss_step))
    print("Accuracy epoch {}: {}".format(epoch, mean_accuracy), end = '\r')

    # torch.no_grad() only disables gradient tracking; eval() is what turns
    # dropout off, so validation metrics are deterministic.
    mymodel.eval()
    with torch.no_grad():
        valoader = ramloader_light(32, y_test, X_test)
        accuracy_step = []
        loss_step = []
        for i, data in enumerate(valoader):

            layer_high = data[0].to(device)
            labels = data[1].to(device)

            outputs = mymodel(layer_high)
            loss_val = criterion(outputs, torch.squeeze(labels).long())
            _, predicted = torch.max(outputs,1)
            y_actual = torch.squeeze(labels).cpu()
            accuracy_step.append(accuracy_score(y_actual, predicted.cpu()))
            loss_step.append(loss_val.item())

        mean_accuracy = np.mean(accuracy_step)
        accuracy_validation.append(mean_accuracy)
        loss_validation.append(np.mean(loss_step))

        # Checkpoint whenever the validation accuracy improves.
        if mean_accuracy > best_val:
            best_val = mean_accuracy
            torch.save(mymodel.state_dict(), '../Code/model_pytorch_irony/model_{}.pt'.format(best_val.round(4)))

    # The scheduler monitors the mean validation loss of this epoch.
    scheduler.step(np.mean(loss_step))

In [None]:
# Epoch indices (0 .. number of completed epochs - 1), used as the x axis of the plots.
iterat = list(range(len(accuracy_epoch)))

In [None]:
# Convert the training history lists to numpy arrays for saving / plotting.
# NOTE: `loss_val` rebinds the name used for the per-batch validation loss in
# the training loop.
accur_tra = np.array(accuracy_epoch)
accr_vali = np.array(accuracy_validation)
loss_tra = np.array(loss_epoch)
loss_val = np.array(loss_validation)
iterat_n = np.array(iterat)

In [None]:
# Reload a previously saved training history from disk.
# NOTE(review): this overwrites the arrays built in the previous cell, and it
# reads from 'model_pytorch2_irony' whereas the (commented-out) save cell below
# writes to 'model_pytorch_irony' -- confirm which run the plots should show.
accur_tra = np.load( '../Code/model_pytorch2_irony/accuracy_train.npy')
accr_vali = np.load( '../Code/model_pytorch2_irony/accuracy_validation.npy')
loss_tra = np.load( '../Code/model_pytorch2_irony/loss_train.npy')
loss_val = np.load( '../Code/model_pytorch2_irony/loss_val.np.npy')
iterat_n = np.load( '../Code/model_pytorch2_irony/epochs.np.npy')

In [None]:
# np.save( '../Code/model_pytorch_irony/accuracy_train', accur_tra)
# np.save( '../Code/model_pytorch_irony/accuracy_validation', accr_vali)
# np.save( '../Code/model_pytorch_irony/loss_train', loss_tra)
# np.save( '../Code/model_pytorch_irony/loss_val.np', loss_val)
# np.save( '../Code/model_pytorch_irony/epochs.np', iterat_n)

In [None]:
# Accuracy curves (train vs. validation) for the first 35 epochs.
plt.rcParams["font.weight"] = "bold"
plt.rcParams["axes.labelweight"] = "bold"
fig, ax = plt.subplots(figsize=(5, 5))
ax.grid(linestyle='dashed')
ax.plot(iterat_n[:35], accur_tra[:35], c='green')
ax.plot(iterat_n[:35], accr_vali[:35])
ax.set_title('Accuracy Metric with respect to DeepMoji Features')
ax.legend(['Train Set', 'Validation Set'])
ax.set_ylabel('Accuracy')
ax.set_xlabel('Epochs')
# NOTE(review): absolute, machine-specific output path.
fig.savefig(r'D:\Data_Science_all\MSC_2_anno\Tesi_Irony_Sarcasm\thesis_latex\img\accuracy_model2_irony.png', dpi=500)

In [None]:
# Cross-entropy loss curves (train vs. validation) for the first 35 epochs.
plt.rcParams["font.weight"] = "bold"
plt.rcParams["axes.labelweight"] = "bold"
fig, ax = plt.subplots(figsize=(5, 5))
ax.grid(linestyle='dashed')
ax.plot(iterat_n[:35], loss_tra[:35], c='green')
ax.plot(iterat_n[:35], loss_val[:35])
ax.set_title('Cross Entropy Loss with respect to DeepMoji Features')
ax.legend(['Train Set', 'Validation Set'])
ax.set_ylabel('Loss')
ax.set_xlabel('Epochs')
# NOTE(review): absolute, machine-specific output path.
fig.savefig(r'D:\Data_Science_all\MSC_2_anno\Tesi_Irony_Sarcasm\thesis_latex\img\loss_model2_irony.png', dpi=500)

In [None]:
def normalize_pred(pred):
    """Flatten a sequence of per-batch CPU tensors into one 1-D numpy array.

    Args:
        pred: iterable of CPU tensors (one per batch), concatenated in order
            along their first axis.

    Returns:
        numpy.ndarray: all values as a single flat vector.
    """
    per_batch = tuple(batch.numpy() for batch in pred)
    return np.ravel(np.concatenate(per_batch))

In [None]:
# Reload the checkpoint chosen for evaluation.
# map_location lets a checkpoint saved on GPU load on a CPU-only machine too.
mymodel.load_state_dict(torch.load('../Code/model_pytorch2_irony/model_0.9065.pt', map_location=device))

In [None]:
# Predict the held-out split batch by batch.
# eval() disables dropout; torch.no_grad() alone does not, and the predictions
# would otherwise be stochastic.
mymodel.eval()
with torch.no_grad():
    prediction_val = []
    # NOTE: the loader skips a trailing batch smaller than 9 samples, so there
    # may be fewer predictions than len(y_test).
    valoader = ramloader_light(9, y_test,X_test)
    for i, data in enumerate(valoader):

        layer_high = data[0].to(device)

        # Single forward pass per batch (the old second call was redundant).
        outputs = mymodel(layer_high)
        _, predicted = torch.max(outputs, 1)
        prediction_val.append(predicted.cpu())

In [None]:
# classification_report expects (y_true, y_pred): ground truth first. With the
# arguments swapped, precision and recall are exchanged in the report.
# NOTE(review): prediction_val is produced from X_test in the cell above, while
# this compares it against the first 40518 training labels -- this cell looks
# like a leftover from a run that predicted on the training set; confirm.
print(classification_report(y_train[:40518], normalize_pred(prediction_val)))

In [None]:
# classification_report expects (y_true, y_pred): ground truth first. With the
# arguments swapped, precision and recall are exchanged in the report.
val_predictions = normalize_pred(prediction_val)
# The loader yields batches in order and skips an incomplete final batch, so
# the predictions line up with the first len(val_predictions) labels.
print(classification_report(y_test[:len(val_predictions)], val_predictions))

## Test set

In [None]:
# Load the saved "last layer" tensors and labels for the irony test set.
# NOTE(review): neither variable is referenced by the later cells shown here.
batch_last_sem= torch.load('../data/new_approach/test/irony/last_layer_sem.pt')
ground_truth_sem = torch.load('../data/new_approach/test/irony/y_sem.pt')

In [None]:
# Load the DeepMoji features / labels of the test set and apply the same
# tensor conversion and singleton axis as for the training features.
# NOTE(review): this reuses the names `emoji_train` / `emoji_y` for TEST data,
# overwriting the training tensors loaded at the top of the notebook.
emoji_train = np.load('../Code/Deep_moji_feature/test/sentence_emoji_sem.npy')
emoji_y = np.load('../Code/Deep_moji_feature/test/y_emoji_sem.npy')
emoji_train = torch.tensor(emoji_train, dtype=torch.float)
emoji_y = torch.tensor(emoji_y, dtype=torch.long)
emoji_train = torch.unsqueeze(emoji_train,1)

In [None]:
# Sanity check: display the shape of the test feature tensor.
emoji_train.size()

In [None]:
# Predict the test set batch by batch.
# eval() disables dropout; torch.no_grad() alone does not, and the predictions
# would otherwise be stochastic.
mymodel.eval()
with torch.no_grad():
    prediction_val = []
    # NOTE: with batch size 2 the loader skips the final sample when the test
    # set has an odd number of samples.
    valoader = ramloader_sentence(2,emoji_y,emoji_train)
    for i, data in enumerate(valoader):

        layer_high = data[0].to(device)

        outputs = mymodel(layer_high)
        _, predicted = torch.max(outputs, 1)
        prediction_val.append(predicted.cpu())

In [None]:
print('Semeval test, DeepMoji features')
# classification_report expects (y_true, y_pred): ground truth first. With the
# arguments swapped, precision and recall are exchanged in the report.
test_predictions = normalize_pred(prediction_val)
# The loader yields batches in order and skips an incomplete final batch, so
# the predictions line up with the first len(test_predictions) labels.
print(classification_report(emoji_y.numpy()[:len(test_predictions)], test_predictions))