In [None]:
from transformers import AutoModel, AutoTokenizer 
import torch
import pickle 
import numpy as np
import pandas as pd 
import re
from tqdm import tqdm
import seaborn as sns
from sklearn.cluster import DBSCAN, KMeans
from sklearn.metrics import silhouette_score
from torch import nn
import os
import torch.nn.functional as F
import torch.optim as optim
import time
from sklearn.metrics import classification_report
from Attention_Augmented_Conv2d.attention_augmented_conv import AugmentedConv
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device('cuda' if use_cuda else 'cpu')
from ark_tweet_pos import CMUTweetTagger
import shlex
# Java command line pointing at the ARK tweet NLP jar.
# NOTE(review): not referenced by any other visible cell — confirm it is still needed.
run_tagger_cmd = "java -XX:ParallelGCThreads=10 -Xmx500m -jar ark_tweet_pos/ark-tweet-nlp-0.3.2.jar"
import FeaturesText
# import wandb
# wandb.login()
import matplotlib.pyplot as plt 
from sklearn.metrics import accuracy_score

In [None]:
class Attention(nn.Module):
    """Attend over a `context` sequence with a `query` sequence.

    Adapted from the IBM pytorch-seq2seq :class:`Attention` layer (license:
    https://github.com/IBM/pytorch-seq2seq/blob/master/LICENSE). Compared with a
    plain attention layer, the output here additionally goes through ``tanh`` and a
    dropout of probability 0.5.

    Args:
        dimensions (int): Feature size shared by the query and the context.
        attention_type (str, optional): Scoring function used for the weights:

            * ``'dot'``: :math:`score(H_j, q) = H_j^T q`
            * ``'general'``: :math:`score(H_j, q) = H_j^T W_a q`

    Raises:
        ValueError: If `attention_type` is neither ``'dot'`` nor ``'general'``.

    Example:

         >>> attention = Attention(256)
         >>> query = torch.randn(5, 1, 256)
         >>> context = torch.randn(5, 5, 256)
         >>> output, weights = attention(query, context)
         >>> output.size()
         torch.Size([5, 1, 256])
         >>> weights.size()
         torch.Size([5, 1, 5])
    """

    def __init__(self, dimensions, attention_type='general'):
        super().__init__()

        if attention_type not in ('dot', 'general'):
            raise ValueError('Invalid attention type selected.')
        self.attention_type = attention_type

        # Only the "general" score owns a learned projection of the query.
        if attention_type == 'general':
            self.linear_in = nn.Linear(dimensions, dimensions, bias=False)

        # Maps the concatenation [attended context ; query] back to `dimensions`.
        self.linear_out = nn.Linear(dimensions * 2, dimensions, bias=False)
        self.softmax = nn.Softmax(dim=-1)
        self.tanh = nn.Tanh()
        self.dropout = nn.Dropout(0.5)

    def forward(self, query, context):
        """Compute the attended representation of `context` for every query step.

        Args:
            query (torch.FloatTensor): ``[batch size, output length, dimensions]``
                queries used to score the context.
            context (torch.FloatTensor): ``[batch size, query length, dimensions]``
                sequence the attention is applied over.

        Returns:
            tuple: ``(output, weights)`` where

            * **output** (torch.FloatTensor ``[batch size, output length, dimensions]``)
              holds the attended features after ``tanh`` and dropout;
            * **weights** (torch.FloatTensor ``[batch size, output length, query length]``)
              holds the softmax-normalised attention weights.
        """
        n_batch, n_out, dim = query.size()
        n_ctx = context.size(1)

        if self.attention_type == "general":
            # Project every query vector with W_a, then restore the 3-D layout.
            projected = self.linear_in(query.reshape(n_batch * n_out, dim))
            query = projected.reshape(n_batch, n_out, dim)

        # TODO: Include mask on PADDING_INDEX?

        # (n_batch, n_out, dim) x (n_batch, dim, n_ctx) -> (n_batch, n_out, n_ctx)
        scores = torch.bmm(query, context.transpose(1, 2).contiguous())

        # Normalise the scores of each query step over the context positions.
        flat_scores = scores.view(n_batch * n_out, n_ctx)
        attention_weights = self.softmax(flat_scores).view(n_batch, n_out, n_ctx)

        # Weighted sum of the context: (n_batch, n_out, n_ctx) x (n_batch, n_ctx, dim)
        mix = torch.bmm(attention_weights, context)

        # Concatenate with the (possibly projected) query and flatten to 2-D rows.
        combined = torch.cat((mix, query), dim=2).view(n_batch * n_out, 2 * dim)

        output = self.linear_out(combined).view(n_batch, n_out, dim)
        output = self.tanh(output)
        output = self.dropout(output)

        return output, attention_weights


In [None]:
class baseline_sentence(nn.Module):
    """Bidirectional-GRU + dot-attention classifier that emits two logits per example.

    The GRU consumes features of size 2304 (``batch_first=True``) and, being
    bidirectional with hidden size 1152, also outputs 2304 features per step.
    """

    def __init__(self):
        super().__init__()
        self.bgru = nn.GRU(2304, 1152, num_layers=1, bidirectional=True, batch_first=True)
        self.attention1 = Attention(2304, 'dot')
        # Registered but never called in forward().
        self.max_pool = nn.MaxPool1d(9)
        self.dense1 = nn.Linear(2304, 100)
        self.dense6 = nn.Linear(100, 2)
        self.drop = nn.Dropout(0.5)

    def forward(self, input1):
        """Return unnormalised class scores for `input1`.

        Args:
            input1 (torch.Tensor): ``[batch, steps, 2304]`` features.
                NOTE(review): the squeeze below only removes axis 1 when
                ``steps == 1`` — presumably always the case for callers; confirm.

        Returns:
            torch.Tensor: logits with a last dimension of size 2.
        """
        encoded, _ = self.bgru(input1)
        encoded = self.drop(encoded)

        # The GRU output queries the raw input features.
        attended, _ = self.attention1(encoded, input1)

        features = torch.squeeze(attended, 1)

        hidden = self.dense1(features)
        hidden = self.drop(F.relu(hidden))

        return self.dense6(hidden)

In [None]:
class baseline(nn.Module):
    """Stack of kernel-size-1 convolutions with dot-attention, ending in two logits.

    The three ``Conv1d`` layers reduce the channel axis 4 -> 3 -> 2 -> 1 while the
    768-wide feature axis is left untouched; after each convolution the result
    queries the previous stage through the same shared :class:`Attention` module.
    """

    def __init__(self):
        super().__init__()

        # Channel-reducing 1x1 convolutions (Conv1d despite the attribute names).
        self.conv2d = nn.Conv1d(4, 3, kernel_size=1)
        torch.nn.init.xavier_uniform_(self.conv2d.weight, gain=5/3)
        self.conv2d2 = nn.Conv1d(3, 2, kernel_size=1)
        torch.nn.init.xavier_uniform_(self.conv2d2.weight, gain=5/3)
        self.conv2d3 = nn.Conv1d(2, 1, kernel_size=1)
        torch.nn.init.xavier_uniform_(self.conv2d3.weight, gain=5/3)

        # bgru, normalization2, normalization3 and drop2 are registered but are
        # not called in forward().
        self.bgru = nn.GRU(input_size=768, hidden_size=384, num_layers=1,
                           batch_first=True, bidirectional=True)
        self.attention = Attention(768, attention_type='dot')
        self.normalization = nn.BatchNorm1d(num_features=3)
        self.normalization2 = nn.BatchNorm1d(num_features=2)
        self.normalization3 = nn.BatchNorm1d(num_features=1)
        self.drop = nn.Dropout(0.38)
        self.drop2 = nn.Dropout(0.4)
        self.drop3 = nn.Dropout(0.45)
        self.dense7 = nn.Linear(768, 256)
        self.dense8 = nn.Linear(256, 2)

    def forward(self, input1):
        """Return unnormalised class scores for `input1`.

        Args:
            input1 (torch.Tensor): ``[N, 4, 768]`` input (4 channels are required by
                the first convolution, 768 features by the attention module).

        Returns:
            torch.Tensor: ``[N, 2]`` logits.
        """
        # Stage 1: N x 4 x 768 -> N x 3 x 768, attended against the raw input.
        stage1 = self.normalization(self.drop3(F.relu(self.conv2d(input1))))
        attended1, _ = self.attention(stage1, input1)

        # Stage 2: N x 3 x 768 -> N x 2 x 768, attended against stage-1 attention.
        stage2 = self.drop(F.relu(self.conv2d2(stage1)))
        attended2, _ = self.attention(stage2, attended1)

        # Stage 3: N x 2 x 768 -> N x 1 x 768, attended against stage-2 attention.
        stage3 = self.drop(F.relu(self.conv2d3(stage2)))
        attended3, _ = self.attention(stage3, attended2)

        # Drop the singleton channel axis before the dense head.
        pooled = torch.squeeze(attended3, 1)
        hidden = self.drop3(F.relu(self.dense7(pooled)))
        return self.dense8(hidden)

In [None]:
def ramloader_sentence(batch_size, ground_truth, batch_last, drop_last=False):
    """Yield consecutive ``(features, labels)`` mini-batches from in-memory data.

    The previous implementation iterated over ``len // batch_size`` batches, so
    the trailing samples were silently skipped whenever the number of samples was
    not a multiple of `batch_size` (its ``except StopIteration`` branch could never
    run). Predictions collected from the loader then no longer lined up with the
    full ground truth. The remainder is now yielded as a final, smaller batch.

    Args:
        batch_size (int): Number of samples per batch.
        ground_truth: Labels; must expose ``.shape`` and support slicing on the
            first axis (NumPy array or torch tensor).
        batch_last: Features with at least two axes, sliced on the first axis in
            step with `ground_truth`.
        drop_last (bool, optional): When True, skip a final batch that holds fewer
            than `batch_size` samples (the old behaviour). Defaults to False.

    Yields:
        tuple: ``(batch_last[start:stop, :], ground_truth[start:stop])``.
    """
    n_samples = ground_truth.shape[0]
    for start in range(0, n_samples, batch_size):
        stop = min(start + batch_size, n_samples)
        if drop_last and stop - start < batch_size:
            break
        yield batch_last[start:stop, :], ground_truth[start:stop]
def ramloader_light(batch_size, ground_truth, batch_last, drop_last=False):
    """Yield consecutive ``(features, labels)`` batches, squeezing feature axis 2.

    The previous implementation iterated over ``len // batch_size`` batches, so
    the trailing samples were silently skipped whenever the number of samples was
    not a multiple of `batch_size` (its ``except StopIteration`` branch could never
    run). Predictions collected from the loader then no longer lined up with the
    full ground truth. The remainder is now yielded as a final, smaller batch.

    Args:
        batch_size (int): Number of samples per batch.
        ground_truth: Labels; must expose ``.shape`` and support slicing on the
            first axis.
        batch_last (torch.Tensor): Features with at least three axes, sliced on the
            first axis in step with `ground_truth`.
        drop_last (bool, optional): When True, skip a final batch that holds fewer
            than `batch_size` samples (the old behaviour). Defaults to False.

    Yields:
        tuple: ``(torch.squeeze(features, 2), labels)``; axis 2 is only removed when
        its size is 1.
    """
    n_samples = ground_truth.shape[0]
    for start in range(0, n_samples, batch_size):
        stop = min(start + batch_size, n_samples)
        if drop_last and stop - start < batch_size:
            break
        features = batch_last[start:stop, :]
        yield torch.squeeze(features, 2), ground_truth[start:stop]

In [None]:
# Build both classifiers with freshly initialised weights; `deepmoji` is the
# GRU/attention model and `bertweet` the convolution/attention model.
deepmoji = baseline_sentence()
bertweet = baseline()

In [None]:
# Restore the trained weights. map_location='cpu' lets checkpoints that were saved
# from a CUDA device load on CPU-only machines; the models are moved to `device`
# later, right before inference.
bertweet.load_state_dict(
    torch.load('../Code/model_pytorch_semeval/model_best2.pt', map_location='cpu')
)
deepmoji.load_state_dict(
    torch.load('../Code/model_pytorch2_semeval/model_0.767.pt', map_location='cpu')
)

In [None]:
def normalize_lab(x):
    """Translate numeric labels into the string codes used in the prediction file.

    Args:
        x: Iterable of labels.

    Returns:
        list[str]: ``'1:1'`` for every label equal to 0 and ``'2:0'`` for any other
        value, in input order.
        NOTE(review): presumably an ``index:class`` notation expected by the
        downstream BMA tooling — confirm the mapping direction.
    """
    return ['1:1' if label == 0 else '2:0' for label in x]

In [None]:
def mark_error(actual, predicted):
    """Flag the positions where the prediction differs from the actual label.

    Args:
        actual: Iterable of reference labels.
        predicted: Iterable of predicted labels, paired element-wise with `actual`
            (pairing stops at the shorter of the two).

    Returns:
        list: ``'+'`` where the pair differs, ``np.nan`` where it matches.
    """
    return [
        '+' if truth != guess else np.nan
        for truth, guess in zip(actual, predicted)
    ]

In [None]:
def get_proba_distrib(clf_proba):
    """Format two-class probabilities as strings, starring the predicted class.

    Args:
        clf_proba: 2-D NumPy array whose columns 0 and 1 hold the two class
            probabilities (elements must provide ``.round``).

    Returns:
        numpy.ndarray: One ``[first, second]`` row of strings per input row, each
        value rounded to 5 decimals. The larger value is prefixed with ``'*'``;
        on a tie the second column gets the star.
    """
    rows = []
    for p_first, p_second in zip(clf_proba[:, 0], clf_proba[:, 1]):
        first_txt = str(p_first.round(5))
        second_txt = str(p_second.round(5))
        if p_first > p_second:
            first_txt = '*' + first_txt
        else:
            second_txt = '*' + second_txt
        rows.append([first_txt, second_txt])

    return np.array(rows)

In [None]:
def get_outpupt_bma(x, ground_truth, probability, prediction):
    """Assemble the per-instance prediction table that is exported to CSV.

    Args:
        x: Sized collection; only ``len(x)`` is used (number of instances).
        ground_truth: Iterable of true labels.
        probability: 2-D array of class probabilities, one row per instance.
        prediction: Iterable of predicted labels.

    Returns:
        pandas.DataFrame: Columns ``inst#``, ``actual``, ``predicted``, ``error``,
        ``distribution`` and ``''`` (empty name, second probability column).
        ``inst#`` counts 1, 2, ... and restarts at 1 after
        ``int(len(x) / 10) + 1`` rows.
    """
    actual = normalize_lab(ground_truth)
    predicted = normalize_lab(prediction)
    error = mark_error(actual, predicted)

    # Instance numbers cycle through 1..cycle_length.
    cycle_length = int(len(x) / 10) + 1
    inst_numbers = [(row % cycle_length) + 1 for row in range(len(x))]

    distribution = get_proba_distrib(probability)

    final_df = pd.DataFrame(inst_numbers, columns=['inst#'])
    final_df['actual'] = actual
    final_df['predicted'] = predicted
    final_df['error'] = error
    final_df['distribution'] = distribution[:, 0]
    final_df[''] = distribution[:, 1]

    return final_df

In [None]:
# SemEval irony test set: last-layer features and labels. They are loaded onto the
# CPU explicitly because later cells call `.numpy()` on both tensors, which only
# works for CPU tensors; batches are moved to `device` inside the inference loop.
batch_last_sem = torch.load('../data/new_approach/test/irony/last_layer_sem.pt', map_location='cpu')
ground_truth_sem = torch.load('../data/new_approach/test/irony/y_sem.pt', map_location='cpu')

In [None]:
# DeepMoji sentence features: float tensor with a singleton axis inserted at
# position 1.
emoji_train = torch.unsqueeze(
    torch.tensor(
        np.load('../Code/Deep_moji_feature/test/sentence_emoji_sem.npy'),
        dtype=torch.float,
    ),
    1,
)
# Matching labels as a long tensor.
emoji_y = torch.tensor(
    np.load('../Code/Deep_moji_feature/test/y_emoji_sem.npy'),
    dtype=torch.long,
)

In [None]:
# Score the SemEval test set with the BERTweet classifier.
bertweet.to(device)
# Switch to evaluation mode: without this the Dropout layers keep dropping units
# and BatchNorm normalises with the statistics of each 2-sample batch, which makes
# the predictions random and non-reproducible.
bertweet.eval()
with torch.no_grad():
    prediction_val = []
    valoader = ramloader_light(2, ground_truth_sem, batch_last_sem)
    accuracy_step = []
    loss_step = []
    probability = []
    for i, data in enumerate(valoader):

        layer_high = data[0].to(device)
        labels = data[1].to(device)

        outputs = bertweet(layer_high)
        # Explicit class axis; calling softmax without `dim` is deprecated.
        outputs = F.softmax(outputs, dim=1)
        _, predicted = torch.max(outputs, 1)
        probability.append(outputs.cpu())
        y_actual = torch.squeeze(labels).cpu()
        prediction_val.append(predicted.cpu())

In [None]:
def normalize_proba(pred):
    """Stack per-batch probability tensors into a single NumPy array.

    Args:
        pred: Iterable of CPU torch tensors, one per batch.

    Returns:
        numpy.ndarray: The batches concatenated along the first axis.
    """
    return np.concatenate([batch.numpy() for batch in pred])

In [None]:
def normalize_pred(pred):
    """Flatten per-batch prediction tensors into one 1-D NumPy array.

    Args:
        pred: Iterable of CPU torch tensors, one per batch.

    Returns:
        numpy.ndarray: All predictions in batch order as a flat vector.
    """
    stacked = np.concatenate([batch.numpy() for batch in pred])
    return np.ravel(stacked)

In [None]:
print('SemEval Bertweet')
# classification_report takes (y_true, y_pred); the arguments used to be passed in
# the opposite order, which swaps precision and recall in the printed report.
print(classification_report(ground_truth_sem.numpy(), normalize_pred(prediction_val)))

In [None]:
# NumPy versions of the BERTweet inputs, labels, hard predictions and class
# probabilities, in the form expected by get_outpupt_bma.
x = batch_last_sem.numpy()
y = ground_truth_sem.numpy()
pred = normalize_pred(prediction_val)
probab = normalize_proba(probability)

In [None]:
# Build the per-instance prediction table from the arrays prepared in the previous cell.
df = get_outpupt_bma(x, y, probab,pred)

In [None]:
# Export the BERTweet prediction table.
# NOTE(review): absolute, machine-specific path while every input in this notebook
# is loaded through a relative path — consider a configurable output directory.
df.to_csv('D:/Data_Science_all/MSC_2_anno/Tesi_Irony_Sarcasm/Code/BMA/results_semeval/input/prediction_file/new_bertweet_semeval.csv', index = False)

In [None]:
# Score the SemEval test set with the DeepMoji classifier.
deepmoji.to(device)
# Switch to evaluation mode: without this the Dropout layers (p=0.5 in both the
# model and its attention module) stay active and the predictions become random
# and non-reproducible.
deepmoji.eval()
with torch.no_grad():
    prediction_val = []
    valoader = ramloader_light(2, emoji_y, emoji_train)
    accuracy_step = []
    loss_step = []
    probability = []
    for i, data in enumerate(valoader):

        layer_high = data[0].to(device)
        labels = data[1].to(device)

        outputs = deepmoji(layer_high)
        # Explicit class axis; calling softmax without `dim` is deprecated.
        outputs = F.softmax(outputs, dim=1)
        _, predicted = torch.max(outputs, 1)
        probability.append(outputs.cpu())
        y_actual = torch.squeeze(labels).cpu()
        prediction_val.append(predicted.cpu())

In [None]:
print('SemEval Deepmoji')
# classification_report takes (y_true, y_pred); the arguments used to be passed in
# the opposite order, which swaps precision and recall in the printed report.
print(classification_report(emoji_y.numpy(), normalize_pred(prediction_val)))

In [None]:
# NumPy versions of the DeepMoji inputs, labels, hard predictions and class
# probabilities. This rebinds x, y, pred and probab, which an earlier cell bound
# to the BERTweet results.
x = emoji_train.numpy()
y = emoji_y.numpy()
pred = normalize_pred(prediction_val)
probab = normalize_proba(probability)

In [None]:
# Build the per-instance prediction table for DeepMoji (rebinds `df`).
df = get_outpupt_bma(x, y, probab,pred)

In [None]:
# Export the DeepMoji prediction table.
# NOTE(review): absolute, machine-specific path while every input in this notebook
# is loaded through a relative path — consider a configurable output directory.
df.to_csv('D:/Data_Science_all/MSC_2_anno/Tesi_Irony_Sarcasm/Code/BMA/results_semeval/input/prediction_file/new_deepmoji_semeval.csv', index = False)