In [None]:
# %pip installs into the running kernel's environment; the version is pinned
# to the release recorded in this cell's install log so re-runs are reproducible.
%pip install transformers==4.16.2


Collecting transformers
  Downloading transformers-4.16.2-py3-none-any.whl (3.5 MB)
[K     |████████████████████████████████| 3.5 MB 29.5 MB/s 
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.4.0-py3-none-any.whl (67 kB)
[K     |████████████████████████████████| 67 kB 5.3 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.47-py2.py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 40.3 MB/s 
Collecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 52.3 MB/s 
[?25hCollecting tokenizers!=0.11.3,>=0.10.1
  Downloading tokenizers-0.11.5-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.8 MB)
[K     |████████████████████████████████| 6.8 MB 41.4 MB/s 
Installing collected packages: pyyaml, tokenizers, sacremoses, huggingface-hub, transformers
  Attempting uninstall: pyyaml
    Fou

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
from transformers import BertTokenizer,BertModel
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader,Dataset
from torch.nn.utils.rnn import pack_padded_sequence
from torch.optim import AdamW

In [None]:
import os
import gc
import copy
import time
import random
import string

# For data manipulation
import numpy as np
import pandas as pd

# Pytorch Imports
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader

# Utils
from tqdm import tqdm
from collections import defaultdict

# Sklearn Imports
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import StratifiedKFold, KFold

# For Transformer Models
from transformers import AutoTokenizer, AutoModel, AdamW

In [None]:
# Colab-only: mount Google Drive at /content/drive, where the CSV files and
# model checkpoints referenced by later cells are read from.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
class Bert_tweet_Arch(nn.Module):
    """Transformer encoder with a multi-window CNN classification head.

    The last four hidden-state layers returned by the encoder are stacked as
    four input channels. Three grouped 2-D convolutions with window heights
    3, 4 and 5 (each spanning the full embedding width) slide along the token
    axis; their ReLU activations are max-pooled over that axis, concatenated,
    passed through dropout and projected to two class scores.

    Args:
        pre_trained: Model id or path passed to ``AutoModel.from_pretrained``.
    """

    def __init__(self,pre_trained='vinai/bertweet-base'):
        super().__init__()

        self.bert = AutoModel.from_pretrained(pre_trained, output_hidden_states=True)
        output_channel = 16  # number of kernels per convolution
        num_classes = 2  # number of targets to predict
        dropout = 0.2  # dropout probability
        # Kernel width; it has to equal the encoder's hidden size so that each
        # convolution collapses the embedding axis to width 1 (see squeeze(3)).
        embedding_dim = 768

        ks = 3  # three conv nets here

        # One input channel per stacked hidden-state layer.
        input_channel = 4

        # Window heights 3 / 4 / 5 over the token axis. padding = height - 1
        # lets the window overhang both ends of the sequence; groups=4 gives
        # every hidden-state layer its own output_channel / 4 kernels.
        self.conv1 = nn.Conv2d(input_channel, output_channel, (3, embedding_dim), padding=(2, 0), groups=4)
        self.conv2 = nn.Conv2d(input_channel, output_channel, (4, embedding_dim), padding=(3, 0), groups=4)
        self.conv3 = nn.Conv2d(input_channel, output_channel, (5, embedding_dim), padding=(4, 0), groups=4)

        self.dropout = nn.Dropout(dropout)

        # Classifier over the concatenated pooled features (ks * output_channel).
        self.fc1 = nn.Linear(ks * output_channel, num_classes)
        # Normalise over the class axis explicitly; the implicit-dim form is
        # deprecated and resolves to dim=1 for the 2-D logits produced here.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, text_id, text_mask):
        """Return class probabilities for a batch of tokenised texts.

        Args:
            text_id: Token ids, assumed (batch, seq_len).
            text_mask: Attention mask with the same shape as ``text_id``.

        Returns:
            Tensor of shape (batch, 2) whose rows sum to 1.
        """
        outputs = self.bert(text_id, attention_mask=text_mask)
        # Index 2 holds the tuple of per-layer hidden states for this encoder
        # (after the last hidden state and the pooled output).
        hidden_layers = torch.stack(outputs[2], dim=1)
        # Keep the last four layers as channels: (batch, 4, seq_len, embedding_dim).
        x = hidden_layers[:, -4:]

        # Each conv yields (batch, output_channel, positions, 1); drop the width axis.
        x = [F.relu(conv(x)).squeeze(3) for conv in (self.conv1, self.conv2, self.conv3)]
        # Max-over-time pooling -> (batch, output_channel) per convolution.
        x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x]
        # Concatenate -> (batch, output_channel * ks).
        x = torch.cat(x, 1)
        x = self.dropout(x)
        logit = self.fc1(x)
        return self.softmax(logit)

In [None]:
class Bert_tweet_Model_Arch(nn.Module):
    """Transformer encoder followed by a bidirectional LSTM and a 2-way classifier.

    The encoder's last hidden state is fed through a single-layer
    bidirectional LSTM; the final hidden states of both directions are
    concatenated, regularised with dropout and projected to two classes.

    Args:
        pre_trained: Model id or path passed to ``AutoModel.from_pretrained``.
    """

    def __init__(self,pre_trained='vinai/bertweet-base'):
        super().__init__()

        self.bert = AutoModel.from_pretrained(pre_trained)
        self.hidden_size = self.bert.config.hidden_size
        self.LSTM = nn.LSTM(self.hidden_size,self.hidden_size,bidirectional=True)
        # Forward + backward final states are concatenated, hence hidden_size * 2.
        self.clf = nn.Linear(self.hidden_size*2,2)

    def forward(self,id,mask):
        """Return class probabilities for a batch of tokenised texts.

        Args:
            id: Token ids, assumed (batch, seq_len).
            mask: Attention mask with the same shape as ``id``.

        Returns:
            Tensor of shape (batch, 2) whose rows sum to 1.
        """
        encoded_layers = self.bert(input_ids=id,attention_mask=mask)
        # nn.LSTM is built without batch_first, so move the sequence axis first.
        encoded_layers = encoded_layers[0].permute(1, 0, 2)
        _, (last_hidden, _) = self.LSTM(encoded_layers)
        # last_hidden[0] / last_hidden[1]: final state of the forward / backward pass.
        output_hidden = torch.cat((last_hidden[0], last_hidden[1]), dim=1)
        # F.dropout defaults to training=True; tie it to the module mode so
        # model.eval() really disables it and inference is deterministic.
        output_hidden = F.dropout(output_hidden, 0.2, training=self.training)
        output = self.clf(output_hidden)

        return F.softmax(output, dim=1)

In [None]:
class BertBaseUncased_Model_Arch(nn.Module):
    """Encoder + four bidirectional LSTMs fused with hand-crafted features.

    Four hidden-state layers of the encoder are each summarised by their own
    bidirectional LSTM. The eight final hidden states are concatenated,
    reduced to 30 dimensions, joined with a 30-dimensional projection of the
    extra ``features`` vector and classified into two classes.

    Args:
        pre_trained: Model id or path passed to ``AutoModel.from_pretrained``.
        layer_indices: Index into the encoder's hidden-state tuple for each of
            ``LSTM_1`` .. ``LSTM_4``. The default repeats ``-3`` for the fourth
            LSTM.
            NOTE(review): the repeated ``-3`` looks like a copy-paste slip for
            ``-4``; it is kept as the default because existing checkpoints
            were presumably trained with it. Pass ``(-1, -2, -3, -4)`` when
            training from scratch.

    Raises:
        ValueError: If ``layer_indices`` does not contain exactly four entries.
    """

    def __init__(self,pre_trained='bert-base-uncased', layer_indices=(-1, -2, -3, -3)):
        super().__init__()

        layer_indices = tuple(layer_indices)
        if len(layer_indices) != 4:
            raise ValueError("layer_indices must contain exactly four entries")
        self.layer_indices = layer_indices

        self.bert = AutoModel.from_pretrained(pre_trained, output_hidden_states=True)
        self.hidden_size = self.bert.config.hidden_size
        self.LSTM_1 = nn.LSTM(self.hidden_size,self.hidden_size,bidirectional=True)
        self.LSTM_2 = nn.LSTM(self.hidden_size,self.hidden_size,bidirectional=True)
        self.LSTM_3 = nn.LSTM(self.hidden_size,self.hidden_size,bidirectional=True)
        self.LSTM_4 = nn.LSTM(self.hidden_size,self.hidden_size,bidirectional=True)
        # 4 LSTMs x 2 directions -> hidden_size * 8 inputs.
        self.clf_1 = nn.Linear(self.hidden_size*8,30)
        # Projection of the 30 hand-crafted features.
        self.clf_2=nn.Linear(30,30)
        # Text branch (30) + feature branch (30).
        self.clf_final=nn.Linear(60,2)

    def forward(self,text_id, text_mask,features):
        """Return class probabilities for a batch.

        Args:
            text_id: Token ids, assumed (batch, seq_len).
            text_mask: Attention mask with the same shape as ``text_id``.
            features: Float tensor of shape (batch, 30).

        Returns:
            Tensor of shape (batch, 2) whose rows sum to 1.
        """
        outputs = self.bert(input_ids=text_id,attention_mask=text_mask)
        # Index 2 holds the tuple of per-layer hidden states for this encoder.
        encoded_layers = outputs[2]

        lstms = (self.LSTM_1, self.LSTM_2, self.LSTM_3, self.LSTM_4)
        final_states = []
        for lstm, layer_index in zip(lstms, self.layer_indices):
            # The LSTMs are built without batch_first: sequence axis goes first.
            layer = encoded_layers[layer_index].permute(1, 0, 2)
            _, (last_hidden, _) = lstm(layer)
            # Forward-direction state, then backward-direction state.
            final_states.extend((last_hidden[0], last_hidden[1]))

        output_hidden = torch.cat(final_states, dim=1)
        # F.dropout defaults to training=True; tie it to the module mode so
        # model.eval() really disables it and inference is deterministic.
        output_hidden = F.dropout(output_hidden, 0.2, training=self.training)
        output_hidden = self.clf_1(output_hidden)
        out_2=self.clf_2(features)
        output = torch.cat((output_hidden,out_2), dim=1)
        output=self.clf_final(output)

        return F.softmax(output, dim=1)

In [None]:
class Deberta_Model_Arch(nn.Module):
    """DeBERTa encoder with a multi-window CNN classification head.

    The last four hidden-state layers returned by the encoder are stacked as
    four input channels. Three grouped 2-D convolutions with window heights
    3, 4 and 5 (each spanning the full embedding width) slide along the token
    axis; their ReLU activations are max-pooled over that axis, concatenated,
    passed through dropout and projected to two class scores.
    """

    def __init__(self):
        super(Deberta_Model_Arch, self).__init__()
        self.bert = AutoModel.from_pretrained('microsoft/deberta-base', output_hidden_states=True)
        output_channel = 16  # number of kernels per convolution
        num_classes = 2  # number of targets to predict
        dropout = 0.2  # dropout probability
        # Kernel width; it has to equal the encoder's hidden size so that each
        # convolution collapses the embedding axis to width 1 (see squeeze(3)).
        embedding_dim = 768

        ks = 3  # three conv nets here

        # One input channel per stacked hidden-state layer.
        input_channel = 4

        # Window heights 3 / 4 / 5 over the token axis. padding = height - 1
        # lets the window overhang both ends of the sequence; groups=4 gives
        # every hidden-state layer its own output_channel / 4 kernels.
        self.conv1 = nn.Conv2d(input_channel, output_channel, (3, embedding_dim), padding=(2, 0), groups=4)
        self.conv2 = nn.Conv2d(input_channel, output_channel, (4, embedding_dim), padding=(3, 0), groups=4)
        self.conv3 = nn.Conv2d(input_channel, output_channel, (5, embedding_dim), padding=(4, 0), groups=4)

        self.dropout = nn.Dropout(dropout)

        # Classifier over the concatenated pooled features (ks * output_channel).
        self.fc1 = nn.Linear(ks * output_channel, num_classes)
        # Normalise over the class axis explicitly; the implicit-dim form is
        # deprecated and resolves to dim=1 for the 2-D logits produced here.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, text_id, text_mask):
        """Return class probabilities for a batch of tokenised texts.

        Args:
            text_id: Token ids, assumed (batch, seq_len).
            text_mask: Attention mask with the same shape as ``text_id``.

        Returns:
            Tensor of shape (batch, 2) whose rows sum to 1.
        """
        outputs = self.bert(text_id, attention_mask=text_mask)
        # Index 1 (not 2) holds the hidden-state tuple here — presumably
        # because this encoder returns no pooled output; verify if the
        # backbone is swapped.
        hidden_layers = torch.stack(outputs[1], dim=1)
        # Keep the last four layers as channels: (batch, 4, seq_len, embedding_dim).
        x = hidden_layers[:, -4:]

        # Each conv yields (batch, output_channel, positions, 1); drop the width axis.
        x = [F.relu(conv(x)).squeeze(3) for conv in (self.conv1, self.conv2, self.conv3)]
        # Max-over-time pooling -> (batch, output_channel) per convolution.
        x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x]
        # Concatenate -> (batch, output_channel * ks).
        x = torch.cat(x, 1)
        x = self.dropout(x)
        logit = self.fc1(x)
        return self.softmax(logit)

In [None]:
class roberta_Model_Arch(nn.Module):
    """RoBERTa encoder with a multi-window CNN classification head.

    The last four hidden-state layers returned by the encoder are stacked as
    four input channels. Three grouped 2-D convolutions with window heights
    3, 4 and 5 (each spanning the full embedding width) slide along the token
    axis; their ReLU activations are max-pooled over that axis, concatenated,
    passed through dropout and projected to two class scores.
    """

    def __init__(self):
        super(roberta_Model_Arch, self).__init__()
        self.bert = AutoModel.from_pretrained('roberta-base', output_hidden_states=True)
        output_channel = 16  # number of kernels per convolution
        num_classes = 2  # number of targets to predict
        dropout = 0.2  # dropout probability
        # Kernel width; it has to equal the encoder's hidden size so that each
        # convolution collapses the embedding axis to width 1 (see squeeze(3)).
        embedding_dim = 768

        ks = 3  # three conv nets here

        # One input channel per stacked hidden-state layer.
        input_channel = 4

        # Window heights 3 / 4 / 5 over the token axis. padding = height - 1
        # lets the window overhang both ends of the sequence; groups=4 gives
        # every hidden-state layer its own output_channel / 4 kernels.
        self.conv1 = nn.Conv2d(input_channel, output_channel, (3, embedding_dim), padding=(2, 0), groups=4)
        self.conv2 = nn.Conv2d(input_channel, output_channel, (4, embedding_dim), padding=(3, 0), groups=4)
        self.conv3 = nn.Conv2d(input_channel, output_channel, (5, embedding_dim), padding=(4, 0), groups=4)

        self.dropout = nn.Dropout(dropout)

        # Classifier over the concatenated pooled features (ks * output_channel).
        self.fc1 = nn.Linear(ks * output_channel, num_classes)
        # Normalise over the class axis explicitly; the implicit-dim form is
        # deprecated and resolves to dim=1 for the 2-D logits produced here.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, text_id, text_mask):
        """Return class probabilities for a batch of tokenised texts.

        Args:
            text_id: Token ids, assumed (batch, seq_len).
            text_mask: Attention mask with the same shape as ``text_id``.

        Returns:
            Tensor of shape (batch, 2) whose rows sum to 1.
        """
        outputs = self.bert(text_id, attention_mask=text_mask)
        # Index 2 holds the tuple of per-layer hidden states for this encoder
        # (after the last hidden state and the pooled output).
        hidden_layers = torch.stack(outputs[2], dim=1)
        # Keep the last four layers as channels: (batch, 4, seq_len, embedding_dim).
        x = hidden_layers[:, -4:]

        # Each conv yields (batch, output_channel, positions, 1); drop the width axis.
        x = [F.relu(conv(x)).squeeze(3) for conv in (self.conv1, self.conv2, self.conv3)]
        # Max-over-time pooling -> (batch, output_channel) per convolution.
        x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x]
        # Concatenate -> (batch, output_channel * ks).
        x = torch.cat(x, 1)
        x = self.dropout(x)
        logit = self.fc1(x)
        return self.softmax(logit)

In [None]:
class DatasetTest(Dataset):
    """Label-free dataset that tokenises one text column of a dataframe.

    Args:
        df: Dataframe holding the texts.
        tokenizer: Object exposing ``encode_plus``.
        max_length: Length every example is truncated / padded to.
        column_: Name of the column in ``df`` that contains the text.
    """

    def __init__(self, df, tokenizer, max_length,column_):
        self.tokenizer = tokenizer
        self.max_len = max_length
        self.df = df
        self.text = df[column_].values

    def __len__(self):
        """Number of rows in the wrapped dataframe."""
        return len(self.df)

    def __getitem__(self, index):
        """Tokenise row ``index`` and return its ids and attention mask as long tensors."""
        encoded = self.tokenizer.encode_plus(
            self.text[index],
            truncation=True,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
        )
        # Output key -> tokenizer field it is built from.
        field_map = (('text_ids', 'input_ids'), ('text_mask', 'attention_mask'))
        return {
            out_key: torch.tensor(encoded[field], dtype=torch.long)
            for out_key, field in field_map
        }

In [None]:
@torch.no_grad()
def valid_fn(model, dataloader, device):
    """Run ``model`` over ``dataloader`` in eval mode and collect its outputs.

    Args:
        model: Callable module invoked as ``model(ids, mask)``.
        dataloader: Iterable of batches with ``'text_ids'`` and ``'text_mask'`` entries.
        device: Device the batch tensors are moved to before the forward pass.

    Returns:
        NumPy array with the per-batch outputs concatenated along axis 0.
    """
    model.eval()

    collected = []
    for batch in tqdm(dataloader, total=len(dataloader)):
        token_ids = batch['text_ids'].to(device, dtype=torch.long)
        attention = batch['text_mask'].to(device, dtype=torch.long)
        scores = model(token_ids, attention)
        collected.append(scores.detach().cpu().numpy())

    stacked = np.concatenate(collected)
    gc.collect()
    return stacked

In [None]:
def inference(model_paths, dataloader, device,model):
    """Ensemble several checkpoints of ``model`` and return hard class predictions.

    Every checkpoint is loaded into ``model`` in turn, scored with
    ``valid_fn``, and the per-checkpoint outputs are averaged before taking
    the arg-max over the class axis.

    Args:
        model_paths: Iterable of paths to state-dict checkpoints.
        dataloader: Batches forwarded to ``valid_fn``.
        device: Device used for the model, the checkpoint tensors and the batches.
        model: Module instance whose architecture matches the checkpoints.

    Returns:
        1-D NumPy array with one predicted class index per example.

    Raises:
        ValueError: If ``model_paths`` is empty.
    """
    model_paths = list(model_paths)
    if not model_paths:
        raise ValueError("model_paths must contain at least one checkpoint path")

    # Honour the requested device instead of assuming CUDA; once is enough
    # because load_state_dict copies into the already-placed parameters.
    model.to(device)

    fold_preds = []
    for i, path in enumerate(model_paths):
        # map_location lets GPU-saved checkpoints load on any target device.
        # NOTE: torch.load unpickles the file — only load trusted checkpoints.
        model.load_state_dict(torch.load(path, map_location=device))

        print(f"Getting predictions for model {i+1}")
        fold_preds.append(valid_fn(model, dataloader, device))

    # (n_models, n_examples, n_classes) -> mean over models -> arg-max class.
    mean_scores = np.mean(np.stack(fold_preds), axis=0)
    return np.argmax(mean_scores, axis=1)

In [None]:
# %pip installs into the running kernel's environment; the version is pinned
# to the release recorded in this cell's install log so re-runs are reproducible.
%pip install emoji==1.6.3

Collecting emoji
  Downloading emoji-1.6.3.tar.gz (174 kB)
[?25l[K     |█▉                              | 10 kB 23.6 MB/s eta 0:00:01[K     |███▊                            | 20 kB 29.1 MB/s eta 0:00:01[K     |█████▋                          | 30 kB 30.9 MB/s eta 0:00:01[K     |███████▌                        | 40 kB 34.8 MB/s eta 0:00:01[K     |█████████▍                      | 51 kB 28.4 MB/s eta 0:00:01[K     |███████████▎                    | 61 kB 30.5 MB/s eta 0:00:01[K     |█████████████▏                  | 71 kB 24.4 MB/s eta 0:00:01[K     |███████████████                 | 81 kB 25.2 MB/s eta 0:00:01[K     |█████████████████               | 92 kB 27.3 MB/s eta 0:00:01[K     |██████████████████▉             | 102 kB 27.7 MB/s eta 0:00:01[K     |████████████████████▊           | 112 kB 27.7 MB/s eta 0:00:01[K     |██████████████████████▋         | 122 kB 27.7 MB/s eta 0:00:01[K     |████████████████████████▌       | 133 kB 27.7 MB/s eta 0:00:01[K    

In [None]:
# One tokenizer per backbone used by the model classes defined above; each
# dataset must be built with the tokenizer matching the model it feeds.
tokenizer_bert_tweet= AutoTokenizer.from_pretrained('vinai/bertweet-base')
tokenizer_bert_base_uncased= AutoTokenizer.from_pretrained('bert-base-uncased')
tokenizer_deberta= AutoTokenizer.from_pretrained('microsoft/deberta-base')
tokenizer_roberta= AutoTokenizer.from_pretrained('roberta-base')



Downloading:   0%|          | 0.00/558 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/824k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.03M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/455k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/474 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/878k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/481 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/878k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.29M [00:00<?, ?B/s]

In [None]:
# Raw task-A English test set from the mounted Drive; the cells below read its 'text' column.
df_test=pd.read_csv('/content/drive/MyDrive/ISarcasm/TestSet/task_A_En_test.csv')

In [None]:
# Score the test set with the five BERTweet + CNN fold checkpoints and keep
# the ensembled class predictions.
valid_dataset = DatasetTest(
    df_test,
    tokenizer=tokenizer_bert_tweet,
    max_length=128,
    column_='text',
)
valid_loader = DataLoader(
    valid_dataset,
    batch_size=16,
    num_workers=2,
    shuffle=False,  # keep predictions aligned with df_test row order
    pin_memory=True,
)
MODEL_PATH_2 = [
    '/content/drive/MyDrive/ISarcasm/Models/bert_tweet_kim_cnn/Loss-Fold-0.bin',
    '/content/drive/MyDrive/ISarcasm/Models/bert_tweet_kim_cnn/Loss-Fold-1.bin',
    '/content/drive/MyDrive/ISarcasm/Models/bert_tweet_kim_cnn/Loss-Fold-2.bin',
    '/content/drive/MyDrive/ISarcasm/Models/bert_tweet_kim_cnn/Loss-Fold-3.bin',
    '/content/drive/MyDrive/ISarcasm/Models/bert_tweet_kim_cnn/Loss-Fold-4.bin',
]
preds_bert_tweet = inference(MODEL_PATH_2, valid_loader, 'cuda', Bert_tweet_Arch())

Downloading:   0%|          | 0.00/517M [00:00<?, ?B/s]

Some weights of the model checkpoint at vinai/bertweet-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.decoder.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Getting predictions for model 1


100%|██████████| 88/88 [00:29<00:00,  2.98it/s]


Getting predictions for model 2


100%|██████████| 88/88 [00:29<00:00,  2.96it/s]


Getting predictions for model 3


100%|██████████| 88/88 [00:30<00:00,  2.93it/s]


Getting predictions for model 4


100%|██████████| 88/88 [00:29<00:00,  2.99it/s]


Getting predictions for model 5


100%|██████████| 88/88 [00:29<00:00,  2.96it/s]


In [None]:
# Score the test set with the five BERTweet + BiLSTM fold checkpoints and
# keep the ensembled class predictions.
valid_dataset = DatasetTest(
    df_test,
    tokenizer=tokenizer_bert_tweet,
    max_length=128,
    column_='text',
)
valid_loader = DataLoader(
    valid_dataset,
    batch_size=16,
    num_workers=2,
    shuffle=False,  # keep predictions aligned with df_test row order
    pin_memory=True,
)
MODEL_PATH_2 = [
    '/content/drive/MyDrive/ISarcasm/Models/bert_tweet/Loss-Fold-0.bin',
    '/content/drive/MyDrive/ISarcasm/Models/bert_tweet/Loss-Fold-1.bin',
    '/content/drive/MyDrive/ISarcasm/Models/bert_tweet/Loss-Fold-2.bin',
    '/content/drive/MyDrive/ISarcasm/Models/bert_tweet/Loss-Fold-3.bin',
    '/content/drive/MyDrive/ISarcasm/Models/bert_tweet/Loss-Fold-4.bin',
]
preds_bert_tweet_lstm = inference(MODEL_PATH_2, valid_loader, 'cuda', Bert_tweet_Model_Arch())

Some weights of the model checkpoint at vinai/bertweet-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.decoder.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Getting predictions for model 1


100%|██████████| 88/88 [00:27<00:00,  3.22it/s]


Getting predictions for model 2


100%|██████████| 88/88 [00:27<00:00,  3.23it/s]


Getting predictions for model 3


100%|██████████| 88/88 [00:27<00:00,  3.25it/s]


Getting predictions for model 4


100%|██████████| 88/88 [00:27<00:00,  3.23it/s]


Getting predictions for model 5


100%|██████████| 88/88 [00:27<00:00,  3.26it/s]


In [None]:
# Test set that also carries precomputed per-tweet feature columns (POS counts,
# sentiment scores, ...); see the column listing in the next cell.
df_test_modified=pd.read_csv('/content/drive/MyDrive/ISarcasm/Test_dataset/test_task_1_En.csv')

In [None]:
# Show the column names available in the feature-augmented test frame.
df_test_modified.columns

Index(['text', 'label', 'POS_1', 'POS_2', 'POS_3', 'POS_4',
       'Positive Sentiment', 'Negative Sentiment', 'Neutral Sentiment',
       'sentiment', 'Blob Polarity', 'Blob Subjectivity',
       'positive Sentiment first half', 'negative Sentiment first half',
       'Neutral Sentiment first half', 'first half sentiment',
       'first half Blob Polarity', 'first half Blob Subjectivity',
       'positive Sentiment second half', 'negative Sentiment second half',
       'Neutral Sentiment second half', 'second half sentiment',
       'second half Blob Polarity', 'second half Blob Subjectivity',
       'Capitalization', 'Topic :', 'contain_emoji', 'count_special_chracter',
       'count_question_marks', 'subjectivity', 'count_verbs', 'count_nouns',
       'count_pronun', 'count_adjct', 'entities', 'count_profane',
       'capitalisation_rate', 'hashtag', 'hashtag_counts', 'length',
       'hashtag_word_ratio', '# punctuation', 'NE_score', 'demoji_text', 'url',
       'emoji', 'emoji_cou

In [None]:
class BertDataset(Dataset):
    """Dataset yielding tokenised text together with 30 numeric feature columns.

    Args:
        df: Dataframe with a ``'text'`` column and the feature columns listed
            in ``__init__``.
        tokenizer: Object exposing ``encode_plus``.
        max_length: Length every example is truncated / padded to.
    """

    def __init__(self, df, tokenizer, max_length):
        # Feature columns, in the order they are fed to the model.
        feature_columns = [
            'POS_1', 'POS_2', 'POS_3', 'POS_4',
            'Positive Sentiment', 'Negative Sentiment', 'sentiment',
            'Blob Polarity', 'Blob Subjectivity',
            'positive Sentiment first half', 'negative Sentiment first half',
            'first half sentiment',
            'first half Blob Polarity', 'first half Blob Subjectivity',
            'positive Sentiment second half', 'negative Sentiment second half',
            'second half sentiment',
            'second half Blob Polarity', 'second half Blob Subjectivity',
            'Capitalization', 'Topic :',
            'contain_emoji', 'count_special_chracter', 'count_question_marks',
            'subjectivity', 'count_verbs', 'count_nouns', 'count_pronun',
            'count_adjct', 'count_profane',
        ]
        self.tokenizer = tokenizer
        self.max_len = max_length
        self.df = df
        self.text = df['text'].values
        # Attribute name (including its spelling) is kept as-is for existing users.
        self.fetures = df[feature_columns].astype(float).values

    def __len__(self):
        """Number of rows in the wrapped dataframe."""
        return len(self.df)

    def __getitem__(self, index):
        """Return token ids, attention mask and the feature vector of row ``index``."""
        encoded = self.tokenizer.encode_plus(
            self.text[index],
            truncation=True,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
        )
        sample = {}
        sample['text_ids'] = torch.tensor(encoded['input_ids'], dtype=torch.long)
        sample['text_mask'] = torch.tensor(encoded['attention_mask'], dtype=torch.long)
        sample['features'] = torch.tensor(self.fetures[index], dtype=torch.float)
        return sample


In [None]:
@torch.no_grad()
def valid_fn_Bert(model, dataloader, device):
    """Run a text + features ``model`` over ``dataloader`` in eval mode.

    Args:
        model: Callable module invoked as ``model(ids, mask, features)``.
        dataloader: Iterable of batches with ``'text_ids'``, ``'text_mask'``
            and ``'features'`` entries.
        device: Device the batch tensors are moved to before the forward pass.

    Returns:
        NumPy array with the per-batch outputs concatenated along axis 0.
    """
    model.eval()

    collected = []
    for batch in tqdm(dataloader, total=len(dataloader)):
        token_ids = batch['text_ids'].to(device, dtype=torch.long)
        attention = batch['text_mask'].to(device, dtype=torch.long)
        extra = batch['features'].to(device, dtype=torch.float)
        scores = model(token_ids, attention, extra)
        collected.append(scores.detach().cpu().numpy())

    stacked = np.concatenate(collected)
    gc.collect()
    return stacked

In [None]:
def inference_bert(model_paths, dataloader, device,model):
    """Ensemble several checkpoints of `model` and return hard class predictions.

    Each checkpoint is loaded into `model` in turn, `valid_fn_Bert` produces its
    scores for the whole dataloader, the scores are averaged across checkpoints
    and the arg-max over axis 1 is returned.

    Args:
        model_paths: iterable of paths to state-dict files readable by `torch.load`.
        dataloader: passed through unchanged to `valid_fn_Bert`.
        device: device used for the model, the loaded weights and the batches.
        model: instantiated network whose parameters match the checkpoints.

    Returns:
        np.ndarray: index of the highest mean score for each sample (this relies
        on `valid_fn_Bert` returning a 2-D samples-by-classes array).
    """
    # Honour the `device` argument instead of a hard-coded 'cuda', so the model
    # and the batches moved inside valid_fn_Bert always share one device.
    # nn.Module.to works in place, so doing it once before the loop is enough.
    model.to(device)

    per_model_scores = []
    for i, path in enumerate(model_paths):
        # map_location lets checkpoints saved on a GPU be restored onto `device`.
        model.load_state_dict(torch.load(path, map_location=device))

        print(f"Getting predictions for model {i+1}")
        per_model_scores.append(valid_fn_Bert(model, dataloader, device))

    # Soft voting: average the scores over the model axis, then pick the best class.
    mean_scores = np.mean(np.array(per_model_scores), axis=0)
    return np.argmax(mean_scores, axis=1)

In [None]:
# Display the test frame (text, label and engineered feature columns) that is
# passed to BertDataset for the BERT inference run.
df_test_modified

Unnamed: 0,text,label,POS_1,POS_2,POS_3,POS_4,Positive Sentiment,Negative Sentiment,Neutral Sentiment,sentiment,Blob Polarity,Blob Subjectivity,positive Sentiment first half,negative Sentiment first half,Neutral Sentiment first half,first half sentiment,first half Blob Polarity,first half Blob Subjectivity,positive Sentiment second half,negative Sentiment second half,Neutral Sentiment second half,second half sentiment,second half Blob Polarity,second half Blob Subjectivity,Capitalization,Topic :,contain_emoji,count_special_chracter,count_question_marks,subjectivity,count_verbs,count_nouns,count_pronun,count_adjct,entities,count_profane,capitalisation_rate,hashtag,hashtag_counts,length,hashtag_word_ratio,# punctuation,NE_score,demoji_text,url,emoji,emoji_counts,@_counts,@_counts_rates,irony_hashtag,punc_flooding,sarcasm_flag,hypocrisy_flag,seriously_flag,not_flag,sar_flag,haha_flag,ekphrasis
0,"Size on the the Toulouse team, That pack is mo...",0,8.0,3.0,4.0,2.0,0.239489,0.882986,0.861,-0.643497,0.800,0.400000,0.225000,0.100000,1.0,0.125000,0.0,0.0,0.525000,0.150000,1.0,0.375000,0.0,0.0,1,0.135726,False,0,0,0.750000,0.031250,0.015625,0.015625,0.007812,Toulouse Welsh,0,0.046875,[],0,25,0.0,5,0.080000,"Size on the the Toulouse team, That pack is mo...",0,{},0,0,0.0,0.0,0,0,0,0,0,0,0,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."
1,Pinball!,0,1.0,0.0,0.0,0.0,0.000000,0.000000,1.000,0.000000,0.000,0.000000,0.000000,0.000000,1.0,0.000000,0.0,0.0,0.000000,0.000000,1.0,0.000000,0.0,0.0,0,0.000000,False,1,0,0.000000,0.000000,0.000000,0.000000,0.000000,,0,0.125000,[],0,1,0.0,1,1.000000,Pinball!,0,{},0,0,0.0,0.0,0,0,0,0,0,0,0,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."
2,So the Scottish Government want people to get ...,0,6.0,2.0,3.0,4.0,0.293818,1.001648,0.763,-0.707830,0.000,0.000000,0.450000,0.150000,1.0,0.300000,0.0,0.0,0.383333,0.266667,1.0,0.116667,0.0,0.0,0,0.167083,True,0,0,0.666667,0.018349,0.018349,0.000000,0.000000,Scottish,0,0.027523,[],0,19,0.0,1,0.052632,So the Scottish Government want people to get ...,0,{},0,0,0.0,0.0,0,0,0,0,0,0,0,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."
3,villainous pro tip : change the device name on...,0,7.0,2.0,1.0,0.0,0.045833,0.020833,0.725,0.025000,0.000,0.000000,0.191667,0.383333,1.0,-0.191667,0.0,0.0,0.383333,0.316667,1.0,0.066667,0.0,0.0,0,0.082880,True,0,0,0.000000,0.021739,0.010870,0.010870,0.032609,,0,0.000000,[],0,17,0.0,1,0.000000,villainous pro tip : change the device name on...,0,{},0,0,0.0,0.0,0,0,0,0,0,0,0,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."
4,I would date any of these men 🥺,0,4.0,0.0,0.0,0.0,0.031250,0.046875,1.000,-0.015625,0.000,0.000000,0.158333,0.166667,1.0,-0.008333,0.0,0.0,0.225000,0.050000,1.0,0.175000,0.0,0.0,0,0.251250,True,0,0,0.000000,0.064516,0.000000,0.032258,0.000000,,0,0.032258,[],0,8,0.0,0,0.000000,I would date any of these men pleading_face,0,{'🥺': 'pleading face'},1,0,0.0,0.0,0,0,0,0,0,0,0,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1395,I’ve just seen this and felt it deserved a Ret...,0,3.0,1.0,4.0,1.0,0.104167,0.000000,0.789,0.104167,0.000,0.000000,0.341667,0.383333,1.0,-0.041667,0.0,0.0,0.450000,0.050000,1.0,0.400000,0.0,0.0,0,0.143571,True,0,0,0.000000,0.056338,0.028169,0.028169,0.000000,xx,1,0.028169,[],0,15,0.0,0,0.000000,I’ve just seen this and felt it deserved a Ret...,0,{'😘': 'face blowing a kiss'},1,0,0.0,0.0,0,0,0,0,0,0,0,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."
1396,Omg how an earth is that a pen !!! 🤡,0,1.0,1.0,2.0,0.0,0.000000,0.089286,1.000,-0.089286,0.000,0.000000,0.075000,0.100000,1.0,-0.025000,0.0,0.0,0.075000,0.000000,1.0,0.075000,0.0,0.0,0,0.251250,True,3,0,0.000000,0.027778,0.000000,0.000000,0.000000,,1,0.027778,[],0,10,0.0,3,0.100000,Omg how an earth is that a pen !!! clown_face,0,{'🤡': 'clown face'},1,0,0.0,0.0,1,0,0,0,0,0,0,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."
1397,Bringing Kanye and drake to a tl near you,0,2.0,0.0,2.0,0.0,0.045455,0.068182,1.000,-0.022727,0.100,0.400000,0.158333,0.166667,1.0,-0.008333,0.0,0.0,0.075000,0.000000,1.0,0.075000,0.0,0.0,0,0.251250,False,0,0,0.400000,0.024390,0.024390,0.024390,0.000000,Kanye,0,0.048780,[],0,9,0.0,0,0.111111,Bringing Kanye and drake to a tl near you,0,{},0,0,0.0,0.0,0,0,0,0,0,0,0,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."
1398,"I love it when women are referred to as ""girl ...",0,5.0,2.0,4.0,1.0,0.729167,0.031250,0.770,0.697917,0.625,0.600000,0.450000,0.050000,1.0,0.400000,0.0,0.0,0.300000,0.100000,1.0,0.200000,0.0,0.0,0,0.091364,True,1,0,0.600000,0.023810,0.000000,0.023810,0.000000,,0,0.011905,[],0,17,0.0,5,0.000000,"I love it when women are referred to as ""girl ...",0,{},0,0,0.0,0.0,0,0,0,0,0,0,0,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."


In [None]:
# Test-set inference for the BERT model that also consumes the engineered features.
valid_dataset = BertDataset(
    df_test_modified,
    tokenizer=tokenizer_bert_base_uncased,
    max_length=128,
)
valid_loader = DataLoader(
    valid_dataset,
    batch_size=16,
    num_workers=2,
    shuffle=False,  # keep row order so predictions line up with the test frame
    pin_memory=True,
)
# One checkpoint per fold (0-4), all stored in the same directory.
# NOTE(review): the folder is named `bert_base_cased` while the tokenizer and the
# architecture used here are the uncased ones - confirm the folder name is only a misnomer.
MODEL_PATH_2 = [
    f'/content/drive/MyDrive/ISarcasm/Models/bert_base_cased/Loss-Fold-{fold}.bin'
    for fold in range(5)
]
preds_bert_based_uncased = inference_bert(
    MODEL_PATH_2, valid_loader, 'cuda', BertBaseUncased_Model_Arch()
)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Getting predictions for model 1


100%|██████████| 88/88 [00:45<00:00,  1.93it/s]


Getting predictions for model 2


100%|██████████| 88/88 [00:45<00:00,  1.94it/s]


Getting predictions for model 3


100%|██████████| 88/88 [00:45<00:00,  1.95it/s]


Getting predictions for model 4


100%|██████████| 88/88 [00:45<00:00,  1.95it/s]


Getting predictions for model 5


100%|██████████| 88/88 [00:45<00:00,  1.95it/s]


In [None]:
# Test-set inference for the DeBERTa model (text only).
# Rebinds valid_dataset / valid_loader / MODEL_PATH_2 for this model.
valid_dataset = DatasetTest(
    df_test,
    tokenizer=tokenizer_deberta,
    max_length=128,
    column_='text',
)
valid_loader = DataLoader(
    valid_dataset,
    batch_size=16,
    num_workers=2,
    shuffle=False,  # keep row order so predictions line up with the test frame
    pin_memory=True,
)
# One checkpoint per fold (0-4), all stored in the same directory.
MODEL_PATH_2 = [
    f'/content/drive/MyDrive/ISarcasm/Models/deberta_base/Loss-Fold-{fold}.bin'
    for fold in range(5)
]
preds_deberta = inference(
    MODEL_PATH_2, valid_loader, 'cuda', Deberta_Model_Arch()
)

Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaModel: ['lm_predictions.lm_head.LayerNorm.weight', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.bias', 'lm_predictions.lm_head.LayerNorm.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Getting predictions for model 1


100%|██████████| 88/88 [00:36<00:00,  2.39it/s]


Getting predictions for model 2


100%|██████████| 88/88 [00:36<00:00,  2.41it/s]


Getting predictions for model 3


100%|██████████| 88/88 [00:36<00:00,  2.43it/s]


Getting predictions for model 4


100%|██████████| 88/88 [00:36<00:00,  2.42it/s]


Getting predictions for model 5


100%|██████████| 88/88 [00:36<00:00,  2.43it/s]


In [None]:
# Test-set inference for the RoBERTa model (text only).
# Rebinds valid_dataset / valid_loader / MODEL_PATH_2 for this model.
valid_dataset = DatasetTest(
    df_test,
    tokenizer=tokenizer_roberta,
    max_length=128,
    column_='text',
)
valid_loader = DataLoader(
    valid_dataset,
    batch_size=16,
    num_workers=2,
    shuffle=False,  # keep row order so predictions line up with the test frame
    pin_memory=True,
)
# One checkpoint per fold (0-4), all stored in the same directory.
MODEL_PATH_2 = [
    f'/content/drive/MyDrive/ISarcasm/Models/roberta_base/Loss-Fold-{fold}.bin'
    for fold in range(5)
]
preds_roberta = inference(
    MODEL_PATH_2, valid_loader, 'cuda', roberta_Model_Arch()
)

Downloading:   0%|          | 0.00/478M [00:00<?, ?B/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Getting predictions for model 1


100%|██████████| 88/88 [00:29<00:00,  2.94it/s]


Getting predictions for model 2


100%|██████████| 88/88 [00:29<00:00,  2.98it/s]


Getting predictions for model 3


100%|██████████| 88/88 [00:29<00:00,  2.98it/s]


Getting predictions for model 4


100%|██████████| 88/88 [00:29<00:00,  2.97it/s]


Getting predictions for model 5


100%|██████████| 88/88 [00:29<00:00,  2.98it/s]


In [None]:
# svm_model=joblib.load('/content/drive/MyDrive/ISarcasm/ML_task_A_models/svm_model_bin.sav')
# pred_svm=svm_model.predict()

In [None]:
# Wrap every transformer ensemble's predictions in a one-column frame. The column
# name (spelling included) is the same one later read from the SVM prediction CSVs.
df_bert_tweet = pd.DataFrame(data=preds_bert_tweet, columns=['Predictiion'])
df_bert_tweet_lstm = pd.DataFrame(data=preds_bert_tweet_lstm, columns=['Predictiion'])
df_bert_based_uncased = pd.DataFrame(data=preds_bert_based_uncased, columns=['Predictiion'])
df_deberta = pd.DataFrame(data=preds_deberta, columns=['Predictiion'])
df__roberta = pd.DataFrame(data=preds_roberta, columns=['Predictiion'])

# Predictions of the classical models are loaded from CSV files on Drive.
df_svm = pd.read_csv(
    '/content/drive/MyDrive/ISarcasm/ML_task_A_models/svm_model_prediction.csv'
)
df_svm_bagging = pd.read_csv(
    '/content/drive/MyDrive/ISarcasm/ML_task_A_models/bagging_svm_model_prediction.csv'
)

In [None]:
# One prediction Series per voter; the order here becomes the column order of
# the table built from this list.
all_data = [
    frame['Predictiion']
    for frame in (
        df_bert_tweet,
        df_bert_tweet_lstm,
        df_bert_based_uncased,
        df_deberta,
        df_svm,
        df__roberta,
        df_svm_bagging,
    )
]




In [None]:
# Put the prediction Series side by side: one row per sample, one column per model.
votes = pd.concat(all_data, axis=1)


In [None]:
# Display the per-model vote table.
votes

Unnamed: 0,Predictiion,Predictiion.1,Predictiion.2,Predictiion.3,Predictiion.4,Predictiion.5,Predictiion.6
0,0,0,0,0,0,0,0
1,0,0,0,0,1,0,1
2,1,1,1,0,0,0,0
3,1,1,0,0,0,0,0
4,0,1,1,0,1,0,1
...,...,...,...,...,...,...,...
1395,0,0,0,0,0,0,0
1396,0,0,0,0,1,1,1
1397,0,0,0,0,1,0,0
1398,1,1,1,1,0,1,0


In [None]:
# Row-wise majority vote. `mode` yields one column per tied modal value, so the
# result is 2-D and only its first column is written to the submission file.
# NOTE(review): presumably an odd number of 0/1 voters means there is never a
# tie (and therefore no NaN padding) - confirm.
predictions = votes.mode(axis=1).to_numpy()

In [None]:
# Preview the votes as a flat array; the result is not assigned, so
# `predictions` itself is left unchanged.
predictions.flatten()

array([0, 0, 0, ..., 0, 1, 0])

In [None]:
# Sanity check: number of rows in the final prediction array.
len(predictions)

1400

In [None]:
# Write the submission file: a `task_a_en` header line followed by one prediction per line.
with open('/content/drive/MyDrive/ISarcasm/Test_prediction/task_a_en.txt', 'w') as f:
    f.write('task_a_en' + '\n')
    for pred in predictions:
        # Each row of `predictions` is indexable; only its first entry is written.
        f.write(str(pred[0]) + '\n')

In [None]:
# NOTE(review): `prediction_final` is not assigned anywhere in the visible part of
# the notebook - confirm it exists on a fresh kernel, otherwise drop this cell.
prediction_final

In [None]:
# NOTE(review): `prediction_list` is not assigned anywhere in the visible part of
# the notebook - confirm it exists on a fresh kernel, otherwise drop this cell.
prediction_list