<a href="https://colab.research.google.com/github/musitafa0032/Multilingual-Sarcasm-detection-via-meta-learning/blob/main/Multilingual_Sarcasm_detection_Meta_learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import torch
import pandas as pd
from torch.utils.data import Dataset,TensorDataset,RandomSampler,DataLoader
import numpy as np
#import collections
import random
from torch import nn
from torch.nn import functional as F
from torch.optim import AdamW
from torch.nn import CrossEntropyLoss
from transformers import BertTokenizer,BertForSequenceClassification
from copy import deepcopy
import gc
from sklearn.metrics import accuracy_score

# Load and shuffle the training data

In [None]:
def _load_shuffled_tweets(csv_path):
    """Read one training CSV, drop unusable rows and return it in random order.

    Only the ``tweet`` and ``sarcastic`` columns are consumed further down, so
    only those two are checked for missing values. Checking every column would
    also discard rows that are merely missing a value in a column the notebook
    never reads.

    Args:
        csv_path: Path of the CSV file to load.

    Returns:
        A DataFrame without missing tweets/labels, shuffled, with a fresh
        0..n-1 index.
    """
    frame = pd.read_csv(csv_path)
    # dropna returns a new frame; the result has to be kept, otherwise the
    # call has no effect.
    frame = frame.dropna(axis=0, how="any", subset=["tweet", "sarcastic"])
    # frac=1 samples every row, i.e. a shuffle. No random_state is passed, so
    # the order differs from run to run.
    return frame.sample(frac=1, ignore_index=True)


df_ar = _load_shuffled_tweets("/train_data/train.Ar.csv")
tweets_ar = df_ar.tweet.to_list()
# Missing labels were removed above, so the cast to int cannot hit NaN.
labels_ar = df_ar.sarcastic.astype(int).to_list()

df_en = _load_shuffled_tweets("/train_data/train.En.csv")
tweets_en = df_en.tweet.to_list()
labels_en = df_en.sarcastic.astype(int).to_list()

In [None]:
from transformers import BertTokenizer
# The checkpoint is the *cased* multilingual model: its vocabulary is
# case-sensitive, so the tokenizer must not lower-case the input.
tokenizer = BertTokenizer.from_pretrained("bert-base-multilingual-cased", do_lower_case=False)

# One task per language, in the order [Arabic, English]. Each task is a list
# of {"text": tweet, "label": sarcastic_flag} records.
data_supports = [
    [{"text": tweet, "label": label} for tweet, label in zip(tweets, labels)]
    for tweets, labels in ((tweets_ar, labels_ar), (tweets_en, labels_en))
]

# Meta-learning task dataset and Reptile learner

In [None]:
class MetaTask(Dataset):
    """Dataset whose items are whole tasks rather than single examples.

    Item ``i`` is a ``(support_set, query_set)`` pair of ``TensorDataset``
    objects built from ``data_lists_support[i]`` and ``data_lists_query[i]``.
    Each example in those lists is a mapping with a ``"text"`` and a
    ``"label"`` entry.

    Args:
        data_lists_support: Sequence of tasks; each task is a list of examples
            used for the inner-loop updates.
        data_lists_query: Sequence of tasks (same length and order) used for
            evaluating the adapted model.
        tokenizer: Object exposing ``encode(text, max_length=..., truncation=...)``
            that returns a list of token ids. If it has a ``pad_token_id``
            attribute that id is used for padding, otherwise 0.
        max_seq_length: Fixed length every sequence is truncated/padded to.
    """

    def __init__(self, data_lists_support, data_lists_query, tokenizer, max_seq_length=256):
        self.data_lists_support = data_lists_support
        self.data_lists_query = data_lists_query
        self.max_seq_length = max_seq_length
        self.tokenizer = tokenizer

    def create_feature_set(self, examples):
        """Tokenise ``examples`` into fixed-length tensors.

        Args:
            examples: List of mappings with ``"text"`` and ``"label"`` entries.

        Returns:
            ``TensorDataset(input_ids, attention_mask, segment_ids, label_ids)``
            where the first three tensors have shape
            ``(len(examples), max_seq_length)`` and the last has shape
            ``(len(examples),)``; all are ``torch.long``.
        """
        num_examples = len(examples)
        seq_len = self.max_seq_length

        pad_id = getattr(self.tokenizer, "pad_token_id", None)
        if pad_id is None:
            pad_id = 0

        # Pre-fill with the padded state; only the real-token prefix of each
        # row is overwritten below.
        all_input_ids = torch.full((num_examples, seq_len), pad_id, dtype=torch.long)
        all_attention_mask = torch.zeros((num_examples, seq_len), dtype=torch.long)
        # Single-sentence input: every token belongs to segment 0.
        all_segment_ids = torch.zeros((num_examples, seq_len), dtype=torch.long)
        all_label_ids = torch.zeros(num_examples, dtype=torch.long)

        for row, example in enumerate(examples):
            # Let the tokenizer truncate so the closing special token is kept;
            # without truncation a long text would not fit into the row.
            token_ids = self.tokenizer.encode(
                str(example["text"]), max_length=seq_len, truncation=True
            )
            # Hard guard in case the tokenizer ignored the truncation request.
            token_ids = token_ids[:seq_len]
            used = len(token_ids)

            all_input_ids[row, :used] = torch.tensor(token_ids, dtype=torch.long)
            all_attention_mask[row, :used] = 1
            all_label_ids[row] = int(example["label"])

        return TensorDataset(all_input_ids, all_attention_mask, all_segment_ids, all_label_ids)

    def __getitem__(self, index):
        """Return the ``(support_set, query_set)`` pair of task ``index``."""
        support_set = self.create_feature_set(self.data_lists_support[index])
        query_set = self.create_feature_set(self.data_lists_query[index])
        return support_set, query_set

    def __len__(self):
        """Number of tasks."""
        return len(self.data_lists_support)

In [None]:
class Learner(nn.Module):
    """Reptile-style meta-learner around a BERT sequence classifier.

    For every task a copy of the meta model is fine-tuned on the task's
    support set. The difference ``meta_weights - adapted_weights``, averaged
    over the tasks of the batch, is handed to the outer optimizer as if it
    were a gradient.

    Args:
        num_labels: Number of target classes.
        outer_batch_size: Stored on the instance; ``forward`` itself uses
            ``len(batch_tasks)``.
        inner_batch_size: Mini-batch size for the support-set updates (also
            used as mini-batch size when scoring the query set).
        outer_update_lr: Learning rate of the outer (meta) AdamW optimizer.
        inner_update_lr: Learning rate of the per-task AdamW optimizer.
        inner_update_step: Passes over the support set per task when training.
        inner_update_step_eval: Passes over the support set per task when
            ``training=False``.
        bert_model: Name or path passed to
            ``BertForSequenceClassification.from_pretrained``.
    """

    def __init__(self,num_labels,outer_batch_size,inner_batch_size,outer_update_lr,inner_update_lr,inner_update_step,inner_update_step_eval,bert_model):
        super().__init__()

        self.num_labels = num_labels
        self.outer_batch_size = outer_batch_size
        self.inner_batch_size = inner_batch_size
        self.outer_update_lr = outer_update_lr
        self.inner_update_lr = inner_update_lr
        self.inner_update_step = inner_update_step
        self.inner_update_step_eval = inner_update_step_eval
        self.bert_model = bert_model
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        self.model = BertForSequenceClassification.from_pretrained(self.bert_model, num_labels=self.num_labels)
        self.outer_optimizer = AdamW(self.model.parameters(), lr=self.outer_update_lr)
        self.model.train()

    def forward(self, batch_tasks, training=True):
        """Adapt to every task in ``batch_tasks`` and optionally meta-update.

        Args:
            batch_tasks: Sequence of ``(support, query)`` dataset pairs whose
                items are ``(input_ids, attention_mask, segment_ids, label)``.
            training: When True the meta model is updated from the averaged
                weight differences; when False the tasks are only adapted to
                and scored.

        Returns:
            Mean query-set accuracy over the tasks of the batch.
        """
        task_accs = []
        sum_gradients = []
        num_task = len(batch_tasks)
        num_inner_update_step = self.inner_update_step if training else self.inner_update_step_eval

        for task_id, task in enumerate(batch_tasks):
            support = task[0]
            query = task[1]

            # Each task starts from an independent copy of the meta weights.
            fast_model = deepcopy(self.model)
            fast_model.to(self.device)
            support_dataloader = DataLoader(support, sampler=RandomSampler(support),
                                            batch_size=self.inner_batch_size)
            inner_optimizer = AdamW(fast_model.parameters(), lr=self.inner_update_lr)
            fast_model.train()

            print('----Task',task_id,'----')
            for update_step in range(num_inner_update_step):
                all_loss = []
                for batch in support_dataloader:
                    batch = tuple(t.to(self.device) for t in batch)
                    input_ids, attention_mask, segment_ids, label_id = batch
                    outputs = fast_model(input_ids, attention_mask=attention_mask,
                                         token_type_ids=segment_ids, labels=label_id)

                    loss = outputs[0]
                    loss.backward()
                    inner_optimizer.step()
                    inner_optimizer.zero_grad()

                    all_loss.append(loss.item())

                if update_step % 4 == 0:
                    print("Inner Loss:",np.mean(all_loss))

            if training:
                # The weight difference is used as a ready-made gradient, so it
                # must not be tracked by autograd. It is computed on the meta
                # parameter's device so it can be assigned to ``.grad`` later.
                with torch.no_grad():
                    param_pairs = zip(self.model.parameters(), fast_model.parameters())
                    for idx, (meta_param, fast_param) in enumerate(param_pairs):
                        delta = meta_param - fast_param.to(meta_param.device)
                        if task_id == 0:
                            sum_gradients.append(delta)
                        else:
                            sum_gradients[idx] += delta

            fast_model.eval()
            predicted_labels = []
            true_labels = []
            with torch.no_grad():
                # Score the query set in mini-batches; sending the whole set
                # through the model at once can exhaust device memory.
                query_dataloader = DataLoader(query, sampler=None, batch_size=self.inner_batch_size)
                for query_batch in query_dataloader:
                    query_batch = tuple(t.to(self.device) for t in query_batch)
                    q_input_ids, q_attention_mask, q_segment_ids, q_label_id = query_batch
                    q_outputs = fast_model(q_input_ids, attention_mask=q_attention_mask,
                                           token_type_ids=q_segment_ids, labels=q_label_id)

                    # outputs[1] holds the logits when labels are supplied;
                    # argmax over logits equals argmax over their softmax.
                    pre_label_id = torch.argmax(q_outputs[1], dim=1)
                    predicted_labels.extend(pre_label_id.cpu().numpy().tolist())
                    true_labels.extend(q_label_id.cpu().numpy().tolist())

            task_accs.append(accuracy_score(true_labels, predicted_labels))

            # Release the per-task copy before the next task is cloned.
            fast_model.to(torch.device('cpu'))
            del fast_model, inner_optimizer
            torch.cuda.empty_cache()

        if training and sum_gradients:
            # Average over tasks and install the result as the gradient of the
            # meta model, then let the outer optimizer apply it.
            for params, total in zip(self.model.parameters(), sum_gradients):
                params.grad = total / float(num_task)

            self.outer_optimizer.step()
            self.outer_optimizer.zero_grad()

            del sum_gradients
            gc.collect()

        return np.mean(task_accs)

    


In [None]:
from random import shuffle
#from collections import Counter
import torch
#from transformers import BertModel, BertTokenizer
import time
import logging
#import argparse
import os
# Root logger: let only CRITICAL records through.
logger = logging.getLogger()
logger.setLevel(logging.CRITICAL)
# Synchronous CUDA launches, so a GPU error is reported at the call that
# caused it (helps locating the failing line when debugging on GPU).
os.environ.update({'CUDA_LAUNCH_BLOCKING': '1'})
#from reptile import Learner
#from task import MetaTask
import random
#import numpy as np

In [None]:
def random_seed(value):
    """Seed the Python, NumPy and PyTorch (CPU and CUDA) generators with ``value``.

    Also switches cuDNN to its deterministic mode.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
    )
    for seed_fn in seeders:
        seed_fn(value)
    torch.backends.cudnn.deterministic = True

def create_batch_of_tasks(taskset, is_shuffle = True, batch_size = 4):
    """Lazily yield lists of up to ``batch_size`` tasks taken from ``taskset``.

    Tasks are visited in index order, or in a random order drawn with
    ``random.shuffle`` when ``is_shuffle`` is true. The last list may be
    shorter than ``batch_size``.
    """
    order = list(range(len(taskset)))
    if is_shuffle:
        random.shuffle(order)
    for start in range(0, len(order), batch_size):
        chunk = order[start:start + batch_size]
        yield [taskset[position] for position in chunk]



In [None]:
# Meta-training driver: 10 epochs over the task list, with an evaluation pass
# every 20 outer steps. Metrics are printed and appended to LOG_PATH.
LOG_PATH = 'log.txt'

learner = Learner(num_labels=2, outer_batch_size=2, inner_batch_size=12,
                  outer_update_lr=5e-5, inner_update_lr=5e-5,
                  inner_update_step=10, inner_update_step_eval=40,
                  bert_model='bert-base-multilingual-cased')

# NOTE(review): the evaluation tasks are built from the same lists as the
# training tasks, and support == query inside each task, so the numbers below
# measure fit on seen data rather than generalisation. Swap in held-out lists
# when they are available.
test_task = MetaTask(data_supports, data_supports, tokenizer)

global_step = 0
for epoch in range(10):
    train_task = MetaTask(data_supports, data_supports, tokenizer)
    # Keep the number of tasks per outer step tied to the learner's setting.
    db = create_batch_of_tasks(train_task, is_shuffle=True, batch_size=learner.outer_batch_size)
    for step, task_batch in enumerate(db):
        acc = learner(task_batch)
        print('Step:', step, '\ttraining Acc:', acc)
        # Open the log only for the write, so the handle is always closed
        # even if a training step raises.
        with open(LOG_PATH, 'a') as f:
            f.write('epoch {} step {} training acc {}\n'.format(epoch, step, acc))

        if global_step % 20 == 0:
            # Fixed seed so successive evaluations are comparable.
            random_seed(123)
            print("\n-----------------Testing Mode-----------------\n")
            db_test = create_batch_of_tasks(test_task, is_shuffle=False, batch_size=1)
            acc_all_test = []

            for test_batch in db_test:
                acc = learner(test_batch, training=False)
                acc_all_test.append(acc)

            # The learner returns accuracy, so the value is labelled as such.
            test_acc = np.mean(acc_all_test)
            print('Step:', step, 'Test Acc:', test_acc)
            with open(LOG_PATH, 'a') as f:
                f.write('epoch {} step {} test acc {}\n'.format(epoch, step, test_acc))

            # Leave the fixed evaluation seed again before training continues.
            random_seed(int(time.time() % 10))

        global_step += 1
                

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model ch

----Task 0 ----
Inner Loss: 0.3809157653801864


In [None]:
pip install transformers

Collecting transformers
  Downloading transformers-4.12.5-py3-none-any.whl (3.1 MB)
[K     |████████████████████████████████| 3.1 MB 18.5 MB/s 
[?25hCollecting tokenizers<0.11,>=0.10.1
  Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)
[K     |████████████████████████████████| 3.3 MB 38.9 MB/s 
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.1.2-py3-none-any.whl (59 kB)
[K     |████████████████████████████████| 59 kB 6.3 MB/s 
[?25hCollecting sacremoses
  Downloading sacremoses-0.0.46-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 68.8 MB/s 
Collecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 51.4 MB/s 
Installing collected packages: pyyaml, tokenizers, sacremoses, huggingface-hub, transformers


In [None]:
pip install pandas --upgrade

Collecting pandas
  Downloading pandas-1.3.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (11.3 MB)
[K     |████████████████████████████████| 11.3 MB 15.1 MB/s 
Installing collected packages: pandas
  Attempting uninstall: pandas
    Found existing installation: pandas 1.1.5
    Uninstalling pandas-1.1.5:
      Successfully uninstalled pandas-1.1.5
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-colab 1.0.0 requires pandas~=1.1.0; python_version >= "3.0", but you have pandas 1.3.4 which is incompatible.[0m
Successfully installed pandas-1.3.4
