In [None]:
#!pip3 install datasets transformers

In [30]:
from datasets import load_dataset
from datasets import concatenate_datasets
langs=["cs","de","en","it","nl","sk"]
data_cs= load_dataset("multi_eurlex", language="cs",split='train[:200]')
data_de= load_dataset("multi_eurlex", language="de",split='train[:1000]')
data_en= load_dataset("multi_eurlex", language="en",split='train[:1000]')
data_it= load_dataset("multi_eurlex", language="it",split='train[:1000]')
data_nl= load_dataset("multi_eurlex", language="nl",split='train[:1000]')
data_sk= load_dataset("multi_eurlex", language="sk",split='train[:200]')

#data_train=concatenate_datasets([data_cs['train'],data_de["train"],data_en["train"],data_it["train"],data_nl["train"],data_sk["train"]])
#data_test=concatenate_datasets([data_cs['test'],data_de["test"],data_en["test"],data_it["test"],data_nl["test"],data_sk["test"]])

data_train = concatenate_datasets([data_de, data_en, data_it, data_nl])
data_test = concatenate_datasets([data_cs, data_sk])

Using custom data configuration default-language=cs
Reusing dataset multi_eurlex (/home/ubuntu/.cache/huggingface/datasets/multi_eurlex/default-language=cs/1.0.0/8ec8b79877a517369a143ead6679d1788d13e51cf641ed29772f4449e8364fb6)
Using custom data configuration default-language=de
Reusing dataset multi_eurlex (/home/ubuntu/.cache/huggingface/datasets/multi_eurlex/default-language=de/1.0.0/8ec8b79877a517369a143ead6679d1788d13e51cf641ed29772f4449e8364fb6)
Using custom data configuration default-language=en
Reusing dataset multi_eurlex (/home/ubuntu/.cache/huggingface/datasets/multi_eurlex/default-language=en/1.0.0/8ec8b79877a517369a143ead6679d1788d13e51cf641ed29772f4449e8364fb6)
Using custom data configuration default-language=it
Reusing dataset multi_eurlex (/home/ubuntu/.cache/huggingface/datasets/multi_eurlex/default-language=it/1.0.0/8ec8b79877a517369a143ead6679d1788d13e51cf641ed29772f4449e8364fb6)
Using custom data configuration default-language=nl
Reusing dataset multi_eurlex (/home/

In [31]:
import json
from random import shuffle
LABEL_MAP=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]  ##level 1 labels

In [12]:
from torch import nn
from torch.nn import functional as F
from torch.utils.data import Dataset, TensorDataset, DataLoader, RandomSampler
from torch.optim import Adam
from torch.nn import CrossEntropyLoss
from transformers import BertForSequenceClassification
from copy import deepcopy
import gc
import torch
from sklearn.metrics import accuracy_score
import numpy as np

class Learner(nn.Module):
    """
    Meta Learner
    """
    def __init__(self, args):
        """
        :param args:
        """
        super(Learner, self).__init__()
        
        self.num_labels = args.num_labels
        self.outer_batch_size = args.outer_batch_size
        self.inner_batch_size = args.inner_batch_size
        self.outer_update_lr  = args.outer_update_lr
        self.inner_update_lr  = args.inner_update_lr
        self.inner_update_step = args.inner_update_step
        self.inner_update_step_eval = args.inner_update_step_eval
        self.bert_model = args.bert_model
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        
        self.model = BertForSequenceClassification.from_pretrained(self.bert_model, num_labels = self.num_labels)
        self.outer_optimizer = Adam(self.model.parameters(), lr=self.outer_update_lr)
        self.model.train()

    def forward(self, batch_tasks, training = True):
        """
        batch = [(support TensorDataset, query TensorDataset),
                 (support TensorDataset, query TensorDataset),
                 (support TensorDataset, query TensorDataset),
                 (support TensorDataset, query TensorDataset)]
        
        # support = TensorDataset(all_input_ids, all_attention_mask, all_segment_ids, all_label_ids)
        """
        task_accs = []
        sum_gradients = []
        num_task = len(batch_tasks)
        num_inner_update_step = self.inner_update_step if training else self.inner_update_step_eval

        for task_id, task in enumerate(batch_tasks):
            support = task[0]
            query   = task[1]
            
            fast_model = deepcopy(self.model)
            fast_model.to(self.device)
            support_dataloader = DataLoader(support, sampler=RandomSampler(support),
                                            batch_size=self.inner_batch_size)
            
            inner_optimizer = Adam(fast_model.parameters(), lr=self.inner_update_lr)
            fast_model.train()
            
            print('----Task',task_id, '----')
            for i in range(0,num_inner_update_step):
                all_loss = []
                for inner_step, batch in enumerate(support_dataloader):
                    
                    batch = tuple(t.to(self.device) for t in batch)
                    input_ids, attention_mask, segment_ids, label_id = batch
                    outputs = fast_model(input_ids, attention_mask, segment_ids, labels = label_id)
                    
                    loss = outputs[0]              
                    loss.backward()
                    inner_optimizer.step()
                    inner_optimizer.zero_grad()
                    
                    all_loss.append(loss.item())
                
                if i % 4 == 0:
                    print("Inner Loss: ", np.mean(all_loss))

            query_dataloader = DataLoader(query, sampler=None, batch_size=len(query))
            query_batch = iter(query_dataloader).next()
            query_batch = tuple(t.to(self.device) for t in query_batch)
            q_input_ids, q_attention_mask, q_segment_ids, q_label_id = query_batch
            q_outputs = fast_model(q_input_ids, q_attention_mask, q_segment_ids, labels = q_label_id)
            
            if training:
                q_loss = q_outputs[0]
                q_loss.backward()
                fast_model.to(torch.device('cpu'))
                for i, params in enumerate(fast_model.parameters()):
                    if task_id == 0:
                        sum_gradients.append(deepcopy(params.grad))
                    else:
                        sum_gradients[i] += deepcopy(params.grad)

            q_logits = F.softmax(q_outputs[1],dim=1)
            pre_label_id = torch.argmax(q_logits,dim=1)
            pre_label_id = pre_label_id.detach().cpu().numpy().tolist()
            q_label_id = q_label_id.detach().cpu().numpy().tolist()
            
            acc = accuracy_score(pre_label_id,q_label_id)
            task_accs.append(acc)
            
            del fast_model, inner_optimizer
            torch.cuda.empty_cache()
        
        if training:
            # Average gradient across tasks
            for i in range(0,len(sum_gradients)):
                sum_gradients[i] = sum_gradients[i] / float(num_task)

            #Assign gradient for original model, then using optimizer to update its weights
            for i, params in enumerate(self.model.parameters()):
                params.grad = sum_gradients[i]

            self.outer_optimizer.step()
            self.outer_optimizer.zero_grad()
            
            del sum_gradients
            gc.collect()
        
        return np.mean(task_accs)

In [13]:
import time, os, random
def random_seed(value):
    torch.backends.cudnn.deterministic=True
    torch.manual_seed(value)
    torch.cuda.manual_seed(value)
    np.random.seed(value)
    random.seed(value)

In [14]:
def create_batch_of_tasks(taskset, is_shuffle = True, batch_size = 4):
    idxs = list(range(0,len(taskset)))
    #idxs = list(range(0,512))
    if is_shuffle:
        random.shuffle(idxs)
    for i in range(0,len(idxs), batch_size):
        yield [taskset[idxs[i]] for i in range(i, min(i + batch_size,len(taskset)))]

In [15]:
class TrainingArgs:
    def __init__(self):
        self.num_labels = 21
        self.meta_epoch=10
        self.k_spt=80
        self.k_qry=20
        self.outer_batch_size = 2    #change
        self.inner_batch_size = 12
        self.outer_update_lr = 0.001  #Change
        self.inner_update_lr = 0.001
        self.inner_update_step = 10
        self.inner_update_step_eval = 40
        self.bert_model = 'xlm-roberta-base'
        self.num_task_train = 500
        self.num_task_test = 5

args = TrainingArgs()

In [32]:
# low_resource=['cs','sk']
train_examples=[r for r in data_train]
test_examples=[r for r in data_test]
#print(type(train_examples))
#print(len(train_examples))
#print(train_examples[0])

<class 'list'>
4000
{'celex_id': '32006D0213', 'text': 'ENTSCHEIDUNG DER KOMMISSION\nvom 6. März 2006\nzur Festlegung der Brandverhaltensklassen für bestimmte Bauprodukte (Holzfußböden sowie Wand- und Deckenbekleidungen aus Massivholz)\n(Bekannt gegeben unter Aktenzeichen K(2006) 655)\n(Text von Bedeutung für den EWR)\n(2006/213/EG)\nDIE KOMMISSION DER EUROPÄISCHEN GEMEINSCHAFTEN -\ngestützt auf den Vertrag zur Gründung der Europäischen Gemeinschaft,\ngestützt auf die Richtlinie 89/106/EWG des Rates vom 21. Dezember 1988 zur Angleichung der Rechts- und Verwaltungsvorschriften der Mitgliedstaaten über Bauprodukte (1), insbesondere auf Artikel 20 Absatz 2,\nin Erwägung nachstehender Gründe:\n(1)\nNach der Richtlinie 89/106/EWG kann es zur Berücksichtigung der auf einzelstaatlicher, regionaler oder lokaler Ebene bestehenden unterschiedlichen Schutzniveaus für Bauwerke erforderlich sein, dass in den Grundlagendokumenten Klassen entsprechend der Leistung des jeweiligen Produkts im Hinblick 

In [33]:
from torch.utils.data import Dataset, TensorDataset
class MetaTask(Dataset):
    
    def __init__(self, examples, num_task, k_support, k_query, tokenizer):
        """
        :param samples: list of samples
        :param num_task: number of training tasks.
        :param k_support: number of support sample per task
        :param k_query: number of query sample per task
        """
        self.examples = examples
        random.shuffle(self.examples)
        
        self.num_task = num_task
        self.k_support = k_support
        self.k_query = k_query
        self.tokenizer = tokenizer
        self.max_seq_length = 200       #
        self.create_batch(self.num_task)
    
    def create_batch(self, num_task):
        self.supports = []  # support set
        self.queries = []  # query set
        
        for b in range(num_task):  # for each task
            # 1.select domain randomly
            #domain = random.choice(self.examples)['domain']   #domain corresponds to low resource language
            #domainExamples = [e for e in self.examples if e['domain'] == domain] 
            #domainExamples=random.choice(self.examples)
            domainExamples=random.sample(self.examples, 200)
            # 1.select k_support + k_query examples from domain randomly
            selected_examples = random.sample(list(domainExamples),self.k_support + self.k_query)
            random.shuffle(selected_examples)
            exam_train = selected_examples[:self.k_support]
            exam_test  = selected_examples[self.k_support:]
            
            self.supports.append(exam_train)
            self.queries.append(exam_test)

    def create_feature_set(self,examples):
        all_input_ids      = torch.empty((len(examples), self.max_seq_length), dtype = torch.long)
        all_attention_mask = torch.empty((len(examples), self.max_seq_length), dtype = torch.long)
        all_segment_ids    = torch.empty((len(examples), self.max_seq_length), dtype = torch.long)
        all_label_ids      = torch.empty(len(examples), dtype = torch.long)

        for id_,example in enumerate(examples):
            input_ids = tokenizer.encode(example['text'][:self.max_seq_length])
            attention_mask = [1] * len(input_ids)
            segment_ids    = [0] * len(input_ids)

            while len(input_ids) < self.max_seq_length:
            #while len(input_ids) < len(example['text']):
                input_ids.append(0)
                attention_mask.append(0)
                segment_ids.append(0)

            #label_id = LABEL_MAP[example['label']].  ##check labels
            label_id = LABEL_MAP[example['labels'][0]]
            all_input_ids[id_] = torch.Tensor(input_ids).to(torch.long)
            all_attention_mask[id_] = torch.Tensor(attention_mask).to(torch.long)
            all_segment_ids[id_] = torch.Tensor(segment_ids).to(torch.long)
            all_label_ids[id_] = torch.Tensor([label_id]).to(torch.long)

        tensor_set = TensorDataset(all_input_ids, all_attention_mask, all_segment_ids, all_label_ids)  
        return tensor_set
    
    def __getitem__(self, index):
        support_set = self.create_feature_set(self.supports[index])
        query_set   = self.create_feature_set(self.queries[index])
        return support_set, query_set

    def __len__(self):
        # as we have built up to batchsz of sets, you can sample some small batch size of sets.
        return self.num_task

In [34]:
from transformers import BertModel, AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained('xlm-roberta-base', do_lower_case = True)
train = MetaTask(train_examples, num_task = 100, k_support=100, k_query=30, tokenizer = tokenizer)
train[0]

(<torch.utils.data.dataset.TensorDataset at 0x7fc26a811c10>,
 <torch.utils.data.dataset.TensorDataset at 0x7fc272363590>)

In [35]:
learner = Learner(args)
    
test = MetaTask(test_examples, num_task = args.num_task_test, k_support=args.k_spt,k_query=args.k_qry, tokenizer = tokenizer)

global_step = 0
#for epoch in range(args.epoch):
for epoch in range(args.meta_epoch):
        train = MetaTask(train_examples, num_task = args.num_task_train, k_support=args.k_spt, 
                         k_query=args.k_qry, tokenizer = tokenizer)

        db = create_batch_of_tasks(train, is_shuffle = True, batch_size = args.outer_batch_size)

        for step, task_batch in enumerate(db):

            acc = learner(task_batch)

            print('Step:', step, '\ttraining Acc:', acc)

            if global_step % 10 == 0:
                random_seed(123)
                print("\n-----------------Testing Mode-----------------\n")
                db_test = create_batch_of_tasks(test, is_shuffle = False, batch_size = 1)
                acc_all_test = []

                for test_batch in db_test:
                    acc = learner(test_batch, training = False)
                    acc_all_test.append(acc)

                print('Step:', step, 'Test F1:', np.mean(acc_all_test))

                random_seed(int(time.time() % 10))

            global_step += 1

You are using a model of type xlm-roberta to instantiate a model of type bert. This is not supported for all configurations of models and can yield errors.
Some weights of the model checkpoint at xlm-roberta-base were not used when initializing BertForSequenceClassification: ['roberta.encoder.layer.5.attention.self.query.bias', 'roberta.encoder.layer.6.intermediate.dense.bias', 'roberta.embeddings.token_type_embeddings.weight', 'roberta.encoder.layer.3.attention.output.LayerNorm.weight', 'lm_head.bias', 'roberta.encoder.layer.9.attention.output.LayerNorm.weight', 'roberta.encoder.layer.11.attention.self.value.weight', 'roberta.encoder.layer.8.attention.self.key.weight', 'roberta.encoder.layer.8.output.LayerNorm.bias', 'roberta.embeddings.LayerNorm.bias', 'lm_head.dense.weight', 'roberta.encoder.layer.5.attention.self.key.weight', 'roberta.encoder.layer.6.attention.self.query.weight', 'roberta.encoder.layer.7.output.dense.bias', 'roberta.encoder.layer.6.attention.output.dense.weight', '

----Task 0 ----
Inner Loss:  4.237973724092756
Inner Loss:  2.3726201908929005
Inner Loss:  2.3076487268720354
----Task 1 ----
Inner Loss:  3.7493675776890347
Inner Loss:  2.0911532981055125
Inner Loss:  2.053628887448992
Step: 0 	training Acc: 0.45

-----------------Testing Mode-----------------

----Task 0 ----
Inner Loss:  3.3860280173165456
Inner Loss:  2.349792957305908
Inner Loss:  2.2937654427119663
Inner Loss:  2.3204820496695384
Inner Loss:  2.3693802867616927
Inner Loss:  2.3483078479766846
Inner Loss:  2.2813731942858015
Inner Loss:  2.3149803706577847
Inner Loss:  2.337707689830235
Inner Loss:  2.260998708861215
----Task 0 ----
Inner Loss:  3.204136848449707
Inner Loss:  2.0967569521495273


KeyboardInterrupt: 