## Submission 
- K-Fold with Random Masking (acc: 80.4%)
- K-Fold with Truncation (acc: 80.3%)
- Final Hard Voting (acc: 81.1%)

In [1]:
import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, Subset
import pandas as pd
import numpy as np
import re
import tarfile
import pickle as pickle
from tqdm import tqdm
import random
from transformers import AdamW
from transformers.optimization import get_cosine_schedule_with_warmup
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
import warnings

# Ignore Warnings
warnings.filterwarnings(action='ignore')

# Using KoELECTRA Model
from transformers import ElectraModel, ElectraTokenizer, ElectraForSequenceClassification
from transformers import *

# Added by Me
import os
from tqdm import tqdm
from ohsuz.utils import *
from ohsuz.loss import *
from ohsuz.config import *
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau, CosineAnnealingLR

There are 1 GPU(s) available.
We will use the GPU: Tesla V100-PCIE-32GB
There are 1 GPU(s) available.
We will use the GPU: Tesla V100-PCIE-32GB


In [2]:
batch_size = 16
epochs = 10
log_interval = 50
lr = 1e-5

In [3]:
seed_everything(2021)

이 실험은 seed 2021로 고정되었습니다.


In [4]:
error_label_0 = ['wikitree-12599-4-108-111-4-7',
                 'wikipedia-25967-115-24-26-35-37',
                 'wikipedia-16427-6-14-17-20-22',
                 'wikipedia-16427-8-0-3-26-28',
                 'wikitree-19765-5-30-33-6-8',
                 'wikitree-58702-0-18-20-22-24',
                 'wikitree-71638-8-21-23-15-17',
                 'wikipedia-257-0-0-1-53-57',
                 'wikipedia-13649-28-66-70-14-24',
                 'wikipedia-6017-8-20-26-4-7']
error_label_1 = ['wikitree-55837-4-0-2-10-11']
error_label_2 = ['wikitree-62775-3-3-7-0-2']
error_label_3 = ['wikipedia-23188-0-74-86-41-42']

### 1. Dataset & DataLoader 준비

In [5]:
def data_truncation(data):
    padding_length = 50

    entity_min_index = min(data[3], data[6])
    entity_max_index = max(data[4], data[7])

    min_entity_start, min_entity_end = entity_min_index-padding_length, entity_min_index+padding_length
    max_entity_start, max_entity_end = entity_max_index-padding_length, entity_max_index+padding_length

    if min_entity_end < max_entity_start:
        min_entity_start = max(min_entity_start, 0)
        max_entity_end = min(max_entity_end, len(data[1]))
        return data[1][min_entity_start:min_entity_end] + data[1][max_entity_start:max_entity_end] # </s> 제거
    else:
        min_entity_start = max(min_entity_start, 0)
        max_entity_end = min(max_entity_end, len(data[1]))
        return data[1][min_entity_start:max_entity_end]

In [6]:
def add_entity_tokens(sentence, a1, a2, b1, b2):
    new_sentence = None
    special_tokens = special_tokens_dict['additional_special_tokens']
    
    if a1 > b1: # b1 먼저
        new_sentence = sentence[:b1] + special_tokens[2] + sentence[b1:b2+1] + special_tokens[3] + sentence[b2+1:a1] + special_tokens[0] + sentence[a1:a2+1] + special_tokens[1] + sentence[a2+1:]
    else: # a1 먼저
        new_sentence = sentence[:a1] + special_tokens[0] + sentence[a1:a2+1] + special_tokens[1] + sentence[a2+1:b1] + special_tokens[2] + sentence[b1:b2+1] + special_tokens[3] + sentence[b2+1:]
    return new_sentence

In [7]:
def load_data(dataset_dir, add_entity=True, truncation=False):
    with open('/opt/ml/input/data/label_type.pkl', 'rb') as f:
        label_type = pickle.load(f)
    dataset = pd.read_csv(dataset_dir, delimiter='\t', header=None)
    if truncation:
        dataset[1] = dataset.apply(data_truncation, axis=1)
    dataset = preprocessing_dataset(dataset, label_type, add_entity)
    return dataset


def preprocessing_dataset(dataset, label_type, add_entity):
    label = []
    
    for ID, i in zip(dataset[0], dataset[8]):
        if i == 'blind':
            label.append(100)
        elif ID in error_label_0:
            label.append(label_type['관계_없음'])
        elif ID in error_label_1:
            label.append(label_type['단체:구성원'])
        elif ID in error_label_2:
            label.append(label_type['단체:본사_도시'])
        elif ID in error_label_3:
            label.append(label_type['단체:하위_단체'])
        else:
            label.append(label_type[i])
    
    if add_entity:
        sentences = [add_entity_tokens(dataset[1][i], dataset[3][i], dataset[4][i], dataset[6][i], dataset[7][i]) for i in tqdm(range(len(dataset)))]
    else:
        sentences = dataset[1]
    
    out_dataset = pd.DataFrame({'sentence':sentences,'entity_01':dataset[2],'entity_02':dataset[5],'label':label})

    return out_dataset

In [8]:
class KlueDataset(Dataset):
    def __init__(self, tsv_file, add_entity=True, threshold=0.1):
        self.dataset = load_data(tsv_file, add_entity)
        self.dataset['sentence'] = self.dataset['entity_01'] + ' RELATION ' + self.dataset['entity_02'] + ' </s></s> ' + self.dataset['sentence']
        self.sentences = list(self.dataset['sentence'])
        self.labels = list(self.dataset['label'])
        self.tokenizer = XLMRobertaTokenizer.from_pretrained('xlm-roberta-large')
        self.threshold = threshold
        
    def __len__(self):
        return len(self.dataset)
    
    def __getitem__(self, idx):
        sentence, label = self.sentences[idx], self.labels[idx]
        inputs = self.tokenizer(
            sentence,
            return_tensors='pt',
            truncation=True,
            max_length=200,
            pad_to_max_length=True,
            add_special_tokens=True
        )
            
        input_ids = inputs['input_ids'][0]
        attention_mask = inputs['attention_mask'][0]
        
        return self._random_enk(input_ids), attention_mask, label
    
    def _random_enk(self, sent):
        mask_id = self.tokenizer.encode('<mask>')[1]

        decoded_ids = self.tokenizer.decode(sent, skip_special_tokens=False)
        ent = list(decoded_ids.split('</s>'))[0]

        encoded_ent_list = list(set(self.tokenizer.encode(ent)+[1]))
        ent_token_ids = []

        for i, token in enumerate(sent):
            if token in encoded_ent_list: continue
            elif self.threshold > random.random(): 
                sent[i] = mask_id
        return sent

In [9]:
dataset = KlueDataset(os.path.join(train_dir, 'train.tsv'))
test_dataset = KlueDataset(os.path.join(test_dir, 'test.tsv'))

100%|██████████| 9000/9000 [00:00<00:00, 27816.04it/s]
100%|██████████| 1000/1000 [00:00<00:00, 28365.77it/s]


In [10]:
kf = KFold(n_splits=10, shuffle=False)

In [11]:
test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=5)

### 2. Train

In [13]:
ls_loss = LabelSmoothingLoss()
cels_loss = CELSLoss()
focal_loss = FocalLoss()

In [14]:
def calc_accuracy(X,Y):
    max_vals, max_indices = torch.max(X, 1)
    train_acc = (max_indices == Y).sum().data.cpu().numpy()/max_indices.size()[0]
    return train_acc

In [15]:
for fold, (train_ids, test_ids) in enumerate(kf.split(dataset)):
    print(f'FOLD {fold}')
    print('--------------------------------')
    
    # Sample elements randomly from a given list of ids, no replacement.
    train_subsampler = torch.utils.data.SubsetRandomSampler(train_ids)
    val_subsampler = torch.utils.data.SubsetRandomSampler(test_ids)

    train_loader = DataLoader(dataset, batch_size=batch_size, num_workers=4, sampler=train_subsampler)
    val_loader = DataLoader(dataset, batch_size=batch_size, num_workers=4, sampler=val_subsampler)

    model = XLMRobertaForSequenceClassification.from_pretrained('xlm-roberta-large', num_labels=42).to(device)
    optimizer = AdamW(model.parameters(), lr=lr)
    scheduler = CosineAnnealingLR(optimizer, T_max=10, eta_min=1e-6)

    best_acc = 0.0
    best_loss = 9999.0
   
    for epoch in tqdm(range(10)):
        train_acc = 0.0
        test_acc = 0.0
        loss_sum = 0.0

        model.train()

        for batch_id, (input_ids_batch, attention_masks_batch, y_batch) in enumerate(train_loader):
            optimizer.zero_grad()
            y_batch = y_batch.to(device)
            y_pred = model(input_ids_batch.to(device), attention_mask=attention_masks_batch.to(device))[0]
            y_pred[:, 0] += 0.05
            loss = cels_loss(y_pred, y_batch)
            loss.backward()
            optimizer.step()
            scheduler.step()
            loss_sum += float(loss.data.cpu().numpy())
            train_acc += calc_accuracy(y_pred, y_batch)
            if batch_id % log_interval == 0:
                print(f"epoch {epoch+1} batch id {batch_id+1} loss {loss.data.cpu().numpy()} train acc {train_acc / (batch_id+1)}")

        train_acc = train_acc / (batch_id+1)
        print(f"epoch {epoch+1} train acc {train_acc}")

        
        model.eval()
        for batch_id, (input_ids_batch, attention_masks_batch, y_batch) in enumerate(val_loader):
            y_batch = y_batch.to(device)
            y_pred = model(input_ids_batch.to(device), attention_mask=attention_masks_batch.to(device))[0]
            test_acc += calc_accuracy(y_pred, y_batch)

        print(f"epoch {epoch+1} test acc {test_acc / (batch_id+1)}")

        if test_acc >= best_acc:
            best_acc = test_acc
            torch.save(model.state_dict(), f"/opt/ml/models/0421_mask_{fold}.pt")
            
            model.eval()

            preds = torch.tensor([])
            for input_ids_batch, attention_masks_batch, y_batch in tqdm(test_loader):
                y_batch = y_batch.to(device)
                y_pred = model(input_ids_batch.to(device), attention_mask=attention_masks_batch.to(device))[0].detach().cpu()
                preds = torch.cat([preds, y_pred], dim=0)
            np.save(f'/opt/ml/logits/mask_{fold}.npy', preds.numpy())

FOLD 0
--------------------------------


Some weights of the model checkpoint at xlm-roberta-large were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-large and are newly initialized: ['classifier.den

epoch 1 batch id 1 loss 7.646056175231934 train acc 0.0
epoch 1 batch id 51 loss 4.245569705963135 train acc 0.45098039215686275
epoch 1 batch id 101 loss 4.841658592224121 train acc 0.4709158415841584
epoch 1 batch id 151 loss 4.775950908660889 train acc 0.4776490066225166
epoch 1 batch id 201 loss 4.406172752380371 train acc 0.4835199004975124
epoch 1 batch id 251 loss 5.758153915405273 train acc 0.4838147410358566
epoch 1 batch id 301 loss 3.813262462615967 train acc 0.4850498338870432
epoch 1 batch id 351 loss 4.87194299697876 train acc 0.48522079772079774
epoch 1 batch id 401 loss 3.460883140563965 train acc 0.48659600997506236
epoch 1 batch id 451 loss 3.728487491607666 train acc 0.48683481152993346
epoch 1 batch id 501 loss 3.1237778663635254 train acc 0.4943862275449102
epoch 1 train acc 0.4942061143984221
epoch 1 test acc 0.46600877192982454



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:38,  1.60it/s][A
  3%|▎         | 2/63 [00:00<00:30,  2.02it/s][A
  5%|▍         | 3/63 [00:01<00:24,  2.48it/s][A
  6%|▋         | 4/63 [00:01<00:19,  2.96it/s][A
  8%|▊         | 5/63 [00:01<00:16,  3.42it/s][A
 10%|▉         | 6/63 [00:01<00:14,  3.82it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.19it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.49it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.73it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.90it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.03it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.12it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.18it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.24it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.28it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.31it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.33it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.35it/s][A
 30%|███       | 19/63 [00:03<00:08,  5.36it/s]

epoch 2 batch id 1 loss 5.438511848449707 train acc 0.375
epoch 2 batch id 51 loss 2.815375328063965 train acc 0.5649509803921569
epoch 2 batch id 101 loss 3.4937405586242676 train acc 0.5779702970297029
epoch 2 batch id 151 loss 1.840301752090454 train acc 0.5889900662251656
epoch 2 batch id 201 loss 2.641732692718506 train acc 0.5982587064676617
epoch 2 batch id 251 loss 3.1020052433013916 train acc 0.6018426294820717
epoch 2 batch id 301 loss 2.2219576835632324 train acc 0.6104651162790697
epoch 2 batch id 351 loss 1.7116611003875732 train acc 0.6194800569800569
epoch 2 batch id 401 loss 1.4486900568008423 train acc 0.6259351620947631
epoch 2 batch id 451 loss 2.151231288909912 train acc 0.6291574279379157
epoch 2 batch id 501 loss 1.2642884254455566 train acc 0.6346057884231537
epoch 2 train acc 0.6362179487179487
epoch 2 test acc 0.6765350877192983



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:36,  1.71it/s][A
  3%|▎         | 2/63 [00:00<00:28,  2.14it/s][A
  5%|▍         | 3/63 [00:00<00:22,  2.62it/s][A
  6%|▋         | 4/63 [00:01<00:19,  3.09it/s][A
  8%|▊         | 5/63 [00:01<00:16,  3.54it/s][A
 10%|▉         | 6/63 [00:01<00:14,  3.93it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.27it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.54it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.76it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.93it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.05it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.13it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.21it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.26it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.30it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.33it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.34it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.36it/s][A
 30%|███       | 19/63 [00:03<00:08,  5.36it/s]

epoch 3 batch id 1 loss 1.4059972763061523 train acc 0.875
epoch 3 batch id 51 loss 2.7298331260681152 train acc 0.7389705882352942
epoch 3 batch id 101 loss 1.9600365161895752 train acc 0.7233910891089109
epoch 3 batch id 151 loss 2.2639880180358887 train acc 0.7173013245033113
epoch 3 batch id 201 loss 2.9454345703125 train acc 0.712997512437811
epoch 3 batch id 251 loss 2.3157927989959717 train acc 0.7178784860557769
epoch 3 batch id 301 loss 2.7318062782287598 train acc 0.7196843853820598
epoch 3 batch id 351 loss 1.7665812969207764 train acc 0.7200854700854701
epoch 3 batch id 401 loss 2.0535569190979004 train acc 0.7231920199501247
epoch 3 batch id 451 loss 2.5557005405426025 train acc 0.7246396895787139
epoch 3 batch id 501 loss 2.140188694000244 train acc 0.7275449101796407
epoch 3 train acc 0.7289201183431953
epoch 3 test acc 0.6885964912280702



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:36,  1.71it/s][A
  3%|▎         | 2/63 [00:00<00:28,  2.14it/s][A
  5%|▍         | 3/63 [00:00<00:22,  2.62it/s][A
  6%|▋         | 4/63 [00:01<00:19,  3.10it/s][A
  8%|▊         | 5/63 [00:01<00:16,  3.55it/s][A
 10%|▉         | 6/63 [00:01<00:14,  3.94it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.28it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.57it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.80it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.97it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.09it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.17it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.23it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.28it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.30it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.32it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.34it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.35it/s][A
 30%|███       | 19/63 [00:03<00:08,  5.36it/s]

epoch 4 batch id 1 loss 1.5902433395385742 train acc 0.8125
epoch 4 batch id 51 loss 1.3844517469406128 train acc 0.7781862745098039
epoch 4 batch id 101 loss 2.700282573699951 train acc 0.7766089108910891
epoch 4 batch id 151 loss 1.0803413391113281 train acc 0.7777317880794702
epoch 4 batch id 201 loss 2.039592981338501 train acc 0.779228855721393
epoch 4 batch id 251 loss 1.019470453262329 train acc 0.7749003984063745
epoch 4 batch id 301 loss 2.3367319107055664 train acc 0.7734634551495017
epoch 4 batch id 351 loss 1.1345577239990234 train acc 0.7790242165242165
epoch 4 batch id 401 loss 1.989020586013794 train acc 0.7793017456359103
epoch 4 batch id 451 loss 0.9723448753356934 train acc 0.7778547671840355
epoch 4 batch id 501 loss 1.7495369911193848 train acc 0.7776946107784432
epoch 4 train acc 0.777120315581854
epoch 4 test acc 0.7116228070175439



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:37,  1.64it/s][A
  3%|▎         | 2/63 [00:00<00:29,  2.07it/s][A
  5%|▍         | 3/63 [00:00<00:23,  2.53it/s][A
  6%|▋         | 4/63 [00:01<00:19,  2.99it/s][A
  8%|▊         | 5/63 [00:01<00:16,  3.44it/s][A
 10%|▉         | 6/63 [00:01<00:14,  3.84it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.20it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.50it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.73it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.89it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.03it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.11it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.16it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.21it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.24it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.27it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.29it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.30it/s][A
 30%|███       | 19/63 [00:03<00:08,  5.32it/s]

epoch 5 batch id 1 loss 0.9118044376373291 train acc 0.875
epoch 5 batch id 51 loss 0.978644073009491 train acc 0.8174019607843137
epoch 5 batch id 101 loss 0.7921181321144104 train acc 0.8186881188118812
epoch 5 batch id 151 loss 1.5405211448669434 train acc 0.8191225165562914
epoch 5 batch id 201 loss 0.6421346664428711 train acc 0.8205845771144279
epoch 5 batch id 251 loss 0.6830796599388123 train acc 0.8229581673306773
epoch 5 batch id 301 loss 0.9704985618591309 train acc 0.8181063122923588
epoch 5 batch id 351 loss 2.0911431312561035 train acc 0.8148148148148148
epoch 5 batch id 401 loss 0.7922980785369873 train acc 0.8156172069825436
epoch 5 batch id 451 loss 0.6994339227676392 train acc 0.8138858093126385
epoch 5 batch id 501 loss 1.1135663986206055 train acc 0.8125
epoch 5 train acc 0.8125


 50%|█████     | 5/10 [28:01<27:43, 332.63s/it]

epoch 5 test acc 0.7094298245614035
epoch 6 batch id 1 loss 0.3444299101829529 train acc 0.9375
epoch 6 batch id 51 loss 0.9122189283370972 train acc 0.8357843137254902
epoch 6 batch id 101 loss 1.5724126100540161 train acc 0.8422029702970297
epoch 6 batch id 151 loss 0.3914346694946289 train acc 0.8468543046357616
epoch 6 batch id 201 loss 0.7541441321372986 train acc 0.8429726368159204
epoch 6 batch id 251 loss 1.307802438735962 train acc 0.8426294820717132
epoch 6 batch id 301 loss 0.8276716470718384 train acc 0.8446843853820598
epoch 6 batch id 351 loss 1.0742424726486206 train acc 0.8447293447293447
epoch 6 batch id 401 loss 2.141368865966797 train acc 0.8430486284289277
epoch 6 batch id 451 loss 0.7678279876708984 train acc 0.8435421286031042
epoch 6 batch id 501 loss 1.6255234479904175 train acc 0.846307385229541
epoch 6 train acc 0.8460305719921104
epoch 6 test acc 0.7192982456140351



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:38,  1.60it/s][A
  3%|▎         | 2/63 [00:00<00:30,  2.03it/s][A
  5%|▍         | 3/63 [00:01<00:24,  2.49it/s][A
  6%|▋         | 4/63 [00:01<00:19,  2.96it/s][A
  8%|▊         | 5/63 [00:01<00:16,  3.43it/s][A
 10%|▉         | 6/63 [00:01<00:14,  3.83it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.18it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.47it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.70it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.89it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.01it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.11it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.17it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.21it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.26it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.27it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.30it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.32it/s][A
 30%|███       | 19/63 [00:03<00:08,  5.34it/s]

epoch 7 batch id 1 loss 1.4046250581741333 train acc 0.8125
epoch 7 batch id 51 loss 0.8087397217750549 train acc 0.8627450980392157
epoch 7 batch id 101 loss 0.5130358934402466 train acc 0.8669554455445545
epoch 7 batch id 151 loss 0.8383698463439941 train acc 0.8712748344370861
epoch 7 batch id 201 loss 1.728076457977295 train acc 0.8756218905472637
epoch 7 batch id 251 loss 1.1591925621032715 train acc 0.8754980079681275
epoch 7 batch id 301 loss 0.5124013423919678 train acc 0.873546511627907
epoch 7 batch id 351 loss 0.8863003849983215 train acc 0.8714387464387464
epoch 7 batch id 401 loss 0.952018678188324 train acc 0.8721945137157108
epoch 7 batch id 451 loss 0.6612070202827454 train acc 0.874029933481153
epoch 7 batch id 501 loss 0.5215723514556885 train acc 0.8715069860279441
epoch 7 train acc 0.8710552268244576
epoch 7 test acc 0.7401315789473685



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:37,  1.65it/s][A
  3%|▎         | 2/63 [00:00<00:29,  2.08it/s][A
  5%|▍         | 3/63 [00:00<00:23,  2.55it/s][A
  6%|▋         | 4/63 [00:01<00:19,  3.02it/s][A
  8%|▊         | 5/63 [00:01<00:16,  3.47it/s][A
 10%|▉         | 6/63 [00:01<00:14,  3.85it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.18it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.47it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.67it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.86it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  4.98it/s][A
 19%|█▉        | 12/63 [00:02<00:10,  5.08it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.15it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.21it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.25it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.27it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.27it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.28it/s][A
 30%|███       | 19/63 [00:04<00:08,  5.29it/s]

epoch 8 batch id 1 loss 1.3141316175460815 train acc 0.8125
epoch 8 batch id 51 loss 0.43166840076446533 train acc 0.8897058823529411
epoch 8 batch id 101 loss 0.5679216384887695 train acc 0.8997524752475248
epoch 8 batch id 151 loss 0.4229550361633301 train acc 0.8994205298013245
epoch 8 batch id 201 loss 0.5495864152908325 train acc 0.8967661691542289
epoch 8 batch id 251 loss 0.4949324131011963 train acc 0.8951693227091634
epoch 8 batch id 301 loss 0.7146971225738525 train acc 0.8934800664451827
epoch 8 batch id 351 loss 0.16557766497135162 train acc 0.8897792022792023
epoch 8 batch id 401 loss 0.7046633958816528 train acc 0.888715710723192
epoch 8 batch id 451 loss 0.8848505616188049 train acc 0.8894124168514412
epoch 8 batch id 501 loss 0.533635675907135 train acc 0.8909680638722555
epoch 8 train acc 0.8900394477317555


 80%|████████  | 8/10 [44:52<11:06, 333.07s/it]

epoch 8 test acc 0.7390350877192983
epoch 9 batch id 1 loss 0.3699207007884979 train acc 1.0
epoch 9 batch id 51 loss 0.8608319163322449 train acc 0.8982843137254902
epoch 9 batch id 101 loss 0.5151142477989197 train acc 0.9028465346534653
epoch 9 batch id 151 loss 0.10612499713897705 train acc 0.9072847682119205
epoch 9 batch id 201 loss 0.1732265055179596 train acc 0.9048507462686567
epoch 9 batch id 251 loss 0.39024853706359863 train acc 0.9056274900398407
epoch 9 batch id 301 loss 1.3371214866638184 train acc 0.9071843853820598
epoch 9 batch id 351 loss 0.5658342838287354 train acc 0.905982905982906
epoch 9 batch id 401 loss 0.2552429139614105 train acc 0.9074189526184538
epoch 9 batch id 451 loss 0.25151896476745605 train acc 0.9088137472283814
epoch 9 batch id 501 loss 0.15395116806030273 train acc 0.908433133732535
epoch 9 train acc 0.9086538461538461
epoch 9 test acc 0.7664473684210527



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:38,  1.60it/s][A
  3%|▎         | 2/63 [00:00<00:30,  2.02it/s][A
  5%|▍         | 3/63 [00:01<00:24,  2.49it/s][A
  6%|▋         | 4/63 [00:01<00:19,  2.96it/s][A
  8%|▊         | 5/63 [00:01<00:16,  3.43it/s][A
 10%|▉         | 6/63 [00:01<00:14,  3.83it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.19it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.49it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.74it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.93it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.06it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.15it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.21it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.26it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.29it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.31it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.33it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.34it/s][A
 30%|███       | 19/63 [00:03<00:08,  5.35it/s]

epoch 10 batch id 1 loss 0.7401630282402039 train acc 0.875
epoch 10 batch id 51 loss 0.2827742099761963 train acc 0.9240196078431373
epoch 10 batch id 101 loss 0.5423387885093689 train acc 0.9251237623762376
epoch 10 batch id 151 loss 0.9023863673210144 train acc 0.9263245033112583
epoch 10 batch id 201 loss 1.5366356372833252 train acc 0.9259950248756219
epoch 10 batch id 251 loss 0.4966064989566803 train acc 0.9260458167330677
epoch 10 batch id 301 loss 0.6118698716163635 train acc 0.9262873754152824
epoch 10 batch id 351 loss 0.08395421504974365 train acc 0.926460113960114
epoch 10 batch id 401 loss 0.640378475189209 train acc 0.9276807980049875
epoch 10 batch id 451 loss 0.2581324577331543 train acc 0.9251662971175166
epoch 10 batch id 501 loss 1.0051722526550293 train acc 0.9245259481037924
epoch 10 train acc 0.9249260355029586


100%|██████████| 10/10 [55:51<00:00, 335.14s/it]
  0%|          | 0/63 [00:00<?, ?it/s]

epoch 10 test acc 0.7521929824561403


100%|██████████| 63/63 [00:12<00:00,  5.18it/s]


FOLD 1
--------------------------------


Some weights of the model checkpoint at xlm-roberta-large were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-large and are newly initialized: ['classifier.den

epoch 1 batch id 1 loss 7.0798020362854 train acc 0.0
epoch 1 batch id 51 loss 5.082928657531738 train acc 0.4583333333333333
epoch 1 batch id 101 loss 3.1912710666656494 train acc 0.484529702970297
epoch 1 batch id 151 loss 4.195122718811035 train acc 0.4764072847682119
epoch 1 batch id 201 loss 4.605108261108398 train acc 0.47699004975124376
epoch 1 batch id 251 loss 4.144044876098633 train acc 0.4805776892430279
epoch 1 batch id 301 loss 4.577388763427734 train acc 0.4808970099667774
epoch 1 batch id 351 loss 5.074304580688477 train acc 0.4821937321937322
epoch 1 batch id 401 loss 3.389678955078125 train acc 0.48940149625935164
epoch 1 batch id 451 loss 2.965489387512207 train acc 0.4875277161862528
epoch 1 batch id 501 loss 3.712080955505371 train acc 0.4903942115768463
epoch 1 train acc 0.4910009861932939
epoch 1 test acc 0.5350877192982456



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:40,  1.52it/s][A
  3%|▎         | 2/63 [00:00<00:31,  1.94it/s][A
  5%|▍         | 3/63 [00:01<00:25,  2.40it/s][A
  6%|▋         | 4/63 [00:01<00:20,  2.87it/s][A
  8%|▊         | 5/63 [00:01<00:17,  3.34it/s][A
 10%|▉         | 6/63 [00:01<00:15,  3.75it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.13it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.45it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.70it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.90it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.03it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.13it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.21it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.27it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.31it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.33it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.35it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.36it/s][A
 30%|███       | 19/63 [00:04<00:08,  5.37it/s]

epoch 2 batch id 1 loss 3.2895729541778564 train acc 0.625
epoch 2 batch id 51 loss 6.082374572753906 train acc 0.5514705882352942
epoch 2 batch id 101 loss 4.132737159729004 train acc 0.5544554455445545
epoch 2 batch id 151 loss 3.1976380348205566 train acc 0.5670529801324503
epoch 2 batch id 201 loss 2.1409363746643066 train acc 0.5752487562189055
epoch 2 batch id 251 loss 3.4789984226226807 train acc 0.5906374501992032
epoch 2 batch id 301 loss 1.9760806560516357 train acc 0.6015365448504983
epoch 2 batch id 351 loss 2.1743338108062744 train acc 0.6120014245014245
epoch 2 batch id 401 loss 4.264560222625732 train acc 0.6193890274314214
epoch 2 batch id 451 loss 1.8726327419281006 train acc 0.6283259423503326
epoch 2 batch id 501 loss 2.7417421340942383 train acc 0.6339820359281437
epoch 2 train acc 0.6349852071005917
epoch 2 test acc 0.6973684210526315



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:40,  1.54it/s][A
  3%|▎         | 2/63 [00:00<00:31,  1.95it/s][A
  5%|▍         | 3/63 [00:01<00:24,  2.41it/s][A
  6%|▋         | 4/63 [00:01<00:20,  2.88it/s][A
  8%|▊         | 5/63 [00:01<00:17,  3.34it/s][A
 10%|▉         | 6/63 [00:01<00:15,  3.76it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.12it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.44it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.67it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.87it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.00it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.10it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.18it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.23it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.28it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.30it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.32it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.34it/s][A
 30%|███       | 19/63 [00:04<00:08,  5.36it/s]

epoch 3 batch id 1 loss 2.3397057056427 train acc 0.75
epoch 3 batch id 51 loss 3.57169771194458 train acc 0.7328431372549019
epoch 3 batch id 101 loss 1.9286398887634277 train acc 0.7110148514851485
epoch 3 batch id 151 loss 2.6991584300994873 train acc 0.7094370860927153
epoch 3 batch id 201 loss 1.4444655179977417 train acc 0.7192164179104478
epoch 3 batch id 251 loss 0.9929608106613159 train acc 0.7231075697211156
epoch 3 batch id 301 loss 0.9680414199829102 train acc 0.7242524916943521
epoch 3 batch id 351 loss 1.5713369846343994 train acc 0.7286324786324786
epoch 3 batch id 401 loss 1.763307809829712 train acc 0.7266209476309227
epoch 3 batch id 451 loss 2.4628095626831055 train acc 0.7251940133037694
epoch 3 batch id 501 loss 1.6481549739837646 train acc 0.7272954091816367
epoch 3 train acc 0.7264546351084813
epoch 3 test acc 0.7182017543859649



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:40,  1.54it/s][A
  3%|▎         | 2/63 [00:00<00:31,  1.96it/s][A
  5%|▍         | 3/63 [00:01<00:24,  2.42it/s][A
  6%|▋         | 4/63 [00:01<00:20,  2.89it/s][A
  8%|▊         | 5/63 [00:01<00:17,  3.36it/s][A
 10%|▉         | 6/63 [00:01<00:15,  3.78it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.13it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.44it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.69it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.89it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.02it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.12it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.08it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.16it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.22it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.26it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.29it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.29it/s][A
 30%|███       | 19/63 [00:04<00:08,  5.31it/s]

epoch 4 batch id 1 loss 2.088256359100342 train acc 0.6875
epoch 4 batch id 51 loss 1.6313700675964355 train acc 0.7573529411764706
epoch 4 batch id 101 loss 1.0100033283233643 train acc 0.7592821782178217
epoch 4 batch id 151 loss 1.9731943607330322 train acc 0.7669701986754967
epoch 4 batch id 201 loss 1.1685428619384766 train acc 0.7674129353233831
epoch 4 batch id 251 loss 1.7510428428649902 train acc 0.7689243027888446
epoch 4 batch id 301 loss 1.4221718311309814 train acc 0.772217607973422
epoch 4 batch id 351 loss 1.590256929397583 train acc 0.7736823361823362
epoch 4 batch id 401 loss 0.8426728844642639 train acc 0.7744700748129676
epoch 4 batch id 451 loss 0.5644865036010742 train acc 0.7735587583148559
epoch 4 batch id 501 loss 0.8563003540039062 train acc 0.7749500998003992
epoch 4 train acc 0.77465483234714
epoch 4 test acc 0.7521929824561403



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:39,  1.56it/s][A
  3%|▎         | 2/63 [00:00<00:30,  1.98it/s][A
  5%|▍         | 3/63 [00:01<00:24,  2.44it/s][A
  6%|▋         | 4/63 [00:01<00:20,  2.91it/s][A
  8%|▊         | 5/63 [00:01<00:17,  3.36it/s][A
 10%|▉         | 6/63 [00:01<00:15,  3.77it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.13it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.44it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.65it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.83it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  4.95it/s][A
 19%|█▉        | 12/63 [00:02<00:10,  5.04it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.13it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.18it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.22it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.25it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.28it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.30it/s][A
 30%|███       | 19/63 [00:04<00:08,  5.31it/s]

epoch 5 batch id 1 loss 2.1926231384277344 train acc 0.625
epoch 5 batch id 51 loss 0.9289064407348633 train acc 0.803921568627451
epoch 5 batch id 101 loss 1.8446588516235352 train acc 0.8106435643564357
epoch 5 batch id 151 loss 1.7714920043945312 train acc 0.8029801324503312
epoch 5 batch id 201 loss 1.8923277854919434 train acc 0.798818407960199
epoch 5 batch id 251 loss 1.1477174758911133 train acc 0.8020418326693227
epoch 5 batch id 301 loss 1.1123512983322144 train acc 0.803156146179402
epoch 5 batch id 351 loss 1.2716505527496338 train acc 0.8057336182336182
epoch 5 batch id 401 loss 0.8472216129302979 train acc 0.8062655860349127
epoch 5 batch id 451 loss 2.8966736793518066 train acc 0.8051552106430155
epoch 5 batch id 501 loss 1.0695838928222656 train acc 0.8052644710578842
epoch 5 train acc 0.8043639053254438


 50%|█████     | 5/10 [28:12<27:54, 334.94s/it]

epoch 5 test acc 0.7379385964912281
epoch 6 batch id 1 loss 1.407414197921753 train acc 0.8125
epoch 6 batch id 51 loss 1.040419340133667 train acc 0.8345588235294118
epoch 6 batch id 101 loss 0.650994062423706 train acc 0.8261138613861386
epoch 6 batch id 151 loss 0.9239165782928467 train acc 0.8286423841059603
epoch 6 batch id 201 loss 1.4974054098129272 train acc 0.8305348258706468
epoch 6 batch id 251 loss 0.5629842281341553 train acc 0.8326693227091634
epoch 6 batch id 301 loss 0.6825548410415649 train acc 0.8338870431893688
epoch 6 batch id 351 loss 2.2021703720092773 train acc 0.8319088319088319
epoch 6 batch id 401 loss 1.1270439624786377 train acc 0.8322942643391521
epoch 6 batch id 451 loss 0.510322093963623 train acc 0.8305155210643016
epoch 6 batch id 501 loss 0.3395233750343323 train acc 0.8300898203592815
epoch 6 train acc 0.8298816568047337


 60%|██████    | 6/10 [33:27<21:55, 328.89s/it]

epoch 6 test acc 0.7116228070175439
epoch 7 batch id 1 loss 0.7278760671615601 train acc 0.8125
epoch 7 batch id 51 loss 0.757917046546936 train acc 0.8480392156862745
epoch 7 batch id 101 loss 0.8369369506835938 train acc 0.8595297029702971
epoch 7 batch id 151 loss 0.38804563879966736 train acc 0.8600993377483444
epoch 7 batch id 201 loss 0.6590901017189026 train acc 0.8669154228855721
epoch 7 batch id 251 loss 0.27209997177124023 train acc 0.8630478087649402
epoch 7 batch id 301 loss 0.30548030138015747 train acc 0.8596345514950167
epoch 7 batch id 351 loss 0.7020832300186157 train acc 0.8598646723646723
epoch 7 batch id 401 loss 1.3594255447387695 train acc 0.8598815461346634
epoch 7 batch id 451 loss 0.8835315704345703 train acc 0.8604490022172949
epoch 7 batch id 501 loss 0.2652609944343567 train acc 0.8609031936127745
epoch 7 train acc 0.8613165680473372


 70%|███████   | 7/10 [38:41<16:13, 324.60s/it]

epoch 7 test acc 0.743421052631579
epoch 8 batch id 1 loss 0.3947584629058838 train acc 1.0
epoch 8 batch id 51 loss 1.210803508758545 train acc 0.8590686274509803
epoch 8 batch id 101 loss 0.3111746311187744 train acc 0.8818069306930693
epoch 8 batch id 151 loss 0.7286425828933716 train acc 0.8849337748344371
epoch 8 batch id 201 loss 0.7089042663574219 train acc 0.8777985074626866
epoch 8 batch id 251 loss 0.96341472864151 train acc 0.8802290836653387
epoch 8 batch id 301 loss 1.0660731792449951 train acc 0.8797757475083057
epoch 8 batch id 351 loss 0.3793359398841858 train acc 0.8792735042735043
epoch 8 batch id 401 loss 0.2933393716812134 train acc 0.8796758104738155
epoch 8 batch id 451 loss 0.47952884435653687 train acc 0.877910199556541
epoch 8 batch id 501 loss 1.3795418739318848 train acc 0.876372255489022
epoch 8 train acc 0.8763560157790927
epoch 8 test acc 0.7554824561403509



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:39,  1.56it/s][A
  3%|▎         | 2/63 [00:00<00:30,  1.98it/s][A
  5%|▍         | 3/63 [00:01<00:24,  2.44it/s][A
  6%|▋         | 4/63 [00:01<00:20,  2.92it/s][A
  8%|▊         | 5/63 [00:01<00:17,  3.38it/s][A
 10%|▉         | 6/63 [00:01<00:15,  3.78it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.15it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.47it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.72it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.91it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.04it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.14it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.22it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.27it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.30it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.32it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.34it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.35it/s][A
 30%|███       | 19/63 [00:03<00:08,  5.36it/s]

epoch 9 batch id 1 loss 0.49345946311950684 train acc 0.9375
epoch 9 batch id 51 loss 0.7971930503845215 train acc 0.8958333333333334
epoch 9 batch id 101 loss 0.9866137504577637 train acc 0.9022277227722773
epoch 9 batch id 151 loss 0.3090706467628479 train acc 0.9010761589403974
epoch 9 batch id 201 loss 0.5223482251167297 train acc 0.9026741293532339
epoch 9 batch id 251 loss 0.5559723377227783 train acc 0.900398406374502
epoch 9 batch id 301 loss 0.9253281354904175 train acc 0.9055232558139535
epoch 9 batch id 351 loss 0.4521198868751526 train acc 0.9042022792022792
epoch 9 batch id 401 loss 0.6137540340423584 train acc 0.9011845386533666
epoch 9 batch id 451 loss 0.2553666830062866 train acc 0.9009146341463414
epoch 9 batch id 501 loss 0.25106281042099 train acc 0.9015718562874252
epoch 9 train acc 0.9013806706114399


 90%|█████████ | 9/10 [49:41<05:25, 325.87s/it]

epoch 9 test acc 0.7379385964912281
epoch 10 batch id 1 loss 0.4413570761680603 train acc 0.9375
epoch 10 batch id 51 loss 0.19303664565086365 train acc 0.9325980392156863
epoch 10 batch id 101 loss 0.13386723399162292 train acc 0.9368811881188119
epoch 10 batch id 151 loss 0.30753013491630554 train acc 0.9370860927152318
epoch 10 batch id 201 loss 0.7041922211647034 train acc 0.933768656716418
epoch 10 batch id 251 loss 0.35941576957702637 train acc 0.9277888446215139
epoch 10 batch id 301 loss 0.4706230163574219 train acc 0.9240033222591362
epoch 10 batch id 351 loss 0.4437437653541565 train acc 0.9211182336182336
epoch 10 batch id 401 loss 0.6571615934371948 train acc 0.9192643391521197
epoch 10 batch id 451 loss 0.27756959199905396 train acc 0.916019955654102
epoch 10 batch id 501 loss 1.1167659759521484 train acc 0.9162924151696606
epoch 10 train acc 0.916543392504931
epoch 10 test acc 0.7576754385964912



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:39,  1.56it/s][A
  3%|▎         | 2/63 [00:00<00:30,  1.97it/s][A
  5%|▍         | 3/63 [00:01<00:24,  2.43it/s][A
  6%|▋         | 4/63 [00:01<00:20,  2.92it/s][A
  8%|▊         | 5/63 [00:01<00:17,  3.38it/s][A
 10%|▉         | 6/63 [00:01<00:15,  3.79it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.16it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.47it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.71it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.88it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.03it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.13it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.20it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.25it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.28it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.31it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.34it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.35it/s][A
 30%|███       | 19/63 [00:03<00:08,  5.36it/s]

FOLD 2
--------------------------------


Some weights of the model checkpoint at xlm-roberta-large were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-large and are newly initialized: ['classifier.den

epoch 1 batch id 1 loss 7.209519863128662 train acc 0.0625
epoch 1 batch id 51 loss 4.557405471801758 train acc 0.47058823529411764
epoch 1 batch id 101 loss 5.183811187744141 train acc 0.48205445544554454
epoch 1 batch id 151 loss 3.948455810546875 train acc 0.48468543046357615
epoch 1 batch id 201 loss 4.352262020111084 train acc 0.48445273631840796
epoch 1 batch id 251 loss 5.422328948974609 train acc 0.4845617529880478
epoch 1 batch id 301 loss 3.8321852684020996 train acc 0.48650332225913623
epoch 1 batch id 351 loss 4.299463272094727 train acc 0.48896011396011396
epoch 1 batch id 401 loss 3.091042995452881 train acc 0.4929862842892768
epoch 1 batch id 451 loss 4.330604553222656 train acc 0.4959811529933481
epoch 1 batch id 501 loss 3.596652030944824 train acc 0.4981287425149701
epoch 1 train acc 0.4982741617357002
epoch 1 test acc 0.5504385964912281



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:40,  1.53it/s][A
  3%|▎         | 2/63 [00:00<00:31,  1.94it/s][A
  5%|▍         | 3/63 [00:01<00:24,  2.40it/s][A
  6%|▋         | 4/63 [00:01<00:20,  2.88it/s][A
  8%|▊         | 5/63 [00:01<00:17,  3.35it/s][A
 10%|▉         | 6/63 [00:01<00:15,  3.77it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.13it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.45it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.70it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.90it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.04it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.13it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.21it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.26it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.30it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.33it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.34it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.36it/s][A
 30%|███       | 19/63 [00:04<00:08,  5.37it/s]

epoch 2 batch id 1 loss 2.7407569885253906 train acc 0.625
epoch 2 batch id 51 loss 2.206773042678833 train acc 0.5661764705882353
epoch 2 batch id 101 loss 3.6193342208862305 train acc 0.5767326732673267
epoch 2 batch id 151 loss 2.7625293731689453 train acc 0.5923013245033113
epoch 2 batch id 201 loss 2.100874900817871 train acc 0.6057213930348259
epoch 2 batch id 251 loss 2.5166256427764893 train acc 0.6152888446215139
epoch 2 batch id 301 loss 1.8222750425338745 train acc 0.6268687707641196
epoch 2 batch id 351 loss 3.2842297554016113 train acc 0.6381766381766382
epoch 2 batch id 401 loss 1.2860422134399414 train acc 0.643859102244389
epoch 2 batch id 451 loss 1.6720564365386963 train acc 0.6481430155210643
epoch 2 batch id 501 loss 2.109476089477539 train acc 0.6530688622754491
epoch 2 train acc 0.653353057199211
epoch 2 test acc 0.6885964912280702



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:40,  1.55it/s][A
  3%|▎         | 2/63 [00:00<00:31,  1.97it/s][A
  5%|▍         | 3/63 [00:01<00:24,  2.43it/s][A
  6%|▋         | 4/63 [00:01<00:20,  2.90it/s][A
  8%|▊         | 5/63 [00:01<00:17,  3.36it/s][A
 10%|▉         | 6/63 [00:01<00:15,  3.77it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.14it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.45it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.69it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.87it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.01it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.11it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.19it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.24it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.28it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.31it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.33it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.34it/s][A
 30%|███       | 19/63 [00:04<00:08,  5.35it/s]

epoch 3 batch id 1 loss 1.9031193256378174 train acc 0.8125
epoch 3 batch id 51 loss 1.0425152778625488 train acc 0.7512254901960784
epoch 3 batch id 101 loss 1.633653998374939 train acc 0.7431930693069307
epoch 3 batch id 151 loss 2.878648281097412 train acc 0.7471026490066225
epoch 3 batch id 201 loss 2.4496726989746094 train acc 0.7437810945273632
epoch 3 batch id 251 loss 1.755324363708496 train acc 0.7452689243027888
epoch 3 batch id 301 loss 1.290117859840393 train acc 0.7425249169435216
epoch 3 batch id 351 loss 2.6298437118530273 train acc 0.7419871794871795
epoch 3 batch id 401 loss 1.3657350540161133 train acc 0.7415835411471322
epoch 3 batch id 451 loss 2.323289394378662 train acc 0.7412694013303769
epoch 3 batch id 501 loss 1.026566982269287 train acc 0.7425149700598802
epoch 3 train acc 0.7417406311637081
epoch 3 test acc 0.7116228070175439



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:38,  1.62it/s][A
  3%|▎         | 2/63 [00:00<00:30,  2.00it/s][A
  5%|▍         | 3/63 [00:01<00:24,  2.46it/s][A
  6%|▋         | 4/63 [00:01<00:20,  2.95it/s][A
  8%|▊         | 5/63 [00:01<00:16,  3.41it/s][A
 10%|▉         | 6/63 [00:01<00:14,  3.82it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.17it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.48it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.71it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.89it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.03it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.13it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.16it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.22it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.26it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.28it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.28it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.31it/s][A
 30%|███       | 19/63 [00:04<00:08,  5.32it/s]

epoch 4 batch id 1 loss 1.4072206020355225 train acc 0.875
epoch 4 batch id 51 loss 1.5852422714233398 train acc 0.7830882352941176
epoch 4 batch id 101 loss 1.6665866374969482 train acc 0.7679455445544554
epoch 4 batch id 151 loss 0.5979694128036499 train acc 0.769453642384106
epoch 4 batch id 201 loss 1.3778082132339478 train acc 0.7683457711442786
epoch 4 batch id 251 loss 0.6577003002166748 train acc 0.776394422310757
epoch 4 batch id 301 loss 0.6659855842590332 train acc 0.7740863787375415
epoch 4 batch id 351 loss 1.7564184665679932 train acc 0.7736823361823362
epoch 4 batch id 401 loss 0.718002438545227 train acc 0.7786783042394015
epoch 4 batch id 451 loss 1.4849164485931396 train acc 0.7789634146341463
epoch 4 batch id 501 loss 1.8535276651382446 train acc 0.7789421157684631
epoch 4 train acc 0.7793392504930966
epoch 4 test acc 0.7379385964912281



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:39,  1.59it/s][A
  3%|▎         | 2/63 [00:00<00:30,  2.01it/s][A
  5%|▍         | 3/63 [00:01<00:24,  2.48it/s][A
  6%|▋         | 4/63 [00:01<00:19,  2.96it/s][A
  8%|▊         | 5/63 [00:01<00:16,  3.43it/s][A
 10%|▉         | 6/63 [00:01<00:14,  3.84it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.20it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.49it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.71it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.87it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.01it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.11it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.19it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.25it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.29it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.32it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.34it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.35it/s][A
 30%|███       | 19/63 [00:03<00:08,  5.34it/s]

epoch 5 batch id 1 loss 2.283712863922119 train acc 0.625
epoch 5 batch id 51 loss 2.3432607650756836 train acc 0.8075980392156863
epoch 5 batch id 101 loss 1.2893861532211304 train acc 0.8087871287128713
epoch 5 batch id 151 loss 1.3287646770477295 train acc 0.8004966887417219
epoch 5 batch id 201 loss 0.5267213582992554 train acc 0.7972636815920398
epoch 5 batch id 251 loss 1.4189506769180298 train acc 0.7993027888446215
epoch 5 batch id 301 loss 0.942451000213623 train acc 0.7998338870431894
epoch 5 batch id 351 loss 1.1592833995819092 train acc 0.8059116809116809
epoch 5 batch id 401 loss 1.33189058303833 train acc 0.8072007481296758
epoch 5 batch id 451 loss 1.4104276895523071 train acc 0.8104212860310421
epoch 5 batch id 501 loss 1.4171502590179443 train acc 0.810129740518962
epoch 5 train acc 0.8106508875739645
epoch 5 test acc 0.7510964912280702



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:39,  1.56it/s][A
  3%|▎         | 2/63 [00:00<00:30,  1.97it/s][A
  5%|▍         | 3/63 [00:01<00:24,  2.44it/s][A
  6%|▋         | 4/63 [00:01<00:20,  2.91it/s][A
  8%|▊         | 5/63 [00:01<00:17,  3.38it/s][A
 10%|▉         | 6/63 [00:01<00:15,  3.80it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.15it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.46it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.71it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.90it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.03it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.13it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.20it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.26it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.29it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.32it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.33it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.34it/s][A
 30%|███       | 19/63 [00:03<00:08,  5.35it/s]

epoch 6 batch id 1 loss 1.6338982582092285 train acc 0.75
epoch 6 batch id 51 loss 0.5794574022293091 train acc 0.8284313725490197
epoch 6 batch id 101 loss 1.2521064281463623 train acc 0.8422029702970297
epoch 6 batch id 151 loss 0.5709248781204224 train acc 0.8369205298013245
epoch 6 batch id 201 loss 0.9135462045669556 train acc 0.8420398009950248
epoch 6 batch id 251 loss 1.1948869228363037 train acc 0.8411354581673307
epoch 6 batch id 301 loss 1.762345790863037 train acc 0.8401162790697675
epoch 6 batch id 351 loss 1.068725824356079 train acc 0.8392094017094017
epoch 6 batch id 401 loss 1.3357439041137695 train acc 0.8411783042394015
epoch 6 batch id 451 loss 0.8700954914093018 train acc 0.8411862527716186
epoch 6 batch id 501 loss 1.022312045097351 train acc 0.8390718562874252
epoch 6 train acc 0.8392504930966469
epoch 6 test acc 0.7631578947368421



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:39,  1.56it/s][A
  3%|▎         | 2/63 [00:00<00:30,  1.98it/s][A
  5%|▍         | 3/63 [00:01<00:24,  2.45it/s][A
  6%|▋         | 4/63 [00:01<00:20,  2.93it/s][A
  8%|▊         | 5/63 [00:01<00:17,  3.39it/s][A
 10%|▉         | 6/63 [00:01<00:15,  3.79it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.15it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.46it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.71it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.90it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.02it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.11it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.18it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.24it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.24it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.26it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.28it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.31it/s][A
 30%|███       | 19/63 [00:04<00:08,  5.31it/s]

epoch 7 batch id 1 loss 0.17447763681411743 train acc 1.0
epoch 7 batch id 51 loss 2.3780975341796875 train acc 0.8688725490196079
epoch 7 batch id 101 loss 1.3769991397857666 train acc 0.8669554455445545
epoch 7 batch id 151 loss 0.6538244485855103 train acc 0.8654801324503312
epoch 7 batch id 201 loss 0.9160357713699341 train acc 0.8656716417910447
epoch 7 batch id 251 loss 0.5048757791519165 train acc 0.8652888446215139
epoch 7 batch id 301 loss 0.7209354639053345 train acc 0.8658637873754153
epoch 7 batch id 351 loss 0.8612720370292664 train acc 0.8660968660968661
epoch 7 batch id 401 loss 0.24858906865119934 train acc 0.8662718204488778
epoch 7 batch id 451 loss 0.5029572248458862 train acc 0.8671008869179601
epoch 7 batch id 501 loss 0.6093217134475708 train acc 0.8643962075848304
epoch 7 train acc 0.8635355029585798


 70%|███████   | 7/10 [39:34<16:44, 334.76s/it]

epoch 7 test acc 0.7521929824561403
epoch 8 batch id 1 loss 1.3547682762145996 train acc 0.8125
epoch 8 batch id 51 loss 0.672629714012146 train acc 0.8786764705882353
epoch 8 batch id 101 loss 1.1501293182373047 train acc 0.8811881188118812
epoch 8 batch id 151 loss 0.4893309473991394 train acc 0.8861754966887417
epoch 8 batch id 201 loss 0.31289124488830566 train acc 0.8837064676616916
epoch 8 batch id 251 loss 0.3385750651359558 train acc 0.8829681274900398
epoch 8 batch id 301 loss 0.18209382891654968 train acc 0.8810215946843853
epoch 8 batch id 351 loss 0.5626481771469116 train acc 0.8789173789173789
epoch 8 batch id 401 loss 0.24235358834266663 train acc 0.8770261845386533
epoch 8 batch id 451 loss 1.1800041198730469 train acc 0.8795731707317073
epoch 8 batch id 501 loss 0.3469145894050598 train acc 0.8787425149700598
epoch 8 train acc 0.8783284023668639
epoch 8 test acc 0.7741228070175439



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:41,  1.49it/s][A
  3%|▎         | 2/63 [00:00<00:32,  1.90it/s][A
  5%|▍         | 3/63 [00:01<00:25,  2.36it/s][A
  6%|▋         | 4/63 [00:01<00:20,  2.83it/s][A
  8%|▊         | 5/63 [00:01<00:17,  3.30it/s][A
 10%|▉         | 6/63 [00:01<00:15,  3.71it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.09it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.41it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.66it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.87it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.01it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.11it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.19it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.25it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.29it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.31it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.34it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.35it/s][A
 30%|███       | 19/63 [00:04<00:08,  5.34it/s]

epoch 9 batch id 1 loss 0.3710651993751526 train acc 0.9375
epoch 9 batch id 51 loss 0.4126967191696167 train acc 0.9166666666666666
epoch 9 batch id 101 loss 0.5380135774612427 train acc 0.9152227722772277
epoch 9 batch id 151 loss 0.372668594121933 train acc 0.9122516556291391
epoch 9 batch id 201 loss 0.20638060569763184 train acc 0.9123134328358209
epoch 9 batch id 251 loss 0.42846667766571045 train acc 0.9135956175298805
epoch 9 batch id 301 loss 0.13929161429405212 train acc 0.9119601328903655
epoch 9 batch id 351 loss 0.2860335111618042 train acc 0.9097222222222222
epoch 9 batch id 401 loss 0.05036824569106102 train acc 0.9081982543640897
epoch 9 batch id 451 loss 0.5993714928627014 train acc 0.9064578713968958
epoch 9 batch id 501 loss 1.8417927026748657 train acc 0.9034431137724551
epoch 9 train acc 0.9032297830374754


 90%|█████████ | 9/10 [50:41<05:32, 332.48s/it]

epoch 9 test acc 0.7532894736842105
epoch 10 batch id 1 loss 0.762730598449707 train acc 0.875
epoch 10 batch id 51 loss 0.930063009262085 train acc 0.9056372549019608
epoch 10 batch id 101 loss 0.2410661280155182 train acc 0.9164603960396039
epoch 10 batch id 151 loss 0.3600287139415741 train acc 0.9209437086092715
epoch 10 batch id 201 loss 0.8114380836486816 train acc 0.9200870646766169
epoch 10 batch id 251 loss 0.5107895731925964 train acc 0.9210657370517928
epoch 10 batch id 301 loss 0.8388534784317017 train acc 0.9221345514950167
epoch 10 batch id 351 loss 1.74570894241333 train acc 0.9196937321937322
epoch 10 batch id 401 loss 0.8634995818138123 train acc 0.9178615960099751
epoch 10 batch id 451 loss 0.7858487367630005 train acc 0.9161585365853658
epoch 10 batch id 501 loss 0.21681585907936096 train acc 0.9166666666666666
epoch 10 train acc 0.9167899408284024


100%|██████████| 10/10 [55:56<00:00, 335.62s/it]
  0%|          | 0/63 [00:00<?, ?it/s]

epoch 10 test acc 0.7532894736842105


100%|██████████| 63/63 [00:12<00:00,  5.17it/s]


FOLD 3
--------------------------------


Some weights of the model checkpoint at xlm-roberta-large were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-large and are newly initialized: ['classifier.den

epoch 1 batch id 1 loss 7.5347137451171875 train acc 0.0
epoch 1 batch id 51 loss 5.109021186828613 train acc 0.4644607843137255
epoch 1 batch id 101 loss 4.667058944702148 train acc 0.4771039603960396
epoch 1 batch id 151 loss 3.2603464126586914 train acc 0.4768211920529801
epoch 1 batch id 201 loss 3.264533042907715 train acc 0.4791666666666667
epoch 1 batch id 251 loss 3.866168975830078 train acc 0.4860557768924303
epoch 1 batch id 301 loss 4.547737121582031 train acc 0.4889950166112957
epoch 1 batch id 351 loss 3.564513683319092 train acc 0.4896723646723647
epoch 1 batch id 401 loss 3.982297897338867 train acc 0.48831047381546133
epoch 1 batch id 451 loss 4.446389198303223 train acc 0.4854490022172949
epoch 1 batch id 501 loss 3.2423269748687744 train acc 0.48827345309381237
epoch 1 train acc 0.48878205128205127
epoch 1 test acc 0.5296052631578947



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:40,  1.53it/s][A
  3%|▎         | 2/63 [00:00<00:31,  1.94it/s][A
  5%|▍         | 3/63 [00:01<00:24,  2.40it/s][A
  6%|▋         | 4/63 [00:01<00:20,  2.88it/s][A
  8%|▊         | 5/63 [00:01<00:17,  3.33it/s][A
 10%|▉         | 6/63 [00:01<00:15,  3.74it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.12it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.44it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.69it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.89it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.02it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.13it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.19it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.24it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.29it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.31it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.33it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.35it/s][A
 30%|███       | 19/63 [00:04<00:08,  5.37it/s]

epoch 2 batch id 1 loss 4.495744705200195 train acc 0.375
epoch 2 batch id 51 loss 3.4273171424865723 train acc 0.5441176470588235
epoch 2 batch id 101 loss 4.5264692306518555 train acc 0.5587871287128713
epoch 2 batch id 151 loss 3.367511510848999 train acc 0.5670529801324503
epoch 2 batch id 201 loss 2.683516025543213 train acc 0.5740049751243781
epoch 2 batch id 251 loss 3.2319958209991455 train acc 0.5737051792828686
epoch 2 batch id 301 loss 3.2124834060668945 train acc 0.5822259136212624
epoch 2 batch id 351 loss 2.121877431869507 train acc 0.5909900284900285
epoch 2 batch id 401 loss 3.1744437217712402 train acc 0.601932668329177
epoch 2 batch id 451 loss 1.4497148990631104 train acc 0.6105875831485588
epoch 2 batch id 501 loss 2.597719669342041 train acc 0.6165169660678643
epoch 2 train acc 0.6157544378698225
epoch 2 test acc 0.6962719298245614



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:38,  1.61it/s][A
  3%|▎         | 2/63 [00:00<00:29,  2.04it/s][A
  5%|▍         | 3/63 [00:01<00:24,  2.50it/s][A
  6%|▋         | 4/63 [00:01<00:19,  2.97it/s][A
  8%|▊         | 5/63 [00:01<00:16,  3.43it/s][A
 10%|▉         | 6/63 [00:01<00:14,  3.85it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.20it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.50it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.74it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.91it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.05it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.14it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.21it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.27it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.30it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.33it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.34it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.36it/s][A
 30%|███       | 19/63 [00:03<00:08,  5.36it/s]

epoch 3 batch id 1 loss 1.9313042163848877 train acc 0.75
epoch 3 batch id 51 loss 2.141098737716675 train acc 0.6985294117647058
epoch 3 batch id 101 loss 2.116063117980957 train acc 0.6986386138613861
epoch 3 batch id 151 loss 3.3008487224578857 train acc 0.7007450331125827
epoch 3 batch id 201 loss 1.2159501314163208 train acc 0.7039800995024875
epoch 3 batch id 251 loss 1.2063677310943604 train acc 0.7024402390438247
epoch 3 batch id 301 loss 1.6602880954742432 train acc 0.704734219269103
epoch 3 batch id 351 loss 2.1201329231262207 train acc 0.7063746438746439
epoch 3 batch id 401 loss 1.6543586254119873 train acc 0.7065149625935162
epoch 3 batch id 451 loss 1.5134804248809814 train acc 0.7102272727272727
epoch 3 batch id 501 loss 0.9390112161636353 train acc 0.7108283433133733
epoch 3 train acc 0.7098126232741617
epoch 3 test acc 0.7302631578947368



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:39,  1.55it/s][A
  3%|▎         | 2/63 [00:00<00:30,  1.97it/s][A
  5%|▍         | 3/63 [00:01<00:24,  2.43it/s][A
  6%|▋         | 4/63 [00:01<00:20,  2.91it/s][A
  8%|▊         | 5/63 [00:01<00:17,  3.36it/s][A
 10%|▉         | 6/63 [00:01<00:15,  3.77it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.14it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.45it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.70it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.89it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.02it/s][A
 19%|█▉        | 12/63 [00:02<00:10,  5.10it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.18it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.23it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.27it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.30it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.32it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.33it/s][A
 30%|███       | 19/63 [00:04<00:08,  5.34it/s]

epoch 4 batch id 1 loss 2.4890546798706055 train acc 0.625
epoch 4 batch id 51 loss 1.3751490116119385 train acc 0.7683823529411765
epoch 4 batch id 101 loss 2.656559705734253 train acc 0.7673267326732673
epoch 4 batch id 151 loss 2.0235729217529297 train acc 0.7628311258278145
epoch 4 batch id 201 loss 2.491173505783081 train acc 0.753731343283582
epoch 4 batch id 251 loss 1.0918655395507812 train acc 0.7542330677290837
epoch 4 batch id 301 loss 1.0401140451431274 train acc 0.7539451827242525
epoch 4 batch id 351 loss 1.256852388381958 train acc 0.7585470085470085
epoch 4 batch id 401 loss 0.6978179216384888 train acc 0.7598192019950125
epoch 4 batch id 451 loss 1.689032793045044 train acc 0.7621951219512195
epoch 4 batch id 501 loss 1.5479285717010498 train acc 0.7635978043912176
epoch 4 train acc 0.763560157790927
epoch 4 test acc 0.7401315789473685



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:40,  1.54it/s][A
  3%|▎         | 2/63 [00:00<00:31,  1.96it/s][A
  5%|▍         | 3/63 [00:01<00:25,  2.39it/s][A
  6%|▋         | 4/63 [00:01<00:20,  2.87it/s][A
  8%|▊         | 5/63 [00:01<00:17,  3.34it/s][A
 10%|▉         | 6/63 [00:01<00:15,  3.76it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.14it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.45it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.69it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.87it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.00it/s][A
 19%|█▉        | 12/63 [00:02<00:10,  5.10it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.16it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.22it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.26it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.30it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.32it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.35it/s][A
 30%|███       | 19/63 [00:04<00:08,  5.36it/s]

epoch 5 batch id 1 loss 1.3847613334655762 train acc 0.6875
epoch 5 batch id 51 loss 1.563051462173462 train acc 0.7892156862745098
epoch 5 batch id 101 loss 0.45305803418159485 train acc 0.8038366336633663
epoch 5 batch id 151 loss 1.2201056480407715 train acc 0.8050496688741722
epoch 5 batch id 201 loss 1.1432268619537354 train acc 0.806592039800995
epoch 5 batch id 251 loss 1.6885619163513184 train acc 0.8037848605577689
epoch 5 batch id 301 loss 1.2940528392791748 train acc 0.8023255813953488
epoch 5 batch id 351 loss 0.726759672164917 train acc 0.8003917378917379
epoch 5 batch id 401 loss 0.507001519203186 train acc 0.7994077306733167
epoch 5 batch id 451 loss 1.1528695821762085 train acc 0.8007206208425721
epoch 5 batch id 501 loss 1.0362322330474854 train acc 0.8013972055888223
epoch 5 train acc 0.7996794871794872
epoch 5 test acc 0.743421052631579



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:38,  1.60it/s][A
  3%|▎         | 2/63 [00:00<00:30,  2.03it/s][A
  5%|▍         | 3/63 [00:01<00:24,  2.49it/s][A
  6%|▋         | 4/63 [00:01<00:19,  2.97it/s][A
  8%|▊         | 5/63 [00:01<00:16,  3.43it/s][A
 10%|▉         | 6/63 [00:01<00:14,  3.84it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.20it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.50it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.73it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.92it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.05it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.14it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.20it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.25it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.27it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.30it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.33it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.35it/s][A
 30%|███       | 19/63 [00:03<00:08,  5.36it/s]

epoch 6 batch id 1 loss 1.7941620349884033 train acc 0.6875
epoch 6 batch id 51 loss 1.6019740104675293 train acc 0.8272058823529411
epoch 6 batch id 101 loss 2.6480369567871094 train acc 0.817450495049505
epoch 6 batch id 151 loss 1.1684792041778564 train acc 0.8216059602649006
epoch 6 batch id 201 loss 1.338679552078247 train acc 0.8212064676616916
epoch 6 batch id 251 loss 1.1742603778839111 train acc 0.8249501992031872
epoch 6 batch id 301 loss 0.8511338233947754 train acc 0.8241279069767442
epoch 6 batch id 351 loss 1.7331626415252686 train acc 0.8253205128205128
epoch 6 batch id 401 loss 0.4358733296394348 train acc 0.8232543640897756
epoch 6 batch id 451 loss 1.0314662456512451 train acc 0.8237250554323725
epoch 6 batch id 501 loss 1.1318814754486084 train acc 0.8223552894211577
epoch 6 train acc 0.8228550295857988
epoch 6 test acc 0.7467105263157895



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:36,  1.68it/s][A
  3%|▎         | 2/63 [00:00<00:28,  2.11it/s][A
  5%|▍         | 3/63 [00:00<00:23,  2.57it/s][A
  6%|▋         | 4/63 [00:01<00:19,  3.04it/s][A
  8%|▊         | 5/63 [00:01<00:16,  3.50it/s][A
 10%|▉         | 6/63 [00:01<00:14,  3.92it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.27it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.55it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.77it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.95it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.08it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.17it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.20it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.24it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.28it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.31it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.34it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.35it/s][A
 30%|███       | 19/63 [00:03<00:08,  5.36it/s]

epoch 7 batch id 1 loss 1.1068549156188965 train acc 0.8125
epoch 7 batch id 51 loss 0.7541722059249878 train acc 0.8517156862745098
epoch 7 batch id 101 loss 0.9001847505569458 train acc 0.8483910891089109
epoch 7 batch id 151 loss 0.37690576910972595 train acc 0.8543046357615894
epoch 7 batch id 201 loss 0.890432596206665 train acc 0.8541666666666666
epoch 7 batch id 251 loss 0.45337826013565063 train acc 0.8573207171314741
epoch 7 batch id 301 loss 1.8197497129440308 train acc 0.8552740863787376
epoch 7 batch id 351 loss 1.7702393531799316 train acc 0.853988603988604
epoch 7 batch id 401 loss 0.6764931678771973 train acc 0.851932668329177
epoch 7 batch id 451 loss 0.4969513416290283 train acc 0.8528270509977827
epoch 7 batch id 501 loss 0.2648102045059204 train acc 0.8525449101796407
epoch 7 train acc 0.852810650887574


 70%|███████   | 7/10 [39:32<16:42, 334.25s/it]

epoch 7 test acc 0.7456140350877193
epoch 8 batch id 1 loss 1.3001924753189087 train acc 0.8125
epoch 8 batch id 51 loss 0.633154034614563 train acc 0.8872549019607843
epoch 8 batch id 101 loss 0.664609432220459 train acc 0.8793316831683168
epoch 8 batch id 151 loss 0.7700498700141907 train acc 0.8687913907284768
epoch 8 batch id 201 loss 0.6453676223754883 train acc 0.8706467661691543
epoch 8 batch id 251 loss 0.347959041595459 train acc 0.8687749003984063
epoch 8 batch id 301 loss 1.2772197723388672 train acc 0.8689784053156147
epoch 8 batch id 351 loss 0.33118104934692383 train acc 0.8691239316239316
epoch 8 batch id 401 loss 0.2745199203491211 train acc 0.8701683291770573
epoch 8 batch id 451 loss 0.6136898994445801 train acc 0.8694567627494457
epoch 8 batch id 501 loss 1.3897292613983154 train acc 0.8711327345309381
epoch 8 train acc 0.8717948717948718


 80%|████████  | 8/10 [44:46<10:56, 328.28s/it]

epoch 8 test acc 0.7368421052631579
epoch 9 batch id 1 loss 1.3073680400848389 train acc 0.8125
epoch 9 batch id 51 loss 0.8058295845985413 train acc 0.9080882352941176
epoch 9 batch id 101 loss 0.6123790144920349 train acc 0.9071782178217822
epoch 9 batch id 151 loss 0.3597513735294342 train acc 0.9048013245033113
epoch 9 batch id 201 loss 0.18371900916099548 train acc 0.8986318407960199
epoch 9 batch id 251 loss 0.2223648875951767 train acc 0.9021414342629482
epoch 9 batch id 301 loss 0.3700425624847412 train acc 0.9034468438538206
epoch 9 batch id 351 loss 0.6078858375549316 train acc 0.9017094017094017
epoch 9 batch id 401 loss 0.34969258308410645 train acc 0.9025872817955112
epoch 9 batch id 451 loss 0.7501370906829834 train acc 0.9011917960088692
epoch 9 batch id 501 loss 0.2942877411842346 train acc 0.9014471057884231
epoch 9 train acc 0.9012573964497042
epoch 9 test acc 0.7467105263157895



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:37,  1.67it/s][A
  3%|▎         | 2/63 [00:00<00:29,  2.10it/s][A
  5%|▍         | 3/63 [00:00<00:23,  2.57it/s][A
  6%|▋         | 4/63 [00:01<00:19,  3.05it/s][A
  8%|▊         | 5/63 [00:01<00:16,  3.51it/s][A
 10%|▉         | 6/63 [00:01<00:14,  3.92it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.27it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.55it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.78it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.96it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.08it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.17it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.23it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.27it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.31it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.33it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.35it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.36it/s][A
 30%|███       | 19/63 [00:03<00:08,  5.37it/s]

epoch 10 batch id 1 loss 0.31127220392227173 train acc 0.9375
epoch 10 batch id 51 loss 0.31219083070755005 train acc 0.9276960784313726
epoch 10 batch id 101 loss 0.28754186630249023 train acc 0.9282178217821783
epoch 10 batch id 151 loss 0.42067575454711914 train acc 0.921771523178808
epoch 10 batch id 201 loss 0.6739548444747925 train acc 0.9175995024875622
epoch 10 batch id 251 loss 0.44959956407546997 train acc 0.9198207171314741
epoch 10 batch id 301 loss 0.3393735885620117 train acc 0.9177740863787376
epoch 10 batch id 351 loss 0.5013200640678406 train acc 0.9172008547008547
epoch 10 batch id 401 loss 0.32029664516448975 train acc 0.9166147132169576
epoch 10 batch id 451 loss 0.20819903910160065 train acc 0.9156042128603105
epoch 10 batch id 501 loss 0.42874109745025635 train acc 0.9155439121756487
epoch 10 train acc 0.9149408284023669
epoch 10 test acc 0.75



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:36,  1.72it/s][A
  3%|▎         | 2/63 [00:00<00:28,  2.15it/s][A
  5%|▍         | 3/63 [00:00<00:22,  2.63it/s][A
  6%|▋         | 4/63 [00:01<00:18,  3.11it/s][A
  8%|▊         | 5/63 [00:01<00:16,  3.57it/s][A
 10%|▉         | 6/63 [00:01<00:14,  3.97it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.29it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.57it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.80it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.97it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.09it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.17it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.23it/s][A
 22%|██▏       | 14/63 [00:02<00:09,  5.28it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.32it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.34it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.35it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.37it/s][A
 30%|███       | 19/63 [00:03<00:08,  5.38it/s]

FOLD 4
--------------------------------


Some weights of the model checkpoint at xlm-roberta-large were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-large and are newly initialized: ['classifier.den

epoch 1 batch id 1 loss 7.503673076629639 train acc 0.0
epoch 1 batch id 51 loss 3.917302131652832 train acc 0.42892156862745096
epoch 1 batch id 101 loss 6.122945785522461 train acc 0.44183168316831684
epoch 1 batch id 151 loss 3.840219736099243 train acc 0.4586092715231788
epoch 1 batch id 201 loss 3.997859239578247 train acc 0.4642412935323383
epoch 1 batch id 251 loss 4.7531633377075195 train acc 0.4688745019920319
epoch 1 batch id 301 loss 3.4264302253723145 train acc 0.47570598006644516
epoch 1 batch id 351 loss 5.891858100891113 train acc 0.47987891737891736
epoch 1 batch id 401 loss 3.06376576423645 train acc 0.48332294264339154
epoch 1 batch id 451 loss 4.92923641204834 train acc 0.49002217294900224
epoch 1 batch id 501 loss 3.4654412269592285 train acc 0.5022455089820359
epoch 1 train acc 0.5046844181459567
epoch 1 test acc 0.6535087719298246



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:38,  1.61it/s][A
  3%|▎         | 2/63 [00:00<00:29,  2.04it/s][A
  5%|▍         | 3/63 [00:00<00:23,  2.50it/s][A
  6%|▋         | 4/63 [00:01<00:19,  2.98it/s][A
  8%|▊         | 5/63 [00:01<00:16,  3.44it/s][A
 10%|▉         | 6/63 [00:01<00:14,  3.84it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.20it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.49it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.72it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.90it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.03it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.13it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.21it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.26it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.30it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.33it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.34it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.35it/s][A
 30%|███       | 19/63 [00:03<00:08,  5.36it/s]

epoch 2 batch id 1 loss 2.7660579681396484 train acc 0.75
epoch 2 batch id 51 loss 2.7320444583892822 train acc 0.6495098039215687
epoch 2 batch id 101 loss 2.5561084747314453 train acc 0.6336633663366337
epoch 2 batch id 151 loss 3.2375545501708984 train acc 0.6374172185430463
epoch 2 batch id 201 loss 2.4760847091674805 train acc 0.6464552238805971
epoch 2 batch id 251 loss 2.8109042644500732 train acc 0.6469123505976095
epoch 2 batch id 301 loss 1.8277431726455688 train acc 0.6524086378737541
epoch 2 batch id 351 loss 4.306055545806885 train acc 0.6543803418803419
epoch 2 batch id 401 loss 2.488475799560547 train acc 0.6610037406483791
epoch 2 batch id 451 loss 1.3727705478668213 train acc 0.6667128603104213
epoch 2 batch id 501 loss 1.9866206645965576 train acc 0.6730289421157685
epoch 2 train acc 0.6734467455621301
epoch 2 test acc 0.7390350877192983



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:38,  1.63it/s][A
  3%|▎         | 2/63 [00:00<00:29,  2.06it/s][A
  5%|▍         | 3/63 [00:00<00:23,  2.53it/s][A
  6%|▋         | 4/63 [00:01<00:19,  3.00it/s][A
  8%|▊         | 5/63 [00:01<00:16,  3.46it/s][A
 10%|▉         | 6/63 [00:01<00:14,  3.88it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.22it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.52it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.74it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.92it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.05it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.15it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.22it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.26it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.29it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.32it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.34it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.35it/s][A
 30%|███       | 19/63 [00:03<00:08,  5.35it/s]

epoch 3 batch id 1 loss 2.970865249633789 train acc 0.5625
epoch 3 batch id 51 loss 1.7726314067840576 train acc 0.7671568627450981
epoch 3 batch id 101 loss 2.512176513671875 train acc 0.7580445544554455
epoch 3 batch id 151 loss 2.0297486782073975 train acc 0.7574503311258278
epoch 3 batch id 201 loss 1.2037031650543213 train acc 0.7633706467661692
epoch 3 batch id 251 loss 1.363210678100586 train acc 0.7559760956175299
epoch 3 batch id 301 loss 1.1574097871780396 train acc 0.7526993355481728
epoch 3 batch id 351 loss 2.0982677936553955 train acc 0.7485754985754985
epoch 3 batch id 401 loss 1.8953347206115723 train acc 0.7468827930174564
epoch 3 batch id 451 loss 1.484929084777832 train acc 0.7448725055432373
epoch 3 batch id 501 loss 2.302196979522705 train acc 0.7448852295409182
epoch 3 train acc 0.7455621301775148
epoch 3 test acc 0.7642543859649122



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:36,  1.68it/s][A
  3%|▎         | 2/63 [00:00<00:28,  2.11it/s][A
  5%|▍         | 3/63 [00:00<00:23,  2.58it/s][A
  6%|▋         | 4/63 [00:01<00:19,  3.06it/s][A
  8%|▊         | 5/63 [00:01<00:16,  3.51it/s][A
 10%|▉         | 6/63 [00:01<00:14,  3.92it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.27it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.55it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.78it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.94it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.07it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.17it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.23it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.27it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.31it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.33it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.35it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.36it/s][A
 30%|███       | 19/63 [00:03<00:08,  5.37it/s]

epoch 4 batch id 1 loss 1.4364182949066162 train acc 0.8125
epoch 4 batch id 51 loss 1.8642621040344238 train acc 0.8088235294117647
epoch 4 batch id 101 loss 1.2472496032714844 train acc 0.8044554455445545
epoch 4 batch id 151 loss 1.0273888111114502 train acc 0.8000827814569537
epoch 4 batch id 201 loss 1.1815991401672363 train acc 0.8013059701492538
epoch 4 batch id 251 loss 1.5279319286346436 train acc 0.7950697211155379
epoch 4 batch id 301 loss 0.8480808734893799 train acc 0.7956810631229236
epoch 4 batch id 351 loss 1.995561122894287 train acc 0.7955840455840456
epoch 4 batch id 401 loss 1.954269528388977 train acc 0.7937967581047382
epoch 4 batch id 451 loss 1.658616304397583 train acc 0.7925443458980045
epoch 4 batch id 501 loss 2.198399543762207 train acc 0.7924151696606786
epoch 4 train acc 0.7920364891518737
epoch 4 test acc 0.7730263157894737



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:37,  1.64it/s][A
  3%|▎         | 2/63 [00:00<00:29,  2.07it/s][A
  5%|▍         | 3/63 [00:00<00:23,  2.53it/s][A
  6%|▋         | 4/63 [00:01<00:19,  3.01it/s][A
  8%|▊         | 5/63 [00:01<00:16,  3.47it/s][A
 10%|▉         | 6/63 [00:01<00:14,  3.87it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.23it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.52it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.74it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.90it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.02it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.11it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.18it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.22it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.25it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.29it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.30it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.31it/s][A
 30%|███       | 19/63 [00:03<00:08,  5.33it/s]

epoch 5 batch id 1 loss 0.5735074281692505 train acc 0.9375
epoch 5 batch id 51 loss 1.1677957773208618 train acc 0.8455882352941176
epoch 5 batch id 101 loss 0.8713928461074829 train acc 0.843440594059406
epoch 5 batch id 151 loss 1.1706230640411377 train acc 0.8385761589403974
epoch 5 batch id 201 loss 1.1426082849502563 train acc 0.8383084577114428
epoch 5 batch id 251 loss 1.3858174085617065 train acc 0.8351593625498008
epoch 5 batch id 301 loss 1.3548710346221924 train acc 0.831187707641196
epoch 5 batch id 351 loss 1.1042332649230957 train acc 0.8269230769230769
epoch 5 batch id 401 loss 0.8791475296020508 train acc 0.8255922693266833
epoch 5 batch id 451 loss 1.0559144020080566 train acc 0.8248337028824834
epoch 5 batch id 501 loss 1.3685224056243896 train acc 0.8248502994011976
epoch 5 train acc 0.823594674556213
epoch 5 test acc 0.7752192982456141



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:38,  1.62it/s][A
  3%|▎         | 2/63 [00:00<00:29,  2.05it/s][A
  5%|▍         | 3/63 [00:00<00:23,  2.52it/s][A
  6%|▋         | 4/63 [00:01<00:19,  3.00it/s][A
  8%|▊         | 5/63 [00:01<00:16,  3.47it/s][A
 10%|▉         | 6/63 [00:01<00:14,  3.88it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.21it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.50it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.73it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.91it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.04it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.13it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.19it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.24it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.28it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.32it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.34it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.36it/s][A
 30%|███       | 19/63 [00:03<00:08,  5.37it/s]

epoch 6 batch id 1 loss 1.6167807579040527 train acc 0.75
epoch 6 batch id 51 loss 1.390671968460083 train acc 0.8615196078431373
epoch 6 batch id 101 loss 1.4860212802886963 train acc 0.8638613861386139
epoch 6 batch id 151 loss 1.0907702445983887 train acc 0.8559602649006622
epoch 6 batch id 201 loss 0.11667458713054657 train acc 0.8606965174129353
epoch 6 batch id 251 loss 1.0055930614471436 train acc 0.8563247011952191
epoch 6 batch id 301 loss 0.4474627375602722 train acc 0.8556893687707641
epoch 6 batch id 351 loss 0.7825188636779785 train acc 0.8555911680911681
epoch 6 batch id 401 loss 2.525315761566162 train acc 0.8547381546134664
epoch 6 batch id 451 loss 0.6070845127105713 train acc 0.8537971175166297
epoch 6 batch id 501 loss 0.347520649433136 train acc 0.8529191616766467
epoch 6 train acc 0.8533037475345168


 60%|██████    | 6/10 [33:45<22:13, 333.44s/it]

epoch 6 test acc 0.768640350877193
epoch 7 batch id 1 loss 0.6368852257728577 train acc 0.9375
epoch 7 batch id 51 loss 0.2239939570426941 train acc 0.8762254901960784
epoch 7 batch id 101 loss 0.48282691836357117 train acc 0.8793316831683168
epoch 7 batch id 151 loss 0.5089688301086426 train acc 0.8787251655629139
epoch 7 batch id 201 loss 0.7083113789558411 train acc 0.8784203980099502
epoch 7 batch id 251 loss 0.6427083611488342 train acc 0.8762450199203188
epoch 7 batch id 301 loss 1.0024783611297607 train acc 0.876453488372093
epoch 7 batch id 351 loss 1.346308708190918 train acc 0.8776709401709402
epoch 7 batch id 401 loss 0.6192001104354858 train acc 0.876714463840399
epoch 7 batch id 451 loss 0.43425121903419495 train acc 0.8776330376940134
epoch 7 batch id 501 loss 1.445627212524414 train acc 0.8787425149700598
epoch 7 train acc 0.8795611439842209


 70%|███████   | 7/10 [38:59<16:23, 327.68s/it]

epoch 7 test acc 0.7587719298245614
epoch 8 batch id 1 loss 1.1594576835632324 train acc 0.75
epoch 8 batch id 51 loss 0.2818230986595154 train acc 0.8995098039215687
epoch 8 batch id 101 loss 1.378342628479004 train acc 0.9047029702970297
epoch 8 batch id 151 loss 0.9801512956619263 train acc 0.9035596026490066
epoch 8 batch id 201 loss 0.47133609652519226 train acc 0.904228855721393
epoch 8 batch id 251 loss 0.3246000409126282 train acc 0.9023904382470119
epoch 8 batch id 301 loss 1.237083911895752 train acc 0.8990863787375415
epoch 8 batch id 351 loss 0.685194730758667 train acc 0.8983262108262108
epoch 8 batch id 401 loss 0.6118612289428711 train acc 0.8982231920199502
epoch 8 batch id 451 loss 1.2192742824554443 train acc 0.8978658536585366
epoch 8 batch id 501 loss 0.508614182472229 train acc 0.8982035928143712
epoch 8 train acc 0.898052268244576


 80%|████████  | 8/10 [44:13<10:47, 323.65s/it]

epoch 8 test acc 0.7587719298245614
epoch 9 batch id 1 loss 0.3587852120399475 train acc 0.9375
epoch 9 batch id 51 loss 0.4332048296928406 train acc 0.9129901960784313
epoch 9 batch id 101 loss 0.3121926784515381 train acc 0.9183168316831684
epoch 9 batch id 151 loss 0.1299096643924713 train acc 0.9139072847682119
epoch 9 batch id 201 loss 0.6323639154434204 train acc 0.9073383084577115
epoch 9 batch id 251 loss 0.21301501989364624 train acc 0.9123505976095617
epoch 9 batch id 301 loss 0.8825037479400635 train acc 0.9113372093023255
epoch 9 batch id 351 loss 0.5072129964828491 train acc 0.9111467236467237
epoch 9 batch id 401 loss 0.21761661767959595 train acc 0.913497506234414
epoch 9 batch id 451 loss 0.527596652507782 train acc 0.9143569844789357
epoch 9 batch id 501 loss 0.20759332180023193 train acc 0.9135479041916168
epoch 9 train acc 0.9130917159763313
epoch 9 test acc 0.7785087719298246



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:41,  1.51it/s][A
  3%|▎         | 2/63 [00:00<00:31,  1.92it/s][A
  5%|▍         | 3/63 [00:01<00:25,  2.38it/s][A
  6%|▋         | 4/63 [00:01<00:20,  2.86it/s][A
  8%|▊         | 5/63 [00:01<00:17,  3.32it/s][A
 10%|▉         | 6/63 [00:01<00:15,  3.73it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.11it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.42it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.68it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.87it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.01it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.11it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.19it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.24it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.26it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.29it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.30it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.31it/s][A
 30%|███       | 19/63 [00:04<00:08,  5.32it/s]

epoch 10 batch id 1 loss 0.2550159990787506 train acc 0.9375
epoch 10 batch id 51 loss 0.19818398356437683 train acc 0.9338235294117647
epoch 10 batch id 101 loss 0.8906264305114746 train acc 0.9288366336633663
epoch 10 batch id 151 loss 0.35911232233047485 train acc 0.9271523178807947
epoch 10 batch id 201 loss 0.7823179960250854 train acc 0.9259950248756219
epoch 10 batch id 251 loss 0.8212039470672607 train acc 0.9243027888446215
epoch 10 batch id 301 loss 0.579461932182312 train acc 0.9271179401993356
epoch 10 batch id 351 loss 0.42613404989242554 train acc 0.9273504273504274
epoch 10 batch id 401 loss 0.21092531085014343 train acc 0.9251870324189526
epoch 10 batch id 451 loss 0.7236059308052063 train acc 0.9229490022172949
epoch 10 batch id 501 loss 0.38777410984039307 train acc 0.9225299401197605
epoch 10 train acc 0.921844181459566
epoch 10 test acc 0.7916666666666666



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:37,  1.67it/s][A
  3%|▎         | 2/63 [00:00<00:29,  2.10it/s][A
  5%|▍         | 3/63 [00:00<00:23,  2.57it/s][A
  6%|▋         | 4/63 [00:01<00:19,  3.05it/s][A
  8%|▊         | 5/63 [00:01<00:16,  3.51it/s][A
 10%|▉         | 6/63 [00:01<00:14,  3.92it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.27it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.54it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.78it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.94it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.05it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.15it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.22it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.26it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.29it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.31it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.34it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.35it/s][A
 30%|███       | 19/63 [00:03<00:08,  5.36it/s]

FOLD 5
--------------------------------


Some weights of the model checkpoint at xlm-roberta-large were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-large and are newly initialized: ['classifier.den

epoch 1 batch id 1 loss 7.7137041091918945 train acc 0.0
epoch 1 batch id 51 loss 5.521160125732422 train acc 0.4178921568627451
epoch 1 batch id 101 loss 2.5601840019226074 train acc 0.4535891089108911
epoch 1 batch id 151 loss 4.201617240905762 train acc 0.4615066225165563
epoch 1 batch id 201 loss 5.4643778800964355 train acc 0.4679726368159204
epoch 1 batch id 251 loss 2.991649627685547 train acc 0.4815737051792829
epoch 1 batch id 301 loss 5.154526710510254 train acc 0.4844269102990033
epoch 1 batch id 351 loss 4.58193826675415 train acc 0.48023504273504275
epoch 1 batch id 401 loss 3.1624221801757812 train acc 0.4802057356608479
epoch 1 batch id 451 loss 4.085047721862793 train acc 0.4815687361419069
epoch 1 batch id 501 loss 2.7792105674743652 train acc 0.48640219560878245
epoch 1 train acc 0.486439842209073
epoch 1 test acc 0.5646929824561403



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:42,  1.45it/s][A
  3%|▎         | 2/63 [00:00<00:32,  1.86it/s][A
  5%|▍         | 3/63 [00:01<00:25,  2.32it/s][A
  6%|▋         | 4/63 [00:01<00:21,  2.80it/s][A
  8%|▊         | 5/63 [00:01<00:17,  3.27it/s][A
 10%|▉         | 6/63 [00:01<00:15,  3.68it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.08it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.41it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.67it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.87it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.01it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.12it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.20it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.25it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.29it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.31it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.33it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.34it/s][A
 30%|███       | 19/63 [00:04<00:08,  5.36it/s]

epoch 2 batch id 1 loss 4.172473907470703 train acc 0.5
epoch 2 batch id 51 loss 3.2087604999542236 train acc 0.5698529411764706
epoch 2 batch id 101 loss 2.958298683166504 train acc 0.5816831683168316
epoch 2 batch id 151 loss 2.9814083576202393 train acc 0.5865066225165563
epoch 2 batch id 201 loss 2.1942620277404785 train acc 0.5817786069651741
epoch 2 batch id 251 loss 2.536470890045166 train acc 0.5938745019920318
epoch 2 batch id 301 loss 2.367278575897217 train acc 0.6044435215946844
epoch 2 batch id 351 loss 3.045121669769287 train acc 0.6128917378917379
epoch 2 batch id 401 loss 2.204547882080078 train acc 0.6203241895261845
epoch 2 batch id 451 loss 1.6690654754638672 train acc 0.6273558758314856
epoch 2 batch id 501 loss 2.798460006713867 train acc 0.6339820359281437
epoch 2 train acc 0.6357248520710059
epoch 2 test acc 0.7116228070175439



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:38,  1.60it/s][A
  3%|▎         | 2/63 [00:00<00:30,  2.02it/s][A
  5%|▍         | 3/63 [00:01<00:24,  2.49it/s][A
  6%|▋         | 4/63 [00:01<00:19,  2.96it/s][A
  8%|▊         | 5/63 [00:01<00:16,  3.43it/s][A
 10%|▉         | 6/63 [00:01<00:14,  3.83it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.18it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.48it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.72it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.91it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.05it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.15it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.21it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.25it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.28it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.30it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.33it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.35it/s][A
 30%|███       | 19/63 [00:03<00:08,  5.35it/s]

epoch 3 batch id 1 loss 0.5881452560424805 train acc 0.9375
epoch 3 batch id 51 loss 1.5586509704589844 train acc 0.7169117647058824
epoch 3 batch id 101 loss 2.1006078720092773 train acc 0.7165841584158416
epoch 3 batch id 151 loss 1.7706882953643799 train acc 0.7181291390728477
epoch 3 batch id 201 loss 1.094468593597412 train acc 0.7217039800995025
epoch 3 batch id 251 loss 2.0407352447509766 train acc 0.7260956175298805
epoch 3 batch id 301 loss 2.2342772483825684 train acc 0.731312292358804
epoch 3 batch id 351 loss 1.9939899444580078 train acc 0.7314814814814815
epoch 3 batch id 401 loss 1.5903716087341309 train acc 0.7305174563591023
epoch 3 batch id 451 loss 3.2906107902526855 train acc 0.731430155210643
epoch 3 batch id 501 loss 0.8279998302459717 train acc 0.7339071856287425
epoch 3 train acc 0.7340976331360947


 30%|███       | 3/10 [16:30<38:24, 329.28s/it]

epoch 3 test acc 0.6831140350877193
epoch 4 batch id 1 loss 1.2438547611236572 train acc 0.75
epoch 4 batch id 51 loss 2.385138511657715 train acc 0.7916666666666666
epoch 4 batch id 101 loss 2.5472209453582764 train acc 0.7840346534653465
epoch 4 batch id 151 loss 2.648919105529785 train acc 0.7831125827814569
epoch 4 batch id 201 loss 1.3995912075042725 train acc 0.7845149253731343
epoch 4 batch id 251 loss 3.1240618228912354 train acc 0.7828685258964143
epoch 4 batch id 301 loss 1.7792389392852783 train acc 0.779485049833887
epoch 4 batch id 351 loss 2.02341890335083 train acc 0.7777777777777778
epoch 4 batch id 401 loss 0.9134730100631714 train acc 0.7788341645885287
epoch 4 batch id 451 loss 1.017608880996704 train acc 0.7784090909090909
epoch 4 batch id 501 loss 1.5676016807556152 train acc 0.7780688622754491
epoch 4 train acc 0.777983234714004
epoch 4 test acc 0.7730263157894737



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:40,  1.54it/s][A
  3%|▎         | 2/63 [00:00<00:31,  1.96it/s][A
  5%|▍         | 3/63 [00:01<00:24,  2.42it/s][A
  6%|▋         | 4/63 [00:01<00:20,  2.89it/s][A
  8%|▊         | 5/63 [00:01<00:17,  3.36it/s][A
 10%|▉         | 6/63 [00:01<00:15,  3.77it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.14it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.46it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.70it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.90it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.03it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.14it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.21it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.27it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.30it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.30it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.32it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.33it/s][A
 30%|███       | 19/63 [00:04<00:08,  5.34it/s]

epoch 5 batch id 1 loss 1.818228840827942 train acc 0.8125
epoch 5 batch id 51 loss 1.6514945030212402 train acc 0.8137254901960784
epoch 5 batch id 101 loss 2.345275402069092 train acc 0.8050742574257426
epoch 5 batch id 151 loss 2.299896717071533 train acc 0.8108443708609272
epoch 5 batch id 201 loss 1.277360439300537 train acc 0.8109452736318408
epoch 5 batch id 251 loss 0.6680982112884521 train acc 0.8129980079681275
epoch 5 batch id 301 loss 0.6164935827255249 train acc 0.8104235880398671
epoch 5 batch id 351 loss 0.9277228116989136 train acc 0.8105413105413105
epoch 5 batch id 401 loss 0.8097835779190063 train acc 0.8087593516209476
epoch 5 batch id 451 loss 1.0151464939117432 train acc 0.8111141906873615
epoch 5 batch id 501 loss 2.2980690002441406 train acc 0.8095059880239521
epoch 5 train acc 0.8092948717948718


 50%|█████     | 5/10 [27:30<27:20, 328.06s/it]

epoch 5 test acc 0.7521929824561403
epoch 6 batch id 1 loss 1.5183241367340088 train acc 0.8125
epoch 6 batch id 51 loss 1.3873034715652466 train acc 0.8259803921568627
epoch 6 batch id 101 loss 0.5759206414222717 train acc 0.8397277227722773
epoch 6 batch id 151 loss 1.394972324371338 train acc 0.8385761589403974
epoch 6 batch id 201 loss 0.5611498951911926 train acc 0.8370646766169154
epoch 6 batch id 251 loss 0.7833223342895508 train acc 0.8374003984063745
epoch 6 batch id 301 loss 0.7312304973602295 train acc 0.8384551495016611
epoch 6 batch id 351 loss 0.9330663681030273 train acc 0.8393874643874644
epoch 6 batch id 401 loss 1.305004596710205 train acc 0.8380610972568578
epoch 6 batch id 451 loss 1.45818293094635 train acc 0.8391075388026608
epoch 6 batch id 501 loss 0.6250144243240356 train acc 0.8388223552894212
epoch 6 train acc 0.838387573964497
epoch 6 test acc 0.7774122807017544



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:36,  1.70it/s][A
  3%|▎         | 2/63 [00:00<00:28,  2.14it/s][A
  5%|▍         | 3/63 [00:00<00:23,  2.60it/s][A
  6%|▋         | 4/63 [00:01<00:19,  3.08it/s][A
  8%|▊         | 5/63 [00:01<00:16,  3.53it/s][A
 10%|▉         | 6/63 [00:01<00:14,  3.92it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.26it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.54it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.78it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.96it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.08it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.17it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.24it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.28it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.31it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.34it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.35it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.36it/s][A
 30%|███       | 19/63 [00:03<00:08,  5.37it/s]

epoch 7 batch id 1 loss 0.5462955236434937 train acc 0.9375
epoch 7 batch id 51 loss 1.2607892751693726 train acc 0.8909313725490197
epoch 7 batch id 101 loss 0.530347466468811 train acc 0.8935643564356436
epoch 7 batch id 151 loss 1.2788076400756836 train acc 0.8932119205298014
epoch 7 batch id 201 loss 0.427947461605072 train acc 0.8858830845771144
epoch 7 batch id 251 loss 0.5296128392219543 train acc 0.8794820717131474
epoch 7 batch id 301 loss 1.0387022495269775 train acc 0.8766611295681063
epoch 7 batch id 351 loss 1.0504752397537231 train acc 0.874465811965812
epoch 7 batch id 401 loss 0.6105303764343262 train acc 0.8714152119700748
epoch 7 batch id 451 loss 0.9051042795181274 train acc 0.8727827050997783
epoch 7 batch id 501 loss 1.43509840965271 train acc 0.8727544910179641
epoch 7 train acc 0.8716715976331361


 70%|███████   | 7/10 [38:28<16:22, 327.41s/it]

epoch 7 test acc 0.7664473684210527
epoch 8 batch id 1 loss 0.6883273124694824 train acc 0.8125
epoch 8 batch id 51 loss 0.45608168840408325 train acc 0.9019607843137255
epoch 8 batch id 101 loss 0.9646722078323364 train acc 0.9003712871287128
epoch 8 batch id 151 loss 0.7254726886749268 train acc 0.8998344370860927
epoch 8 batch id 201 loss 1.577191948890686 train acc 0.9011194029850746
epoch 8 batch id 251 loss 1.095804214477539 train acc 0.9018924302788844
epoch 8 batch id 301 loss 1.146911382675171 train acc 0.8988787375415282
epoch 8 batch id 351 loss 1.3600995540618896 train acc 0.8986823361823362
epoch 8 batch id 401 loss 0.7099359035491943 train acc 0.899002493765586
epoch 8 batch id 451 loss 0.289754182100296 train acc 0.8967572062084257
epoch 8 batch id 501 loss 1.215345859527588 train acc 0.8972055888223552
epoch 8 train acc 0.8970660749506904
epoch 8 test acc 0.7796052631578947



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:40,  1.54it/s][A
  3%|▎         | 2/63 [00:00<00:31,  1.95it/s][A
  5%|▍         | 3/63 [00:01<00:24,  2.41it/s][A
  6%|▋         | 4/63 [00:01<00:20,  2.88it/s][A
  8%|▊         | 5/63 [00:01<00:17,  3.35it/s][A
 10%|▉         | 6/63 [00:01<00:15,  3.76it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.14it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.45it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.69it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.88it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.01it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.12it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.20it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.25it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.29it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.32it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.35it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.36it/s][A
 30%|███       | 19/63 [00:04<00:08,  5.36it/s]

epoch 9 batch id 1 loss 0.2121458798646927 train acc 0.9375
epoch 9 batch id 51 loss 0.1155855804681778 train acc 0.8946078431372549
epoch 9 batch id 101 loss 0.3394027352333069 train acc 0.9047029702970297
epoch 9 batch id 151 loss 1.3581706285476685 train acc 0.910182119205298
epoch 9 batch id 201 loss 0.16265571117401123 train acc 0.9110696517412935
epoch 9 batch id 251 loss 0.844475507736206 train acc 0.9088645418326693
epoch 9 batch id 301 loss 0.3458920121192932 train acc 0.9096760797342193
epoch 9 batch id 351 loss 0.7179258465766907 train acc 0.9093660968660968
epoch 9 batch id 401 loss 1.1314221620559692 train acc 0.90928927680798
epoch 9 batch id 451 loss 0.4723777174949646 train acc 0.9075665188470067
epoch 9 batch id 501 loss 0.45690011978149414 train acc 0.9069361277445109
epoch 9 train acc 0.907051282051282


 90%|█████████ | 9/10 [49:28<05:27, 327.16s/it]

epoch 9 test acc 0.7752192982456141
epoch 10 batch id 1 loss 0.6012414693832397 train acc 0.8125
epoch 10 batch id 51 loss 0.6630504131317139 train acc 0.9313725490196079
epoch 10 batch id 101 loss 0.2357138991355896 train acc 0.9294554455445545
epoch 10 batch id 151 loss 0.43255987763404846 train acc 0.9304635761589404
epoch 10 batch id 201 loss 0.6299604177474976 train acc 0.929726368159204
epoch 10 batch id 251 loss 0.47042539715766907 train acc 0.9287848605577689
epoch 10 batch id 301 loss 0.9763860702514648 train acc 0.9264950166112956
epoch 10 batch id 351 loss 0.5887162685394287 train acc 0.9252136752136753
epoch 10 batch id 401 loss 1.0467529296875 train acc 0.9230049875311721
epoch 10 batch id 451 loss 0.5143899917602539 train acc 0.9221175166297118
epoch 10 batch id 501 loss 1.1042578220367432 train acc 0.9209081836327345
epoch 10 train acc 0.9208579881656804


100%|██████████| 10/10 [54:42<00:00, 328.28s/it]
  0%|          | 0/63 [00:00<?, ?it/s]

epoch 10 test acc 0.7489035087719298


100%|██████████| 63/63 [00:12<00:00,  5.21it/s]


FOLD 6
--------------------------------


Some weights of the model checkpoint at xlm-roberta-large were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-large and are newly initialized: ['classifier.den

epoch 1 batch id 1 loss 7.489984035491943 train acc 0.0
epoch 1 batch id 51 loss 5.767504692077637 train acc 0.41911764705882354
epoch 1 batch id 101 loss 3.2605435848236084 train acc 0.46410891089108913
epoch 1 batch id 151 loss 4.648105621337891 train acc 0.47019867549668876
epoch 1 batch id 201 loss 4.041841506958008 train acc 0.47823383084577115
epoch 1 batch id 251 loss 5.290891647338867 train acc 0.4793326693227092
epoch 1 batch id 301 loss 5.704828262329102 train acc 0.47799003322259137
epoch 1 batch id 351 loss 4.6548662185668945 train acc 0.4805911680911681
epoch 1 batch id 401 loss 4.417740821838379 train acc 0.48316708229426436
epoch 1 batch id 451 loss 2.7155723571777344 train acc 0.48697339246119736
epoch 1 batch id 501 loss 3.5890941619873047 train acc 0.4948852295409182
epoch 1 train acc 0.4961785009861933
epoch 1 test acc 0.6162280701754386



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:40,  1.55it/s][A
  3%|▎         | 2/63 [00:00<00:31,  1.96it/s][A
  5%|▍         | 3/63 [00:01<00:24,  2.43it/s][A
  6%|▋         | 4/63 [00:01<00:20,  2.91it/s][A
  8%|▊         | 5/63 [00:01<00:17,  3.37it/s][A
 10%|▉         | 6/63 [00:01<00:15,  3.78it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.16it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.47it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.71it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.91it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.04it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.14it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.21it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.27it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.30it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.33it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.35it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.35it/s][A
 30%|███       | 19/63 [00:03<00:08,  5.36it/s]

epoch 2 batch id 1 loss 4.718410015106201 train acc 0.375
epoch 2 batch id 51 loss 2.4023494720458984 train acc 0.5833333333333334
epoch 2 batch id 101 loss 3.026547908782959 train acc 0.6002475247524752
epoch 2 batch id 151 loss 2.3651585578918457 train acc 0.6150662251655629
epoch 2 batch id 201 loss 1.9500670433044434 train acc 0.6315298507462687
epoch 2 batch id 251 loss 1.366011381149292 train acc 0.6317231075697212
epoch 2 batch id 301 loss 2.590898275375366 train acc 0.6387043189368771
epoch 2 batch id 351 loss 2.2196693420410156 train acc 0.646545584045584
epoch 2 batch id 401 loss 1.85219144821167 train acc 0.650716957605985
epoch 2 batch id 451 loss 2.174534797668457 train acc 0.6568736141906873
epoch 2 batch id 501 loss 0.8704832792282104 train acc 0.6621756487025948
epoch 2 train acc 0.6618589743589743
epoch 2 test acc 0.7280701754385965



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:38,  1.60it/s][A
  3%|▎         | 2/63 [00:00<00:30,  2.02it/s][A
  5%|▍         | 3/63 [00:01<00:24,  2.48it/s][A
  6%|▋         | 4/63 [00:01<00:19,  2.96it/s][A
  8%|▊         | 5/63 [00:01<00:16,  3.42it/s][A
 10%|▉         | 6/63 [00:01<00:14,  3.84it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.19it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.49it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.74it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.91it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.03it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.12it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.18it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.22it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.25it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.28it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.29it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.32it/s][A
 30%|███       | 19/63 [00:03<00:08,  5.33it/s]

epoch 3 batch id 1 loss 2.180521011352539 train acc 0.6875
epoch 3 batch id 51 loss 1.4777097702026367 train acc 0.7647058823529411
epoch 3 batch id 101 loss 2.2185659408569336 train acc 0.7469059405940595
epoch 3 batch id 151 loss 1.4470608234405518 train acc 0.7566225165562914
epoch 3 batch id 201 loss 2.410362720489502 train acc 0.7506218905472637
epoch 3 batch id 251 loss 1.1149039268493652 train acc 0.7497509960159362
epoch 3 batch id 301 loss 1.594448447227478 train acc 0.7506229235880398
epoch 3 batch id 351 loss 1.671682596206665 train acc 0.7480413105413105
epoch 3 batch id 401 loss 1.3864789009094238 train acc 0.748285536159601
epoch 3 batch id 451 loss 1.9117642641067505 train acc 0.7459811529933481
epoch 3 batch id 501 loss 1.483860731124878 train acc 0.7463822355289421
epoch 3 train acc 0.745069033530572
epoch 3 test acc 0.7346491228070176



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:39,  1.57it/s][A
  3%|▎         | 2/63 [00:00<00:30,  2.00it/s][A
  5%|▍         | 3/63 [00:01<00:24,  2.46it/s][A
  6%|▋         | 4/63 [00:01<00:20,  2.94it/s][A
  8%|▊         | 5/63 [00:01<00:17,  3.40it/s][A
 10%|▉         | 6/63 [00:01<00:15,  3.79it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.16it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.45it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.68it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.88it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.02it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.12it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.20it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.26it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.30it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.32it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.34it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.35it/s][A
 30%|███       | 19/63 [00:03<00:08,  5.36it/s]

epoch 4 batch id 1 loss 1.0085957050323486 train acc 0.8125
epoch 4 batch id 51 loss 1.4438062906265259 train acc 0.7781862745098039
epoch 4 batch id 101 loss 1.8874331712722778 train acc 0.7766089108910891
epoch 4 batch id 151 loss 0.6309790015220642 train acc 0.7686258278145696
epoch 4 batch id 201 loss 1.2892693281173706 train acc 0.7699004975124378
epoch 4 batch id 251 loss 0.45121073722839355 train acc 0.7798804780876494
epoch 4 batch id 301 loss 0.9875547885894775 train acc 0.7796926910299004
epoch 4 batch id 351 loss 1.8161191940307617 train acc 0.7806267806267806
epoch 4 batch id 401 loss 1.2268035411834717 train acc 0.7830423940149626
epoch 4 batch id 451 loss 2.3518195152282715 train acc 0.7846452328159645
epoch 4 batch id 501 loss 1.3763641119003296 train acc 0.7850548902195609
epoch 4 train acc 0.7847633136094675
epoch 4 test acc 0.7642543859649122



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:37,  1.64it/s][A
  3%|▎         | 2/63 [00:00<00:29,  2.07it/s][A
  5%|▍         | 3/63 [00:00<00:23,  2.54it/s][A
  6%|▋         | 4/63 [00:01<00:19,  3.02it/s][A
  8%|▊         | 5/63 [00:01<00:16,  3.48it/s][A
 10%|▉         | 6/63 [00:01<00:14,  3.89it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.24it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.53it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.77it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.94it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.07it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.16it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.23it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.28it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.31it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.34it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.35it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.36it/s][A
 30%|███       | 19/63 [00:03<00:08,  5.34it/s]

epoch 5 batch id 1 loss 1.0355349779129028 train acc 0.875
epoch 5 batch id 51 loss 0.9270539283752441 train acc 0.8321078431372549
epoch 5 batch id 101 loss 1.2441341876983643 train acc 0.8329207920792079
epoch 5 batch id 151 loss 0.9800897836685181 train acc 0.828228476821192
epoch 5 batch id 201 loss 0.8408280611038208 train acc 0.8255597014925373
epoch 5 batch id 251 loss 1.5230798721313477 train acc 0.8189741035856574
epoch 5 batch id 301 loss 1.2502140998840332 train acc 0.8181063122923588
epoch 5 batch id 351 loss 0.53396075963974 train acc 0.8167735042735043
epoch 5 batch id 401 loss 0.29108715057373047 train acc 0.8190461346633416
epoch 5 batch id 451 loss 1.2371656894683838 train acc 0.8206762749445676
epoch 5 batch id 501 loss 1.6662497520446777 train acc 0.8186127744510978
epoch 5 train acc 0.8181706114398422
epoch 5 test acc 0.7653508771929824



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:38,  1.59it/s][A
  3%|▎         | 2/63 [00:00<00:30,  2.01it/s][A
  5%|▍         | 3/63 [00:01<00:24,  2.47it/s][A
  6%|▋         | 4/63 [00:01<00:19,  2.96it/s][A
  8%|▊         | 5/63 [00:01<00:16,  3.42it/s][A
 10%|▉         | 6/63 [00:01<00:14,  3.84it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.17it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.48it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.71it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.89it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.03it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.12it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.20it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.24it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.28it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.31it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.31it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.32it/s][A
 30%|███       | 19/63 [00:03<00:08,  5.32it/s]

epoch 6 batch id 1 loss 0.9402031898498535 train acc 0.8125
epoch 6 batch id 51 loss 1.6976035833358765 train acc 0.8333333333333334
epoch 6 batch id 101 loss 0.7979345321655273 train acc 0.8409653465346535
epoch 6 batch id 151 loss 0.9037442803382874 train acc 0.8410596026490066
epoch 6 batch id 201 loss 0.7174078822135925 train acc 0.8426616915422885
epoch 6 batch id 251 loss 0.8468664884567261 train acc 0.8423804780876494
epoch 6 batch id 301 loss 0.4808899164199829 train acc 0.844892026578073
epoch 6 batch id 351 loss 0.8773684501647949 train acc 0.8459757834757835
epoch 6 batch id 401 loss 1.5850104093551636 train acc 0.8436720698254364
epoch 6 batch id 451 loss 0.5637120008468628 train acc 0.843680709534368
epoch 6 batch id 501 loss 0.8242521286010742 train acc 0.844061876247505
epoch 6 train acc 0.8431952662721893


 60%|██████    | 6/10 [33:45<22:14, 333.61s/it]

epoch 6 test acc 0.7521929824561403
epoch 7 batch id 1 loss 0.8073590993881226 train acc 0.875
epoch 7 batch id 51 loss 1.335184097290039 train acc 0.8786764705882353
epoch 7 batch id 101 loss 0.3683972656726837 train acc 0.8793316831683168
epoch 7 batch id 151 loss 1.097667932510376 train acc 0.8783112582781457
epoch 7 batch id 201 loss 0.9379782676696777 train acc 0.8784203980099502
epoch 7 batch id 251 loss 0.2021295726299286 train acc 0.87699203187251
epoch 7 batch id 301 loss 0.7999845743179321 train acc 0.8776993355481728
epoch 7 batch id 351 loss 0.9706434607505798 train acc 0.8760683760683761
epoch 7 batch id 401 loss 1.0087487697601318 train acc 0.877649625935162
epoch 7 batch id 451 loss 0.6061993837356567 train acc 0.8776330376940134
epoch 7 batch id 501 loss 1.3969898223876953 train acc 0.87562375249501
epoch 7 train acc 0.8758629191321499
epoch 7 test acc 0.7675438596491229



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:44,  1.39it/s][A
  3%|▎         | 2/63 [00:00<00:34,  1.78it/s][A
  5%|▍         | 3/63 [00:01<00:26,  2.23it/s][A
  6%|▋         | 4/63 [00:01<00:21,  2.70it/s][A
  8%|▊         | 5/63 [00:01<00:18,  3.17it/s][A
 10%|▉         | 6/63 [00:01<00:15,  3.60it/s][A
 11%|█         | 7/63 [00:01<00:14,  3.99it/s][A
 13%|█▎        | 8/63 [00:02<00:12,  4.32it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.58it/s][A
 16%|█▌        | 10/63 [00:02<00:11,  4.79it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  4.95it/s][A
 19%|█▉        | 12/63 [00:02<00:10,  5.06it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.15it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.20it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.23it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.26it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.22it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.26it/s][A
 30%|███       | 19/63 [00:04<00:08,  5.28it/s]

epoch 8 batch id 1 loss 0.42775118350982666 train acc 0.9375
epoch 8 batch id 51 loss 0.8389499187469482 train acc 0.8884803921568627
epoch 8 batch id 101 loss 1.2313553094863892 train acc 0.8892326732673267
epoch 8 batch id 151 loss 0.8230832815170288 train acc 0.8948675496688742
epoch 8 batch id 201 loss 0.7513457536697388 train acc 0.8989427860696517
epoch 8 batch id 251 loss 1.2329480648040771 train acc 0.8971613545816733
epoch 8 batch id 301 loss 0.33298200368881226 train acc 0.8938953488372093
epoch 8 batch id 351 loss 0.744059681892395 train acc 0.8961894586894587
epoch 8 batch id 401 loss 1.92942214012146 train acc 0.8968204488778054
epoch 8 batch id 451 loss 0.8430218696594238 train acc 0.8937084257206208
epoch 8 batch id 501 loss 0.46582138538360596 train acc 0.8955838323353293
epoch 8 train acc 0.8960798816568047
epoch 8 test acc 0.768640350877193



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:41,  1.48it/s][A
  3%|▎         | 2/63 [00:00<00:32,  1.87it/s][A
  5%|▍         | 3/63 [00:01<00:25,  2.32it/s][A
  6%|▋         | 4/63 [00:01<00:21,  2.79it/s][A
  8%|▊         | 5/63 [00:01<00:17,  3.25it/s][A
 10%|▉         | 6/63 [00:01<00:15,  3.68it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.02it/s][A
 13%|█▎        | 8/63 [00:02<00:12,  4.34it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.61it/s][A
 16%|█▌        | 10/63 [00:02<00:11,  4.81it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  4.96it/s][A
 19%|█▉        | 12/63 [00:02<00:10,  5.04it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.12it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.19it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.23it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.26it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.28it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.30it/s][A
 30%|███       | 19/63 [00:04<00:08,  5.31it/s]

epoch 9 batch id 1 loss 0.47543543577194214 train acc 0.9375
epoch 9 batch id 51 loss 0.3100338876247406 train acc 0.9203431372549019
epoch 9 batch id 101 loss 0.7100832462310791 train acc 0.9133663366336634
epoch 9 batch id 151 loss 0.25674307346343994 train acc 0.910182119205298
epoch 9 batch id 201 loss 0.1604505479335785 train acc 0.9116915422885572
epoch 9 batch id 251 loss 0.3737794756889343 train acc 0.9101095617529881
epoch 9 batch id 301 loss 1.4990270137786865 train acc 0.9084302325581395
epoch 9 batch id 351 loss 0.8281195163726807 train acc 0.9090099715099715
epoch 9 batch id 401 loss 0.31591808795928955 train acc 0.9089775561097256
epoch 9 batch id 451 loss 1.5498571395874023 train acc 0.9068736141906873
epoch 9 batch id 501 loss 0.6354127526283264 train acc 0.9048153692614771
epoch 9 train acc 0.9050788954635108
epoch 9 test acc 0.7796052631578947



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:42,  1.45it/s][A
  3%|▎         | 2/63 [00:00<00:32,  1.86it/s][A
  5%|▍         | 3/63 [00:01<00:26,  2.31it/s][A
  6%|▋         | 4/63 [00:01<00:21,  2.79it/s][A
  8%|▊         | 5/63 [00:01<00:17,  3.25it/s][A
 10%|▉         | 6/63 [00:01<00:15,  3.67it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.06it/s][A
 13%|█▎        | 8/63 [00:02<00:12,  4.39it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.66it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.87it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.00it/s][A
 19%|█▉        | 12/63 [00:02<00:10,  5.10it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.17it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.22it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.26it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.28it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.29it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.30it/s][A
 30%|███       | 19/63 [00:04<00:08,  5.30it/s]

epoch 10 batch id 1 loss 1.7786593437194824 train acc 0.6875
epoch 10 batch id 51 loss 0.2896483540534973 train acc 0.928921568627451
epoch 10 batch id 101 loss 1.7000744342803955 train acc 0.9344059405940595
epoch 10 batch id 151 loss 0.2618771195411682 train acc 0.9362582781456954
epoch 10 batch id 201 loss 0.22782638669013977 train acc 0.9325248756218906
epoch 10 batch id 251 loss 1.028909683227539 train acc 0.9260458167330677
epoch 10 batch id 301 loss 0.08774897456169128 train acc 0.9269102990033222
epoch 10 batch id 351 loss 0.36619770526885986 train acc 0.9252136752136753
epoch 10 batch id 401 loss 1.2388858795166016 train acc 0.9233167082294265
epoch 10 batch id 451 loss 0.2601461410522461 train acc 0.9240576496674058
epoch 10 batch id 501 loss 0.4006996154785156 train acc 0.9239021956087824
epoch 10 train acc 0.9240631163708086


100%|██████████| 10/10 [56:31<00:00, 339.18s/it]
  0%|          | 0/63 [00:00<?, ?it/s]

epoch 10 test acc 0.7521929824561403


100%|██████████| 63/63 [00:12<00:00,  5.18it/s]


FOLD 7
--------------------------------


Some weights of the model checkpoint at xlm-roberta-large were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-large and are newly initialized: ['classifier.den

epoch 1 batch id 1 loss 7.434867858886719 train acc 0.0
epoch 1 batch id 51 loss 3.7048065662384033 train acc 0.41544117647058826
epoch 1 batch id 101 loss 4.704128742218018 train acc 0.4511138613861386
epoch 1 batch id 151 loss 4.080653190612793 train acc 0.46192052980132453
epoch 1 batch id 201 loss 4.124473571777344 train acc 0.4748134328358209
epoch 1 batch id 251 loss 6.024170875549316 train acc 0.48256972111553786
epoch 1 batch id 301 loss 5.632912635803223 train acc 0.48401162790697677
epoch 1 batch id 351 loss 4.400571823120117 train acc 0.479522792022792
epoch 1 batch id 401 loss 2.174346446990967 train acc 0.4870635910224439
epoch 1 batch id 451 loss 4.756455421447754 train acc 0.49584257206208426
epoch 1 batch id 501 loss 4.288259029388428 train acc 0.5041167664670658
epoch 1 train acc 0.5056706114398422
epoch 1 test acc 0.6271929824561403



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:40,  1.52it/s][A
  3%|▎         | 2/63 [00:00<00:31,  1.93it/s][A
  5%|▍         | 3/63 [00:01<00:25,  2.39it/s][A
  6%|▋         | 4/63 [00:01<00:20,  2.87it/s][A
  8%|▊         | 5/63 [00:01<00:17,  3.33it/s][A
 10%|▉         | 6/63 [00:01<00:15,  3.74it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.12it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.44it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.68it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.87it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.02it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.12it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.19it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.25it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.29it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.32it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.34it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.35it/s][A
 30%|███       | 19/63 [00:04<00:08,  5.36it/s]

epoch 2 batch id 1 loss 2.401040554046631 train acc 0.6875
epoch 2 batch id 51 loss 1.8051447868347168 train acc 0.625
epoch 2 batch id 101 loss 2.033024311065674 train acc 0.6287128712871287
epoch 2 batch id 151 loss 3.893385887145996 train acc 0.6307947019867549
epoch 2 batch id 201 loss 2.899624824523926 train acc 0.6374378109452736
epoch 2 batch id 251 loss 1.4286097288131714 train acc 0.646414342629482
epoch 2 batch id 301 loss 2.025200366973877 train acc 0.6519933554817275
epoch 2 batch id 351 loss 2.477254867553711 train acc 0.657051282051282
epoch 2 batch id 401 loss 2.3395466804504395 train acc 0.6639650872817955
epoch 2 batch id 451 loss 0.6967912912368774 train acc 0.6665742793791575
epoch 2 batch id 501 loss 1.9408628940582275 train acc 0.6686626746506986
epoch 2 train acc 0.6687623274161736
epoch 2 test acc 0.7083333333333334



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:43,  1.44it/s][A
  3%|▎         | 2/63 [00:00<00:33,  1.84it/s][A
  5%|▍         | 3/63 [00:01<00:26,  2.29it/s][A
  6%|▋         | 4/63 [00:01<00:21,  2.76it/s][A
  8%|▊         | 5/63 [00:01<00:17,  3.23it/s][A
 10%|▉         | 6/63 [00:01<00:15,  3.65it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.02it/s][A
 13%|█▎        | 8/63 [00:02<00:12,  4.35it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.60it/s][A
 16%|█▌        | 10/63 [00:02<00:11,  4.81it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  4.96it/s][A
 19%|█▉        | 12/63 [00:02<00:10,  5.07it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.14it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.20it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.23it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.26it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.29it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.31it/s][A
 30%|███       | 19/63 [00:04<00:08,  5.33it/s]

epoch 3 batch id 1 loss 1.4938573837280273 train acc 0.75
epoch 3 batch id 51 loss 1.0327239036560059 train acc 0.7340686274509803
epoch 3 batch id 101 loss 0.9679739475250244 train acc 0.739480198019802
epoch 3 batch id 151 loss 0.9340900182723999 train acc 0.7479304635761589
epoch 3 batch id 201 loss 2.561103343963623 train acc 0.7468905472636815
epoch 3 batch id 251 loss 1.1551902294158936 train acc 0.75
epoch 3 batch id 301 loss 1.5731236934661865 train acc 0.7491694352159468
epoch 3 batch id 351 loss 1.6516075134277344 train acc 0.7466168091168092
epoch 3 batch id 401 loss 1.129278302192688 train acc 0.7490648379052369
epoch 3 batch id 451 loss 1.5730407238006592 train acc 0.7508314855875832
epoch 3 batch id 501 loss 1.7284765243530273 train acc 0.7495009980039921
epoch 3 train acc 0.7511094674556213
epoch 3 test acc 0.7368421052631579



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:40,  1.53it/s][A
  3%|▎         | 2/63 [00:00<00:31,  1.95it/s][A
  5%|▍         | 3/63 [00:01<00:24,  2.41it/s][A
  6%|▋         | 4/63 [00:01<00:20,  2.89it/s][A
  8%|▊         | 5/63 [00:01<00:17,  3.35it/s][A
 10%|▉         | 6/63 [00:01<00:15,  3.75it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.12it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.43it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.67it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.86it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  4.99it/s][A
 19%|█▉        | 12/63 [00:02<00:10,  5.07it/s][A
 21%|██        | 13/63 [00:02<00:10,  5.00it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.10it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.16it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.23it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.27it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.30it/s][A
 30%|███       | 19/63 [00:04<00:08,  5.32it/s]

epoch 4 batch id 1 loss 1.0044502019882202 train acc 0.8125
epoch 4 batch id 51 loss 2.8499884605407715 train acc 0.7830882352941176
epoch 4 batch id 101 loss 1.4385123252868652 train acc 0.7728960396039604
epoch 4 batch id 151 loss 2.22098970413208 train acc 0.7785596026490066
epoch 4 batch id 201 loss 0.9541588425636292 train acc 0.7838930348258707
epoch 4 batch id 251 loss 3.103672742843628 train acc 0.7851095617529881
epoch 4 batch id 301 loss 1.045358657836914 train acc 0.7875830564784053
epoch 4 batch id 351 loss 0.660878598690033 train acc 0.7922008547008547
epoch 4 batch id 401 loss 1.200193166732788 train acc 0.7942643391521197
epoch 4 batch id 451 loss 2.447782516479492 train acc 0.7946230598669624
epoch 4 batch id 501 loss 0.9523040056228638 train acc 0.7947854291417166
epoch 4 train acc 0.7938856015779092
epoch 4 test acc 0.7379385964912281



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:42,  1.46it/s][A
  3%|▎         | 2/63 [00:00<00:32,  1.86it/s][A
  5%|▍         | 3/63 [00:01<00:25,  2.32it/s][A
  6%|▋         | 4/63 [00:01<00:21,  2.79it/s][A
  8%|▊         | 5/63 [00:01<00:17,  3.27it/s][A
 10%|▉         | 6/63 [00:01<00:15,  3.70it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.06it/s][A
 13%|█▎        | 8/63 [00:02<00:12,  4.35it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.62it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.83it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  4.97it/s][A
 19%|█▉        | 12/63 [00:02<00:10,  5.06it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.12it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.17it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.21it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.26it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.28it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.29it/s][A
 30%|███       | 19/63 [00:04<00:08,  5.32it/s]

epoch 5 batch id 1 loss 1.1127246618270874 train acc 0.875
epoch 5 batch id 51 loss 0.6331840753555298 train acc 0.8247549019607843
epoch 5 batch id 101 loss 0.48426491022109985 train acc 0.8155940594059405
epoch 5 batch id 151 loss 0.4564610719680786 train acc 0.8220198675496688
epoch 5 batch id 201 loss 1.20601487159729 train acc 0.820273631840796
epoch 5 batch id 251 loss 1.048414707183838 train acc 0.8249501992031872
epoch 5 batch id 301 loss 0.5238779187202454 train acc 0.8259966777408638
epoch 5 batch id 351 loss 1.364515781402588 train acc 0.8269230769230769
epoch 5 batch id 401 loss 1.3919832706451416 train acc 0.8271508728179551
epoch 5 batch id 451 loss 1.498754858970642 train acc 0.8273281596452328
epoch 5 batch id 501 loss 1.1140258312225342 train acc 0.8270958083832335
epoch 5 train acc 0.8264299802761341


 50%|█████     | 5/10 [28:20<28:03, 336.65s/it]

epoch 5 test acc 0.7214912280701754
epoch 6 batch id 1 loss 0.7498891353607178 train acc 0.9375
epoch 6 batch id 51 loss 0.6448862552642822 train acc 0.8627450980392157
epoch 6 batch id 101 loss 0.5785021781921387 train acc 0.8675742574257426
epoch 6 batch id 151 loss 2.0284030437469482 train acc 0.8642384105960265
epoch 6 batch id 201 loss 2.0616092681884766 train acc 0.8600746268656716
epoch 6 batch id 251 loss 0.6404538154602051 train acc 0.8585657370517928
epoch 6 batch id 301 loss 0.7674801349639893 train acc 0.8552740863787376
epoch 6 batch id 351 loss 1.0570900440216064 train acc 0.853454415954416
epoch 6 batch id 401 loss 0.8115863800048828 train acc 0.8547381546134664
epoch 6 batch id 451 loss 1.1811349391937256 train acc 0.851579822616408
epoch 6 batch id 501 loss 1.5550354719161987 train acc 0.8511726546906188
epoch 6 train acc 0.8514546351084813


 60%|██████    | 6/10 [33:35<22:00, 330.18s/it]

epoch 6 test acc 0.7368421052631579
epoch 7 batch id 1 loss 0.5646834969520569 train acc 0.9375
epoch 7 batch id 51 loss 0.878474771976471 train acc 0.8713235294117647
epoch 7 batch id 101 loss 0.9568443298339844 train acc 0.8818069306930693
epoch 7 batch id 151 loss 1.1635594367980957 train acc 0.8762417218543046
epoch 7 batch id 201 loss 0.6251112222671509 train acc 0.880907960199005
epoch 7 batch id 251 loss 0.28763192892074585 train acc 0.8804780876494024
epoch 7 batch id 301 loss 0.5763092041015625 train acc 0.8816445182724253
epoch 7 batch id 351 loss 0.6796209812164307 train acc 0.8812321937321937
epoch 7 batch id 401 loss 0.7310621738433838 train acc 0.8810785536159601
epoch 7 batch id 451 loss 0.6966302394866943 train acc 0.8820676274944568
epoch 7 batch id 501 loss 1.1031363010406494 train acc 0.8808632734530938
epoch 7 train acc 0.8812869822485208
epoch 7 test acc 0.7510964912280702



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:41,  1.48it/s][A
  3%|▎         | 2/63 [00:00<00:32,  1.89it/s][A
  5%|▍         | 3/63 [00:01<00:25,  2.35it/s][A
  6%|▋         | 4/63 [00:01<00:20,  2.82it/s][A
  8%|▊         | 5/63 [00:01<00:17,  3.27it/s][A
 10%|▉         | 6/63 [00:01<00:15,  3.68it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.05it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.37it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.63it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.83it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  4.97it/s][A
 19%|█▉        | 12/63 [00:02<00:10,  5.08it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.16it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.21it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.26it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.28it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.30it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.31it/s][A
 30%|███       | 19/63 [00:04<00:08,  5.31it/s]

epoch 8 batch id 1 loss 0.26980137825012207 train acc 1.0
epoch 8 batch id 51 loss 0.28037023544311523 train acc 0.8958333333333334
epoch 8 batch id 101 loss 1.2649966478347778 train acc 0.9028465346534653
epoch 8 batch id 151 loss 0.1839267611503601 train acc 0.9068708609271523
epoch 8 batch id 201 loss 0.34397146105766296 train acc 0.9070273631840796
epoch 8 batch id 251 loss 0.7908288240432739 train acc 0.9026394422310757
epoch 8 batch id 301 loss 0.24341590702533722 train acc 0.9013704318936877
epoch 8 batch id 351 loss 0.8434171676635742 train acc 0.9017094017094017
epoch 8 batch id 401 loss 0.576850175857544 train acc 0.9008728179551122
epoch 8 batch id 451 loss 0.5865991711616516 train acc 0.9018847006651884
epoch 8 batch id 501 loss 1.062950611114502 train acc 0.8990768463073853
epoch 8 train acc 0.8990384615384616
epoch 8 test acc 0.7510964912280702



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:40,  1.53it/s][A
  3%|▎         | 2/63 [00:00<00:31,  1.95it/s][A
  5%|▍         | 3/63 [00:01<00:24,  2.41it/s][A
  6%|▋         | 4/63 [00:01<00:20,  2.88it/s][A
  8%|▊         | 5/63 [00:01<00:17,  3.35it/s][A
 10%|▉         | 6/63 [00:01<00:15,  3.76it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.14it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.44it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.69it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.87it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.00it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.10it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.15it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.21it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.27it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.30it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.32it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.34it/s][A
 30%|███       | 19/63 [00:04<00:08,  5.36it/s]

epoch 9 batch id 1 loss 0.09476619958877563 train acc 1.0
epoch 9 batch id 51 loss 0.14866504073143005 train acc 0.9301470588235294
epoch 9 batch id 101 loss 1.26686692237854 train acc 0.9232673267326733
epoch 9 batch id 151 loss 0.30548787117004395 train acc 0.9242549668874173
epoch 9 batch id 201 loss 0.9218755960464478 train acc 0.9263059701492538
epoch 9 batch id 251 loss 0.41213470697402954 train acc 0.9255478087649402
epoch 9 batch id 301 loss 0.10203927755355835 train acc 0.9260797342192691
epoch 9 batch id 351 loss 0.5250999927520752 train acc 0.9246794871794872
epoch 9 batch id 401 loss 0.6005806922912598 train acc 0.922069825436409
epoch 9 batch id 451 loss 0.773241400718689 train acc 0.9201773835920177
epoch 9 batch id 501 loss 0.9192049503326416 train acc 0.9192864271457086
epoch 9 train acc 0.9196252465483234
epoch 9 test acc 0.7664473684210527



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:43,  1.44it/s][A
  3%|▎         | 2/63 [00:00<00:33,  1.84it/s][A
  5%|▍         | 3/63 [00:01<00:26,  2.29it/s][A
  6%|▋         | 4/63 [00:01<00:21,  2.76it/s][A
  8%|▊         | 5/63 [00:01<00:17,  3.23it/s][A
 10%|▉         | 6/63 [00:01<00:15,  3.65it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.03it/s][A
 13%|█▎        | 8/63 [00:02<00:12,  4.37it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.64it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.84it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.00it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.11it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.19it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.25it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.29it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.32it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.33it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.34it/s][A
 30%|███       | 19/63 [00:04<00:08,  5.36it/s]

epoch 10 batch id 1 loss 0.5317726731300354 train acc 0.875
epoch 10 batch id 51 loss 0.22135868668556213 train acc 0.9105392156862745
epoch 10 batch id 101 loss 0.18468734622001648 train acc 0.9146039603960396
epoch 10 batch id 151 loss 0.22097519040107727 train acc 0.9172185430463576
epoch 10 batch id 201 loss 0.19270843267440796 train acc 0.9203980099502488
epoch 10 batch id 251 loss 0.15185536444187164 train acc 0.922808764940239
epoch 10 batch id 301 loss 0.31131434440612793 train acc 0.9248338870431894
epoch 10 batch id 351 loss 0.2608969807624817 train acc 0.9243233618233618
epoch 10 batch id 401 loss 1.010321855545044 train acc 0.9267456359102244
epoch 10 batch id 451 loss 1.3059035539627075 train acc 0.9253048780487805
epoch 10 batch id 501 loss 0.7790807485580444 train acc 0.9246506986027944
epoch 10 train acc 0.9245562130177515


100%|██████████| 10/10 [56:05<00:00, 336.55s/it]
  0%|          | 0/63 [00:00<?, ?it/s]

epoch 10 test acc 0.7379385964912281


100%|██████████| 63/63 [00:12<00:00,  5.15it/s]


FOLD 8
--------------------------------


Some weights of the model checkpoint at xlm-roberta-large were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-large and are newly initialized: ['classifier.den

epoch 1 batch id 1 loss 7.688092231750488 train acc 0.0
epoch 1 batch id 51 loss 4.3597002029418945 train acc 0.44607843137254904
epoch 1 batch id 101 loss 4.021404266357422 train acc 0.4771039603960396
epoch 1 batch id 151 loss 5.040887832641602 train acc 0.47392384105960267
epoch 1 batch id 201 loss 3.5801026821136475 train acc 0.4779228855721393
epoch 1 batch id 251 loss 2.962498188018799 train acc 0.4828187250996016
epoch 1 batch id 301 loss 3.0066542625427246 train acc 0.4848421926910299
epoch 1 batch id 351 loss 4.815199851989746 train acc 0.4825498575498576
epoch 1 batch id 401 loss 3.7571587562561035 train acc 0.487998753117207
epoch 1 batch id 451 loss 5.215752124786377 train acc 0.48960643015521066
epoch 1 batch id 501 loss 4.885226249694824 train acc 0.4916417165668663
epoch 1 train acc 0.4923570019723866
epoch 1 test acc 0.5175438596491229



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:39,  1.57it/s][A
  3%|▎         | 2/63 [00:00<00:30,  1.98it/s][A
  5%|▍         | 3/63 [00:01<00:24,  2.45it/s][A
  6%|▋         | 4/63 [00:01<00:20,  2.93it/s][A
  8%|▊         | 5/63 [00:01<00:17,  3.39it/s][A
 10%|▉         | 6/63 [00:01<00:15,  3.79it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.16it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.46it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.70it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.89it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.01it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.13it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.20it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.25it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.29it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.31it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.33it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.34it/s][A
 30%|███       | 19/63 [00:03<00:08,  5.35it/s]

epoch 2 batch id 1 loss 2.777312755584717 train acc 0.6875
epoch 2 batch id 51 loss 4.155264854431152 train acc 0.5171568627450981
epoch 2 batch id 101 loss 3.7432732582092285 train acc 0.5160891089108911
epoch 2 batch id 151 loss 2.877413749694824 train acc 0.5405629139072847
epoch 2 batch id 201 loss 2.871260166168213 train acc 0.5628109452736318
epoch 2 batch id 251 loss 3.2055959701538086 train acc 0.5717131474103586
epoch 2 batch id 301 loss 1.8391597270965576 train acc 0.5786960132890365
epoch 2 batch id 351 loss 1.8141498565673828 train acc 0.5909900284900285
epoch 2 batch id 401 loss 2.882972240447998 train acc 0.6024002493765586
epoch 2 batch id 451 loss 1.9819679260253906 train acc 0.6069844789356984
epoch 2 batch id 501 loss 0.8704698085784912 train acc 0.6133982035928144
epoch 2 train acc 0.613905325443787
epoch 2 test acc 0.6732456140350878



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:39,  1.57it/s][A
  3%|▎         | 2/63 [00:00<00:30,  1.98it/s][A
  5%|▍         | 3/63 [00:01<00:24,  2.44it/s][A
  6%|▋         | 4/63 [00:01<00:20,  2.92it/s][A
  8%|▊         | 5/63 [00:01<00:17,  3.38it/s][A
 10%|▉         | 6/63 [00:01<00:15,  3.80it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.14it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.45it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.70it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.89it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.03it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.12it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.19it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.25it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.29it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.31it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.34it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.35it/s][A
 30%|███       | 19/63 [00:03<00:08,  5.36it/s]

epoch 3 batch id 1 loss 1.783778190612793 train acc 0.8125
epoch 3 batch id 51 loss 3.250331401824951 train acc 0.7218137254901961
epoch 3 batch id 101 loss 1.4999773502349854 train acc 0.7048267326732673
epoch 3 batch id 151 loss 1.6405205726623535 train acc 0.706953642384106
epoch 3 batch id 201 loss 2.698655605316162 train acc 0.7133084577114428
epoch 3 batch id 251 loss 1.6613590717315674 train acc 0.7151394422310757
epoch 3 batch id 301 loss 2.7531418800354004 train acc 0.7203073089700996
epoch 3 batch id 351 loss 1.2646887302398682 train acc 0.718482905982906
epoch 3 batch id 401 loss 2.069195032119751 train acc 0.718360349127182
epoch 3 batch id 451 loss 1.1109261512756348 train acc 0.7181263858093127
epoch 3 batch id 501 loss 1.1508214473724365 train acc 0.7226796407185628
epoch 3 train acc 0.7226331360946746
epoch 3 test acc 0.7116228070175439



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:38,  1.62it/s][A
  3%|▎         | 2/63 [00:00<00:29,  2.04it/s][A
  5%|▍         | 3/63 [00:00<00:23,  2.51it/s][A
  6%|▋         | 4/63 [00:01<00:19,  2.98it/s][A
  8%|▊         | 5/63 [00:01<00:16,  3.44it/s][A
 10%|▉         | 6/63 [00:01<00:14,  3.85it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.20it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.50it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.73it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.90it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.03it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.14it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.21it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.25it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.29it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.33it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.35it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.36it/s][A
 30%|███       | 19/63 [00:03<00:08,  5.35it/s]

epoch 4 batch id 1 loss 1.6889853477478027 train acc 0.75
epoch 4 batch id 51 loss 0.5526623725891113 train acc 0.8088235294117647
epoch 4 batch id 101 loss 0.9180050492286682 train acc 0.7896039603960396
epoch 4 batch id 151 loss 0.9949215650558472 train acc 0.7744205298013245
epoch 4 batch id 201 loss 1.103630542755127 train acc 0.7758084577114428
epoch 4 batch id 251 loss 1.5140410661697388 train acc 0.775398406374502
epoch 4 batch id 301 loss 2.1468863487243652 train acc 0.7742940199335548
epoch 4 batch id 351 loss 2.0077171325683594 train acc 0.7735042735042735
epoch 4 batch id 401 loss 1.8922505378723145 train acc 0.7707294264339152
epoch 4 batch id 451 loss 0.9630444049835205 train acc 0.7723115299334812
epoch 4 batch id 501 loss 0.8584502935409546 train acc 0.7718313373253493
epoch 4 train acc 0.7713264299802761
epoch 4 test acc 0.7401315789473685



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:40,  1.54it/s][A
  3%|▎         | 2/63 [00:00<00:31,  1.95it/s][A
  5%|▍         | 3/63 [00:01<00:24,  2.41it/s][A
  6%|▋         | 4/63 [00:01<00:20,  2.89it/s][A
  8%|▊         | 5/63 [00:01<00:17,  3.36it/s][A
 10%|▉         | 6/63 [00:01<00:15,  3.77it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.14it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.45it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.71it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.90it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.01it/s][A
 19%|█▉        | 12/63 [00:02<00:10,  5.10it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.17it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.21it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.25it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.28it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.30it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.31it/s][A
 30%|███       | 19/63 [00:04<00:08,  5.32it/s]

epoch 5 batch id 1 loss 0.9443731904029846 train acc 0.875
epoch 5 batch id 51 loss 1.493844985961914 train acc 0.8137254901960784
epoch 5 batch id 101 loss 0.7316304445266724 train acc 0.8106435643564357
epoch 5 batch id 151 loss 0.7492350339889526 train acc 0.8120860927152318
epoch 5 batch id 201 loss 0.6080290675163269 train acc 0.8072139303482587
epoch 5 batch id 251 loss 2.1419620513916016 train acc 0.811503984063745
epoch 5 batch id 301 loss 1.426920771598816 train acc 0.8131229235880398
epoch 5 batch id 351 loss 0.741533637046814 train acc 0.8114316239316239
epoch 5 batch id 401 loss 1.1262667179107666 train acc 0.8109413965087282
epoch 5 batch id 451 loss 1.4425444602966309 train acc 0.8088968957871396
epoch 5 batch id 501 loss 1.3273351192474365 train acc 0.8087574850299402
epoch 5 train acc 0.8092948717948718
epoch 5 test acc 0.7489035087719298



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:41,  1.48it/s][A
  3%|▎         | 2/63 [00:00<00:32,  1.88it/s][A
  5%|▍         | 3/63 [00:01<00:25,  2.34it/s][A
  6%|▋         | 4/63 [00:01<00:20,  2.82it/s][A
  8%|▊         | 5/63 [00:01<00:17,  3.29it/s][A
 10%|▉         | 6/63 [00:01<00:15,  3.72it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.09it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.42it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.68it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.86it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  4.99it/s][A
 19%|█▉        | 12/63 [00:02<00:10,  5.09it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.16it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.22it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.24it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.27it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.29it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.30it/s][A
 30%|███       | 19/63 [00:04<00:08,  5.32it/s]

epoch 6 batch id 1 loss 0.681888997554779 train acc 0.875
epoch 6 batch id 51 loss 0.990984320640564 train acc 0.8370098039215687
epoch 6 batch id 101 loss 0.397824764251709 train acc 0.838490099009901
epoch 6 batch id 151 loss 0.5266333222389221 train acc 0.8423013245033113
epoch 6 batch id 201 loss 2.245530128479004 train acc 0.8411069651741293
epoch 6 batch id 251 loss 0.5867824554443359 train acc 0.8406374501992032
epoch 6 batch id 301 loss 0.4405727982521057 train acc 0.8409468438538206
epoch 6 batch id 351 loss 1.1846065521240234 train acc 0.8413461538461539
epoch 6 batch id 401 loss 1.2521370649337769 train acc 0.8402431421446384
epoch 6 batch id 451 loss 0.8635331392288208 train acc 0.8386917960088692
epoch 6 batch id 501 loss 0.7971566319465637 train acc 0.8362025948103793
epoch 6 train acc 0.8356755424063116


 60%|██████    | 6/10 [33:46<22:14, 333.64s/it]

epoch 6 test acc 0.7171052631578947
epoch 7 batch id 1 loss 1.1148242950439453 train acc 0.8125
epoch 7 batch id 51 loss 0.3626973032951355 train acc 0.8737745098039216
epoch 7 batch id 101 loss 0.7977203130722046 train acc 0.8731435643564357
epoch 7 batch id 151 loss 1.2271943092346191 train acc 0.8712748344370861
epoch 7 batch id 201 loss 0.4532006084918976 train acc 0.8706467661691543
epoch 7 batch id 251 loss 0.6325210332870483 train acc 0.8690239043824701
epoch 7 batch id 301 loss 1.1051543951034546 train acc 0.8669019933554817
epoch 7 batch id 351 loss 0.5142686367034912 train acc 0.8653846153846154
epoch 7 batch id 401 loss 1.4388306140899658 train acc 0.8633104738154613
epoch 7 batch id 451 loss 1.4876611232757568 train acc 0.863359201773836
epoch 7 batch id 501 loss 0.611870288848877 train acc 0.8642714570858283
epoch 7 train acc 0.8643984220907298
epoch 7 test acc 0.7664473684210527



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:43,  1.44it/s][A
  3%|▎         | 2/63 [00:00<00:33,  1.84it/s][A
  5%|▍         | 3/63 [00:01<00:26,  2.29it/s][A
  6%|▋         | 4/63 [00:01<00:21,  2.76it/s][A
  8%|▊         | 5/63 [00:01<00:17,  3.23it/s][A
 10%|▉         | 6/63 [00:01<00:15,  3.65it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.05it/s][A
 13%|█▎        | 8/63 [00:02<00:12,  4.38it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.64it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.85it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  4.98it/s][A
 19%|█▉        | 12/63 [00:02<00:10,  5.08it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.16it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.23it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.27it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.29it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.31it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.31it/s][A
 30%|███       | 19/63 [00:04<00:08,  5.32it/s]

epoch 8 batch id 1 loss 1.0090298652648926 train acc 0.875
epoch 8 batch id 51 loss 1.8866310119628906 train acc 0.9056372549019608
epoch 8 batch id 101 loss 0.9406653642654419 train acc 0.9040841584158416
epoch 8 batch id 151 loss 0.21652236580848694 train acc 0.8973509933774835
epoch 8 batch id 201 loss 0.8490047454833984 train acc 0.8958333333333334
epoch 8 batch id 251 loss 0.6895791292190552 train acc 0.891683266932271
epoch 8 batch id 301 loss 1.2925171852111816 train acc 0.8901578073089701
epoch 8 batch id 351 loss 0.5633857846260071 train acc 0.8883547008547008
epoch 8 batch id 401 loss 1.116391658782959 train acc 0.8863778054862843
epoch 8 batch id 451 loss 0.5681144595146179 train acc 0.8865022172949002
epoch 8 batch id 501 loss 0.23936907947063446 train acc 0.8848552894211577
epoch 8 train acc 0.8851084812623274


 80%|████████  | 8/10 [44:46<11:01, 330.50s/it]

epoch 8 test acc 0.7554824561403509
epoch 9 batch id 1 loss 0.3030089735984802 train acc 1.0
epoch 9 batch id 51 loss 0.5600278377532959 train acc 0.9129901960784313
epoch 9 batch id 101 loss 0.6443971395492554 train acc 0.9084158415841584
epoch 9 batch id 151 loss 0.3173148036003113 train acc 0.9130794701986755
epoch 9 batch id 201 loss 0.839924693107605 train acc 0.912002487562189
epoch 9 batch id 251 loss 0.5961112976074219 train acc 0.9103585657370518
epoch 9 batch id 301 loss 0.2846652567386627 train acc 0.909468438538206
epoch 9 batch id 351 loss 0.5593830347061157 train acc 0.9095441595441596
epoch 9 batch id 401 loss 0.3253767490386963 train acc 0.9072630922693267
epoch 9 batch id 451 loss 0.5558100938796997 train acc 0.9074279379157428
epoch 9 batch id 501 loss 0.2797889709472656 train acc 0.906561876247505
epoch 9 train acc 0.9068047337278107


 90%|█████████ | 9/10 [50:02<05:25, 325.96s/it]

epoch 9 test acc 0.75
epoch 10 batch id 1 loss 0.1528862565755844 train acc 1.0
epoch 10 batch id 51 loss 0.7928551435470581 train acc 0.9178921568627451
epoch 10 batch id 101 loss 0.35426437854766846 train acc 0.926980198019802
epoch 10 batch id 151 loss 0.6970664262771606 train acc 0.9242549668874173
epoch 10 batch id 201 loss 0.5936452746391296 train acc 0.9216417910447762
epoch 10 batch id 251 loss 0.6902824640274048 train acc 0.9205677290836654
epoch 10 batch id 301 loss 0.42367279529571533 train acc 0.916735880398671
epoch 10 batch id 351 loss 1.2228971719741821 train acc 0.9152421652421653
epoch 10 batch id 401 loss 0.289592444896698 train acc 0.9177057356608479
epoch 10 batch id 451 loss 0.21159952878952026 train acc 0.9190687361419069
epoch 10 batch id 501 loss 0.1453399360179901 train acc 0.9191616766467066
epoch 10 train acc 0.9191321499013807


100%|██████████| 10/10 [55:17<00:00, 331.72s/it]
  0%|          | 0/63 [00:00<?, ?it/s]

epoch 10 test acc 0.7598684210526315


100%|██████████| 63/63 [00:12<00:00,  5.13it/s]


FOLD 9
--------------------------------


Some weights of the model checkpoint at xlm-roberta-large were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-large and are newly initialized: ['classifier.den

epoch 1 batch id 1 loss 7.290887355804443 train acc 0.0
epoch 1 batch id 51 loss 4.486394882202148 train acc 0.4681372549019608
epoch 1 batch id 101 loss 4.105186462402344 train acc 0.4814356435643564
epoch 1 batch id 151 loss 4.565210342407227 train acc 0.486341059602649
epoch 1 batch id 201 loss 4.552341461181641 train acc 0.49129353233830847
epoch 1 batch id 251 loss 4.933840751647949 train acc 0.49128486055776893
epoch 1 batch id 301 loss 4.345096111297607 train acc 0.49335548172757476
epoch 1 batch id 351 loss 3.096285343170166 train acc 0.4926994301994302
epoch 1 batch id 401 loss 4.490478515625 train acc 0.48753117206982544
epoch 1 batch id 451 loss 2.786900043487549 train acc 0.49653547671840353
epoch 1 batch id 501 loss 1.4625699520111084 train acc 0.499875249500998
epoch 1 train acc 0.5013560157790927
epoch 1 test acc 0.5942982456140351



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:38,  1.62it/s][A
  3%|▎         | 2/63 [00:00<00:29,  2.04it/s][A
  5%|▍         | 3/63 [00:01<00:24,  2.50it/s][A
  6%|▋         | 4/63 [00:01<00:19,  2.97it/s][A
  8%|▊         | 5/63 [00:01<00:16,  3.44it/s][A
 10%|▉         | 6/63 [00:01<00:14,  3.85it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.19it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.48it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.72it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.90it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  5.04it/s][A
 19%|█▉        | 12/63 [00:02<00:09,  5.14it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.21it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.19it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.24it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.28it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.31it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.32it/s][A
 30%|███       | 19/63 [00:03<00:08,  5.34it/s]

epoch 2 batch id 1 loss 2.219489097595215 train acc 0.625
epoch 2 batch id 51 loss 4.004812240600586 train acc 0.5612745098039216
epoch 2 batch id 101 loss 3.6603426933288574 train acc 0.5792079207920792
epoch 2 batch id 151 loss 3.2149710655212402 train acc 0.6005794701986755
epoch 2 batch id 201 loss 2.528437614440918 train acc 0.6178482587064676
epoch 2 batch id 251 loss 2.4165687561035156 train acc 0.6302290836653387
epoch 2 batch id 301 loss 1.7499074935913086 train acc 0.6374584717607974
epoch 2 batch id 351 loss 2.3100972175598145 train acc 0.6415598290598291
epoch 2 batch id 401 loss 2.511237144470215 train acc 0.643859102244389
epoch 2 batch id 451 loss 1.565905213356018 train acc 0.6495288248337029
epoch 2 batch id 501 loss 1.6273159980773926 train acc 0.6506986027944112
epoch 2 train acc 0.652120315581854
epoch 2 test acc 0.7006578947368421



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:42,  1.45it/s][A
  3%|▎         | 2/63 [00:00<00:32,  1.85it/s][A
  5%|▍         | 3/63 [00:01<00:26,  2.30it/s][A
  6%|▋         | 4/63 [00:01<00:21,  2.78it/s][A
  8%|▊         | 5/63 [00:01<00:17,  3.25it/s][A
 10%|▉         | 6/63 [00:01<00:15,  3.67it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.05it/s][A
 13%|█▎        | 8/63 [00:02<00:12,  4.38it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.65it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.84it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  4.99it/s][A
 19%|█▉        | 12/63 [00:02<00:10,  5.09it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.18it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.23it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.27it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.30it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.32it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.33it/s][A
 30%|███       | 19/63 [00:04<00:08,  5.33it/s]

epoch 3 batch id 1 loss 2.601499557495117 train acc 0.5
epoch 3 batch id 51 loss 2.2023091316223145 train acc 0.7303921568627451
epoch 3 batch id 101 loss 1.2300307750701904 train acc 0.7382425742574258
epoch 3 batch id 151 loss 2.0415070056915283 train acc 0.7276490066225165
epoch 3 batch id 201 loss 2.156978130340576 train acc 0.7269900497512438
epoch 3 batch id 251 loss 2.541994094848633 train acc 0.7248505976095617
epoch 3 batch id 301 loss 1.6078250408172607 train acc 0.7269518272425249
epoch 3 batch id 351 loss 1.3807921409606934 train acc 0.7286324786324786
epoch 3 batch id 401 loss 0.9570314884185791 train acc 0.7309850374064838
epoch 3 batch id 451 loss 1.435913324356079 train acc 0.7303215077605322
epoch 3 batch id 501 loss 1.8102195262908936 train acc 0.7290419161676647
epoch 3 train acc 0.7292899408284024
epoch 3 test acc 0.7346491228070176



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:46,  1.34it/s][A
  3%|▎         | 2/63 [00:00<00:35,  1.72it/s][A
  5%|▍         | 3/63 [00:01<00:27,  2.16it/s][A
  6%|▋         | 4/63 [00:01<00:22,  2.62it/s][A
  8%|▊         | 5/63 [00:01<00:18,  3.09it/s][A
 10%|▉         | 6/63 [00:01<00:16,  3.53it/s][A
 11%|█         | 7/63 [00:01<00:14,  3.93it/s][A
 13%|█▎        | 8/63 [00:02<00:12,  4.27it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.54it/s][A
 16%|█▌        | 10/63 [00:02<00:11,  4.76it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  4.92it/s][A
 19%|█▉        | 12/63 [00:02<00:10,  5.01it/s][A
 21%|██        | 13/63 [00:03<00:09,  5.12it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.19it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.24it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.27it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.29it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.31it/s][A
 30%|███       | 19/63 [00:04<00:08,  5.33it/s]

epoch 4 batch id 1 loss 2.3153560161590576 train acc 0.625
epoch 4 batch id 51 loss 0.8899098038673401 train acc 0.7745098039215687
epoch 4 batch id 101 loss 2.1923279762268066 train acc 0.7667079207920792
epoch 4 batch id 151 loss 1.3583083152770996 train acc 0.7673841059602649
epoch 4 batch id 201 loss 2.277651786804199 train acc 0.7720771144278606
epoch 4 batch id 251 loss 1.6093065738677979 train acc 0.7766434262948207
epoch 4 batch id 301 loss 1.2486822605133057 train acc 0.7786544850498339
epoch 4 batch id 351 loss 1.6849479675292969 train acc 0.7747507122507122
epoch 4 batch id 401 loss 1.6259490251541138 train acc 0.7727556109725686
epoch 4 batch id 451 loss 2.100245475769043 train acc 0.7742516629711752
epoch 4 batch id 501 loss 1.9534027576446533 train acc 0.7723303393213573
epoch 4 train acc 0.7710798816568047


 40%|████      | 4/10 [22:25<33:19, 333.27s/it]

epoch 4 test acc 0.7258771929824561
epoch 5 batch id 1 loss 1.891120433807373 train acc 0.6875
epoch 5 batch id 51 loss 1.6478066444396973 train acc 0.7806372549019608
epoch 5 batch id 101 loss 1.0620410442352295 train acc 0.7970297029702971
epoch 5 batch id 151 loss 0.7182443141937256 train acc 0.7988410596026491
epoch 5 batch id 201 loss 1.0783807039260864 train acc 0.8019278606965174
epoch 5 batch id 251 loss 0.8661575317382812 train acc 0.8045318725099602
epoch 5 batch id 301 loss 1.0887770652770996 train acc 0.8044019933554817
epoch 5 batch id 351 loss 0.7908527851104736 train acc 0.8059116809116809
epoch 5 batch id 401 loss 0.8983185291290283 train acc 0.8043952618453866
epoch 5 batch id 451 loss 0.9020484685897827 train acc 0.8029379157427938
epoch 5 batch id 501 loss 0.85364830493927 train acc 0.8020209580838323
epoch 5 train acc 0.8015285996055227
epoch 5 test acc 0.7510964912280702



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:38,  1.60it/s][A
  3%|▎         | 2/63 [00:00<00:30,  2.02it/s][A
  5%|▍         | 3/63 [00:01<00:24,  2.48it/s][A
  6%|▋         | 4/63 [00:01<00:19,  2.96it/s][A
  8%|▊         | 5/63 [00:01<00:16,  3.42it/s][A
 10%|▉         | 6/63 [00:01<00:14,  3.83it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.16it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.46it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.69it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.83it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  4.99it/s][A
 19%|█▉        | 12/63 [00:02<00:10,  5.08it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.16it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.20it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.25it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.29it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.30it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.32it/s][A
 30%|███       | 19/63 [00:04<00:08,  5.32it/s]

epoch 6 batch id 1 loss 0.9576712846755981 train acc 0.9375
epoch 6 batch id 51 loss 1.0125346183776855 train acc 0.8529411764705882
epoch 6 batch id 101 loss 0.546766996383667 train acc 0.8589108910891089
epoch 6 batch id 151 loss 1.7761492729187012 train acc 0.8493377483443708
epoch 6 batch id 201 loss 0.970605731010437 train acc 0.8395522388059702
epoch 6 batch id 251 loss 1.3291237354278564 train acc 0.8351593625498008
epoch 6 batch id 301 loss 0.8742221593856812 train acc 0.8336794019933554
epoch 6 batch id 351 loss 1.1286818981170654 train acc 0.8331552706552706
epoch 6 batch id 401 loss 0.3489547371864319 train acc 0.8315149625935162
epoch 6 batch id 451 loss 0.48911625146865845 train acc 0.8305155210643016
epoch 6 batch id 501 loss 1.234133005142212 train acc 0.8282185628742516
epoch 6 train acc 0.828155818540434


 60%|██████    | 6/10 [33:25<22:00, 330.25s/it]

epoch 6 test acc 0.7412280701754386
epoch 7 batch id 1 loss 0.29685288667678833 train acc 1.0
epoch 7 batch id 51 loss 0.8670886158943176 train acc 0.8406862745098039
epoch 7 batch id 101 loss 1.0586633682250977 train acc 0.8545792079207921
epoch 7 batch id 151 loss 0.9971307516098022 train acc 0.8567880794701986
epoch 7 batch id 201 loss 0.33512669801712036 train acc 0.8575870646766169
epoch 7 batch id 251 loss 0.9495277404785156 train acc 0.850597609561753
epoch 7 batch id 301 loss 0.2506740689277649 train acc 0.8525747508305648
epoch 7 batch id 351 loss 0.7921199798583984 train acc 0.8552350427350427
epoch 7 batch id 401 loss 0.6288665533065796 train acc 0.8536471321695761
epoch 7 batch id 451 loss 1.2874584197998047 train acc 0.8549057649667405
epoch 7 batch id 501 loss 0.690390944480896 train acc 0.8535429141716567
epoch 7 train acc 0.8525641025641025


 70%|███████   | 7/10 [38:40<16:17, 325.68s/it]

epoch 7 test acc 0.7489035087719298
epoch 8 batch id 1 loss 1.1758222579956055 train acc 0.6875
epoch 8 batch id 51 loss 0.957421064376831 train acc 0.8762254901960784
epoch 8 batch id 101 loss 0.7053144574165344 train acc 0.8737623762376238
epoch 8 batch id 151 loss 0.21942532062530518 train acc 0.8716887417218543
epoch 8 batch id 201 loss 1.9754276275634766 train acc 0.875
epoch 8 batch id 251 loss 1.560452938079834 train acc 0.8735059760956175
epoch 8 batch id 301 loss 1.1619946956634521 train acc 0.8752076411960132
epoch 8 batch id 351 loss 1.774977445602417 train acc 0.8728632478632479
epoch 8 batch id 401 loss 0.9645154476165771 train acc 0.8756234413965087
epoch 8 batch id 451 loss 0.2376869171857834 train acc 0.8762472283813747
epoch 8 batch id 501 loss 0.6009678840637207 train acc 0.876746506986028
epoch 8 train acc 0.8772189349112426


 80%|████████  | 8/10 [43:55<10:44, 322.47s/it]

epoch 8 test acc 0.7489035087719298
epoch 9 batch id 1 loss 1.1666326522827148 train acc 0.875
epoch 9 batch id 51 loss 0.9372332096099854 train acc 0.9044117647058824
epoch 9 batch id 101 loss 0.3403216600418091 train acc 0.9077970297029703
epoch 9 batch id 151 loss 0.22800667583942413 train acc 0.902317880794702
epoch 9 batch id 201 loss 0.3272380828857422 train acc 0.9017412935323383
epoch 9 batch id 251 loss 0.5497735738754272 train acc 0.9023904382470119
epoch 9 batch id 301 loss 0.17017745971679688 train acc 0.9040697674418605
epoch 9 batch id 351 loss 0.7320120334625244 train acc 0.9031339031339032
epoch 9 batch id 401 loss 0.5692987442016602 train acc 0.900716957605985
epoch 9 batch id 451 loss 0.5170578956604004 train acc 0.8996674057649667
epoch 9 batch id 501 loss 0.3139743208885193 train acc 0.8994510978043913
epoch 9 train acc 0.8991617357001972
epoch 9 test acc 0.7763157894736842



  0%|          | 0/63 [00:00<?, ?it/s][A
  2%|▏         | 1/63 [00:00<00:41,  1.51it/s][A
  3%|▎         | 2/63 [00:00<00:31,  1.92it/s][A
  5%|▍         | 3/63 [00:01<00:25,  2.38it/s][A
  6%|▋         | 4/63 [00:01<00:20,  2.86it/s][A
  8%|▊         | 5/63 [00:01<00:17,  3.33it/s][A
 10%|▉         | 6/63 [00:01<00:15,  3.72it/s][A
 11%|█         | 7/63 [00:01<00:13,  4.07it/s][A
 13%|█▎        | 8/63 [00:01<00:12,  4.38it/s][A
 14%|█▍        | 9/63 [00:02<00:11,  4.62it/s][A
 16%|█▌        | 10/63 [00:02<00:10,  4.82it/s][A
 17%|█▋        | 11/63 [00:02<00:10,  4.96it/s][A
 19%|█▉        | 12/63 [00:02<00:10,  5.06it/s][A
 21%|██        | 13/63 [00:02<00:09,  5.11it/s][A
 22%|██▏       | 14/63 [00:03<00:09,  5.17it/s][A
 24%|██▍       | 15/63 [00:03<00:09,  5.23it/s][A
 25%|██▌       | 16/63 [00:03<00:08,  5.27it/s][A
 27%|██▋       | 17/63 [00:03<00:08,  5.29it/s][A
 29%|██▊       | 18/63 [00:03<00:08,  5.31it/s][A
 30%|███       | 19/63 [00:04<00:08,  5.30it/s]

epoch 10 batch id 1 loss 0.2494708001613617 train acc 1.0
epoch 10 batch id 51 loss 0.6018518805503845 train acc 0.9093137254901961
epoch 10 batch id 101 loss 0.1790025532245636 train acc 0.9102722772277227
epoch 10 batch id 151 loss 0.379206120967865 train acc 0.9184602649006622
epoch 10 batch id 201 loss 0.40548571944236755 train acc 0.9182213930348259
epoch 10 batch id 251 loss 0.7523946166038513 train acc 0.9170816733067729
epoch 10 batch id 301 loss 0.6097205877304077 train acc 0.9132059800664452
epoch 10 batch id 351 loss 0.34542572498321533 train acc 0.9120370370370371
epoch 10 batch id 401 loss 0.32196253538131714 train acc 0.913497506234414
epoch 10 batch id 451 loss 0.8710569143295288 train acc 0.9121396895787139
epoch 10 batch id 501 loss 1.1879795789718628 train acc 0.9114271457085829
epoch 10 train acc 0.9111193293885601


100%|██████████| 10/10 [54:55<00:00, 329.60s/it]
  0%|          | 0/63 [00:00<?, ?it/s]

epoch 10 test acc 0.7543859649122807


100%|██████████| 63/63 [00:12<00:00,  5.11it/s]


### **3. Predict - Soft Voting**

In [None]:
trunc_0 = np.load('/opt/ml/logits/trunc_0.npy')
trunc_1 = np.load('/opt/ml/logits/trunc_1.npy')
trunc_2 = np.load('/opt/ml/logits/trunc_2.npy')
trunc_3 = np.load('/opt/ml/logits/trunc_3.npy')
trunc_4 = np.load('/opt/ml/logits/trunc_4.npy')
trunc_5 = np.load('/opt/ml/logits/trunc_5.npy')
trunc_6 = np.load('/opt/ml/logits/trunc_6.npy')
trunc_7 = np.load('/opt/ml/logits/trunc_7.npy')
trunc_8 = np.load('/opt/ml/logits/trunc_8.npy')
trunc_9 = np.load('/opt/ml/logits/trunc_9.npy')

In [None]:
predictions = []

total = trunc_0 + trunc_1 + trunc_2 + trunc_3 + trunc_4 + trunc_5 + trunc_6 + trunc_7 + trunc_8 + trunc_9

pred = torch.argmax(torch.from_numpy(total), dim=-1)
predictions.extend(pred.tolist())

submission = pd.DataFrame(predictions, columns=['pred'])
submission.to_csv(os.path.join(submission_dir, '0422_submission_1.csv'), index=False)

### **4. Predict - Hard Voting**

In [None]:
output1 = pd.read_csv(os.path.join(submission_dir, "0421_submission_ver1_trunc_kfold.csv"))
output2 = pd.read_csv(os.path.join(submission_dir, "0422_ver1_soft_voting.csv")) # mask kfold
output3 = pd.read_csv(os.path.join(submission_dir, "0422_ko12.csv"))
output4 = pd.read_csv(os.path.join(submission_dir, "0422_ver5_soft_voting_5.csv"))

all_predictions = []

for i in range(len(output1)):
    outputs = [output1["pred"][i], output2["pred"][i], output3["pred"][i], output4["pred"][i]]

    common = Counter(outputs).most_common()
    
    if len(common) == 1:
        ans = Counter(outputs).most_common(1)
        all_predictions.append(ans[0][0])
    else:
        if common[0][1] == common[1][1]:
            ans = output4["pred"][i]
            all_predictions.append(ans)
        else:
            ans = Counter(outputs).most_common(1)
            all_predictions.append(ans[0][0])