In [2]:
from torch.utils.data import Dataset, DataLoader
import transformers
from torch import nn
from transformers import RobertaForTokenClassification, RobertaTokenizer, RobertaTokenizerFast
from torch.optim import AdamW
import torch
import numpy as np
import glob
from tqdm import tqdm
from sklearn.metrics import f1_score

In [3]:
config = {
    'batch_size': 8,
    'epochs': 60,
    'encoder_final_hidden_state': 768,
    'dropout': 0.2,
    'FILES_PER_BATCH': 2,
    'NUM_TOKENS_TO_PROCESS': 200,
    'lr': 1e-3
}

## Dataset class

In [31]:
class ACLNERDataset(Dataset):

    def __init__(self, path, tokenizer, class_mapping, files_per_batch = 2, num_tokens_to_process = 200):

        files = glob.glob(path)
        file_sizes = []

        self.X_data = []
        self.Y_data = []

        self.files_per_batch = files_per_batch
        self.num_tokens_to_process = num_tokens_to_process
        self.tokenizer = tokenizer
        self.class_mapping = class_mapping
        self.inv_class_mapping = {v:k for k, v in self.class_mapping.items()}

        for i in files:
            
            X = []
            Y = []
            with open(i, 'r', encoding = 'utf-8') as f:
                data = f.readlines()
                file_sizes.append(len(data))
        
                for line in data:
                    line = line.replace('  ', ' ')
                    x, y = line.split(' ')
                    X.append(x)
                    Y.append(y.replace('\n', ''))

                self.X_data.append(X)
                self.Y_data.append(Y)

        seq_len = []
        self.index_map = {}
        k = 0

        for i in range(0, len(files), files_per_batch):
            file_seq_len = 0
            
            for j in range(0, files_per_batch):
                if i+j < len(files):
                    file_seq_len = max(file_seq_len, file_sizes[i+j])

            for j in range(0, files_per_batch):
                if i+j < len(files):
                    difference = file_seq_len - file_sizes[i+j]
                    for num_pads in range(difference):
                        self.X_data[i+j].append("<pad>")
                        self.Y_data[i+j].append("O")

            for j in range(0, file_seq_len + num_tokens_to_process, num_tokens_to_process):
                for m in range(i, i + files_per_batch):
                    if m < len(files):
                        self.index_map[k] = (m, min(j, file_seq_len))
                        k += 1

        self.length = len(self.index_map)


    def __len__(self):
        return self.length

    def __getitem__(self, idx):
        document_set, token_bounds = self.index_map[idx]
        x = self.X_data[document_set][token_bounds: token_bounds+self.num_tokens_to_process+1]
        y = self.Y_data[document_set][token_bounds: token_bounds+self.num_tokens_to_process+1]

        return x, y, document_set

    def collate(self, batch):

        batch_x = [x for x, y, doc in batch]
        batch_y = [y for x, y, doc in batch]
        document = [doc for x, y, doc in batch]

        tokenized_samples = self.tokenizer.batch_encode_plus(batch_x,
                                                             truncation=True,
                                                             padding="max_length",
                                                             max_length = 512,
                                                             return_tensors='pt',
                                                             is_split_into_words=True)
        total_adjusted_labels = []
        for k in range(0, len(tokenized_samples["input_ids"])):
            prev_wid = -1
            word_ids_list = tokenized_samples.word_ids(batch_index=k)
            existing_label_ids = batch_y[k]
            i = -1
            adjusted_label_ids = []

            if existing_label_ids != len(word_ids_list):
                existing_label_ids += ["O"] * (len(word_ids_list) - len(existing_label_ids))

            for wid in word_ids_list:
                if wid is None:
                    adjusted_label_ids.append(self.class_mapping["O"])
                elif wid != prev_wid:
                    i = i + 1
                    adjusted_label_ids.append(self.class_mapping[existing_label_ids[i]])
                    prev_wid = wid
                else:
                    label_name = self.inv_class_mapping[adjusted_label_ids[-1]].replace('B-','I-')
                    # label_name = self.class_mapping[existing_label_ids[i]]
                    # adjusted_label_ids.append(self.class_mapping[existing_label_ids[i]])
                    adjusted_label_ids.append(self.class_mapping[label_name])
            total_adjusted_labels.append(adjusted_label_ids)

        tokenized_samples["labels"] = torch.Tensor(total_adjusted_labels)
        tokenized_samples["document"] = document
        return tokenized_samples

In [151]:
class ACLNERDatasetTest(Dataset):

    def __init__(self, path, tokenizer, files_per_batch = 2, num_tokens_to_process = 200):

        files = glob.glob(path)
        file_sizes = []

        self.X_data = []

        self.files_per_batch = files_per_batch
        self.num_tokens_to_process = num_tokens_to_process
        self.tokenizer = tokenizer

        for i in files:
            
            X = []
            with open(i, 'r', encoding = 'utf-8') as f:
                data = f.readlines()
                file_sizes.append(len(data))
        
                for line in data:
                    
                    line = line.replace('  ', ' ')
                    if line == '\n':
                        x = line
                        y = None
                    else:
                        x,y = line.split(' ')
                    
                    X.append(x)

                self.X_data.append(X)

        seq_len = []
        self.index_map = {}
        k = 0

        for i in range(0, len(files), files_per_batch):
            file_seq_len = 0
            
            for j in range(0, files_per_batch):
                if i+j < len(files):
                    file_seq_len = max(file_seq_len, file_sizes[i+j])

            for j in range(0, files_per_batch):
                if i+j < len(files):
                    difference = file_seq_len - file_sizes[i+j]
                    for num_pads in range(difference):
                        self.X_data[i+j].append("<pad>")

            for j in range(0, file_seq_len + num_tokens_to_process, num_tokens_to_process):
                for m in range(i, i + files_per_batch):
                    if m < len(files):
                        self.index_map[k] = (m, min(j, file_seq_len))
                        k += 1

        self.length = len(self.index_map)


    def __len__(self):
        return self.length

    def __getitem__(self, idx):
        document_set, token_bounds = self.index_map[idx]
        x = self.X_data[document_set][token_bounds: token_bounds+self.num_tokens_to_process+1]
        return x, document_set

    def collate(self, batch):

        batch_x = [x for x, doc in batch]
        tokenized_samples = self.tokenizer.batch_encode_plus(batch_x,
                                                             truncation=True,
                                                             padding="max_length",
                                                             max_length = 512,
                                                             return_tensors='pt',
                                                             is_split_into_words=True)
        all_word_ids_list = []
        for k in range(0, len(tokenized_samples["input_ids"])):
            word_ids_list = tokenized_samples.word_ids(batch_index=k)
            all_word_ids_list.append(word_ids_list)

        tokenized_samples["word_ids"] = all_word_ids_list
        return tokenized_samples

## Dataloaders

In [152]:
class_mapping = {"B-MethodName":0,
                 "I-MethodName":1,
                 "B-HyperparameterName":2,
                 "I-HyperparameterName":3,
                 "B-HyperparameterValue":4,
                 "I-HyperparameterValue":5,
                 "B-MetricName":6,
                 "I-MetricName":7,
                 "B-MetricValue":8,
                 "I-MetricValue":9,
                 "B-TaskName":10,
                 "I-TaskName":11,
                 "B-DatasetName":12,
                 "I-DatasetName":13,
                 "O":14
                }
id_to_class_mapping = {v:k for k, v in class_mapping.items()}

train_data_path = '../data/train/*.conll'
val_data_path = '../data/test/*.conll'
test_data_path = '../data/sciner-test/*.conll'

tokenizer = RobertaTokenizerFast.from_pretrained('ml6team/keyphrase-extraction-kbir-inspec')

train_dataset = ACLNERDataset(train_data_path, tokenizer, class_mapping, config['FILES_PER_BATCH'], config['NUM_TOKENS_TO_PROCESS'])
train_dataloader = DataLoader(train_dataset, batch_size = config['batch_size'], shuffle = False, collate_fn = train_dataset.collate)

val_dataset = ACLNERDataset(val_data_path, tokenizer, class_mapping, config['FILES_PER_BATCH'], config['NUM_TOKENS_TO_PROCESS'])
val_dataloader = DataLoader(val_dataset, batch_size = config['batch_size'], shuffle = False, collate_fn = val_dataset.collate)

test_dataset = ACLNERDatasetTest(test_data_path, tokenizer, config['FILES_PER_BATCH'], config['NUM_TOKENS_TO_PROCESS'])
test_dataloader = DataLoader(test_dataset, batch_size = config['batch_size'], shuffle = False, collate_fn = test_dataset.collate)

In [80]:
print(len(val_dataset))
print(len(train_dataset))

142
924


## Initializing the model

In [8]:
class NERModel(nn.Module):
    def __init__(self):
        super(NERModel, self).__init__()
        
        self.encoder = RobertaForTokenClassification.from_pretrained('roberta_mlm_checkpoints/checkpoint-100000')
        self.dropout = nn.Dropout(p=config['dropout'])
        
        self.classification_layer = nn.Sequential(
            nn.Linear(config['encoder_final_hidden_state'], 1024),
            nn.GELU(),
            nn.Dropout(0.2),
            nn.Linear(1024, 15)
        )
    
    def forward(self, input_ids, attention_mask, labels, output_hidden_states, requires_grad):
        
        if requires_grad:
            for name, param in self.encoder.named_parameters():
                if '11' not in name:
                    param.requires_grad = False
        else:
            for name, param in self.encoder.named_parameters():
                param.requires_grad = False
                
        encoder_out = self.encoder(input_ids = input_ids, attention_mask = attention_mask, output_hidden_states = True)
        x = self.dropout(encoder_out.hidden_states[-1])
        x = self.classification_layer(x)
        return x, x[:, -1]

In [9]:
model = NERModel()
model = model.cuda()
scaler = torch.cuda.amp.GradScaler()
optimizer = AdamW(params = model.parameters(), lr=config['lr'])
criterion = torch.nn.CrossEntropyLoss(ignore_index=tokenizer.pad_token_id)
scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0 = 100, T_mult = 3, eta_min = 1e-7)

Some weights of the model checkpoint at roberta_mlm_checkpoints/checkpoint-100000 were not used when initializing RobertaForTokenClassification: ['lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta_mlm_checkpoints/checkpoint-100000 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model 

## Training the model

In [12]:
def train(train_dataloader, requires_grad):
    total_train_loss = 0
    model.train()
    prev_doc = None
    batch_bar = tqdm(total=len(train_dataloader), dynamic_ncols=True, leave=False, position=0, desc='Train')
    for i, batch in enumerate(tqdm(train_dataloader)):

        optimizer.zero_grad()
        torch.cuda.empty_cache()

        input_ids = batch['input_ids'].cuda()
        attention_mask = batch['attention_mask'].cuda()
        labels = batch['labels']
        labels = labels.type(torch.LongTensor).cuda()
        documents = batch['document']

        with torch.cuda.amp.autocast():
            output= model(input_ids = input_ids, attention_mask = attention_mask, labels = labels, output_hidden_states = True, requires_grad=requires_grad)
            loss = criterion(output[0].reshape(-1, 15), labels.reshape(-1))

        if prev_doc != documents:
            prev_doc = documents
        else:
            last_hidden_state = output

        batch_loss = loss
        total_train_loss += batch_loss
        scaler.scale(loss).backward() 
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()
        batch_bar.set_postfix(
            loss = f"{total_train_loss/ (i+1):.4f}",
            lr = f"{optimizer.param_groups[0]['lr']}"
        )
        batch_bar.update()
    batch_bar.close()
    avg_train_loss = total_train_loss / len(train_dataloader)
    return avg_train_loss

def val(val_dataloader, val_dataset):
    total_val_loss = 0
    model.eval()
    prev_doc = None
    all_input_id_labels = []
    all_input_id_predictions = []

    batch_bar = tqdm(total=len(val_dataloader), dynamic_ncols=True, leave=False, position=0, desc='Val')
    for i, batch in enumerate(val_dataloader):
        torch.cuda.empty_cache()

        input_ids = batch['input_ids'].cuda()
        attention_mask = batch['attention_mask'].cuda()
        labels = batch['labels']
        for label in labels.detach().cpu().numpy().flatten():
            all_input_id_labels.append(label)
        labels = labels.type(torch.LongTensor).cuda()
        documents = batch['document']

        with torch.no_grad():
            output= model(input_ids = input_ids, attention_mask = attention_mask, labels = labels, output_hidden_states = True, requires_grad=False)
            predictions = output[0].reshape(-1, 15)
            loss = criterion(predictions, labels.reshape(-1))

        if prev_doc != documents:
            prev_doc = documents
        else:
            last_hidden_state = output
        # for j in range(len(input_ids)):
        #     batch_labels = extract_keyphrases(input_ids[j], predictions.argmax(dim=1), tokenizer, j)
        #     for l in batch_labels:
        #         BOI_predictions.append(class_mapping[l[1]])
        for prediction in predictions.argmax(dim=1).detach().cpu().numpy().flatten():
            all_input_id_predictions.append(prediction)

        batch_loss = loss
        total_val_loss += batch_loss
        batch_bar.set_postfix(
            loss = f"{total_val_loss/ (i+1):.4f}"
        )
        batch_bar.update()
    batch_bar.close()
    
    f1_micro = f1_score(all_input_id_labels, all_input_id_predictions, labels = list(class_mapping.keys()).remove('O'), average='micro')
    f1_macro = f1_score(all_input_id_labels, all_input_id_predictions, labels = list(class_mapping.keys()).remove('O'), average='macro')
    f1_weighted = f1_score(all_input_id_labels, all_input_id_predictions, labels = list(class_mapping.keys()).remove('O'), average='weighted')
    avg_val_loss = total_val_loss / len(val_dataloader)
    return avg_val_loss, f1_micro, f1_macro, f1_weighted

In [13]:
for epoch in range(config['epochs']):
    print("Epoch number", epoch)
    if epoch < 15:
        train_loss = train(train_dataloader, requires_grad = False)
    else:
        train_loss = train(train_dataloader, requires_grad = True)
    print("Train loss", train_loss)
    val_loss, f1_micro, f1_macro, f1_weighted = val(val_dataloader, val_dataset)
    print("Val loss", val_loss)
    print("F-1 score", f1_micro, f1_macro, f1_weighted)

Epoch number 0


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:41,  2.74it/s, los[A
Train:   2%| | 2/116 [00:00<00:26,  4.34it/s, los[A
Train:   3%| | 3/116 [00:00<00:20,  5.43it/s, los[A
Train:   3%| | 4/116 [00:00<00:18,  6.17it/s, los[A
Train:   4%| | 5/116 [00:00<00:16,  6.61it/s, los[A
Train:   5%| | 6/116 [00:01<00:15,  6.97it/s, los[A
Train:   6%| | 7/116 [00:01<00:15,  7.16it/s, los[A
Train:   7%| | 8/116 [00:01<00:14,  7.32it/s, los[A
Train:   8%| | 9/116 [00:01<00:14,  7.48it/s, los[A
Train:   9%| | 10/116 [00:01<00:14,  7.57it/s, lo[A
Train:   9%| | 11/116 [00:01<00:16,  6.49it/s, lo[A
Train:  10%| | 12/116 [00:01<00:15,  6.80it/s, lo[A
Train:  11%| | 13/116 [00:02<00:14,  7.07it/s, lo[A
Train:  12%| | 14/116 [00:02<00:14,  7.27it/s, lo[A
Train:  13%|▏| 15/116 [00:02<00:13,  7.40it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:13,  7.54it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:13,  7.59it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:12,  7.65it/s, lo

Train loss tensor(0.2084, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0672, device='cuda:0')
F-1 score 0.9874009683098591 0.06624449504352059 0.9811481782675483
Epoch number 1


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:14,  8.04it/s, los[A
Train:   2%| | 2/116 [00:00<00:14,  7.88it/s, los[A
Train:   3%| | 3/116 [00:00<00:14,  7.83it/s, los[A
Train:   3%| | 4/116 [00:00<00:14,  7.79it/s, los[A
Train:   4%| | 5/116 [00:00<00:14,  7.73it/s, los[A
Train:   5%| | 6/116 [00:00<00:14,  7.71it/s, los[A
Train:   6%| | 7/116 [00:00<00:14,  7.77it/s, los[A
Train:   7%| | 8/116 [00:01<00:13,  7.72it/s, los[A
Train:   8%| | 9/116 [00:01<00:13,  7.73it/s, los[A
Train:   9%| | 10/116 [00:01<00:13,  7.77it/s, lo[A
Train:   9%| | 11/116 [00:01<00:13,  7.74it/s, lo[A
Train:  10%| | 12/116 [00:01<00:13,  7.72it/s, lo[A
Train:  11%| | 13/116 [00:01<00:13,  7.71it/s, lo[A
Train:  12%| | 14/116 [00:01<00:13,  7.73it/s, lo[A
Train:  13%|▏| 15/116 [00:01<00:13,  7.71it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:12,  7.76it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:12,  7.71it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:12,  7.68it/s, lo

Train loss tensor(0.0877, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0537, device='cuda:0')
F-1 score 0.9877998459507042 0.20759258312705056 0.9837146715949395
Epoch number 2


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:14,  7.89it/s, los[A
Train:   2%| | 2/116 [00:00<00:14,  7.69it/s, los[A
Train:   3%| | 3/116 [00:00<00:14,  7.69it/s, los[A
Train:   3%| | 4/116 [00:00<00:14,  7.73it/s, los[A
Train:   4%| | 5/116 [00:00<00:14,  7.70it/s, los[A
Train:   5%| | 6/116 [00:00<00:14,  7.68it/s, los[A
Train:   6%| | 7/116 [00:00<00:14,  7.72it/s, los[A
Train:   7%| | 8/116 [00:01<00:14,  7.69it/s, los[A
Train:   8%| | 9/116 [00:01<00:13,  7.73it/s, los[A
Train:   9%| | 10/116 [00:01<00:13,  7.70it/s, lo[A
Train:   9%| | 11/116 [00:01<00:13,  7.68it/s, lo[A
Train:  10%| | 12/116 [00:01<00:13,  7.66it/s, lo[A
Train:  11%| | 13/116 [00:01<00:13,  7.66it/s, lo[A
Train:  12%| | 14/116 [00:01<00:13,  7.70it/s, lo[A
Train:  13%|▏| 15/116 [00:01<00:13,  7.68it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:12,  7.70it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:12,  7.67it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:12,  7.64it/s, lo

Train loss tensor(0.0715, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0424, device='cuda:0')
F-1 score 0.9885288292253521 0.18236412616700443 0.9839341723329037
Epoch number 3


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:14,  7.75it/s, los[A
Train:   2%| | 2/116 [00:00<00:14,  7.62it/s, los[A
Train:   3%| | 3/116 [00:00<00:14,  7.62it/s, los[A
Train:   3%| | 4/116 [00:00<00:14,  7.64it/s, los[A
Train:   4%| | 5/116 [00:00<00:14,  7.62it/s, los[A
Train:   5%| | 6/116 [00:00<00:14,  7.63it/s, los[A
Train:   6%| | 7/116 [00:00<00:14,  7.66it/s, los[A
Train:   7%| | 8/116 [00:01<00:14,  7.64it/s, los[A
Train:   8%| | 9/116 [00:01<00:14,  7.64it/s, los[A
Train:   9%| | 10/116 [00:01<00:13,  7.66it/s, lo[A
Train:   9%| | 11/116 [00:01<00:13,  7.65it/s, lo[A
Train:  10%| | 12/116 [00:01<00:13,  7.61it/s, lo[A
Train:  11%| | 13/116 [00:01<00:13,  7.61it/s, lo[A
Train:  12%| | 14/116 [00:01<00:13,  7.63it/s, lo[A
Train:  13%|▏| 15/116 [00:01<00:13,  7.66it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:13,  7.66it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:12,  7.66it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:12,  7.60it/s, lo

Train loss tensor(0.0683, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0488, device='cuda:0')
F-1 score 0.9875660211267606 0.24681787574587974 0.983980405900579
Epoch number 4


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:14,  7.79it/s, los[A
Train:   2%| | 2/116 [00:00<00:14,  7.64it/s, los[A
Train:   3%| | 3/116 [00:00<00:14,  7.61it/s, los[A
Train:   3%| | 4/116 [00:00<00:14,  7.61it/s, los[A
Train:   4%| | 5/116 [00:00<00:14,  7.56it/s, los[A
Train:   5%| | 6/116 [00:00<00:14,  7.57it/s, los[A
Train:   6%| | 7/116 [00:00<00:14,  7.61it/s, los[A
Train:   7%| | 8/116 [00:01<00:14,  7.60it/s, los[A
Train:   8%| | 9/116 [00:01<00:14,  7.57it/s, los[A
Train:   9%| | 10/116 [00:01<00:13,  7.59it/s, lo[A
Train:   9%| | 11/116 [00:01<00:13,  7.58it/s, lo[A
Train:  10%| | 12/116 [00:01<00:13,  7.56it/s, lo[A
Train:  11%| | 13/116 [00:01<00:13,  7.56it/s, lo[A
Train:  12%| | 14/116 [00:01<00:13,  7.58it/s, lo[A
Train:  13%|▏| 15/116 [00:01<00:13,  7.58it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:13,  7.61it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:13,  7.55it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:13,  7.52it/s, lo

Train loss tensor(0.0661, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0553, device='cuda:0')
F-1 score 0.985475352112676 0.29122302772972314 0.9832729715913254
Epoch number 5


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:14,  7.70it/s, los[A
Train:   2%| | 2/116 [00:00<00:15,  7.59it/s, los[A
Train:   3%| | 3/116 [00:00<00:14,  7.58it/s, los[A
Train:   3%| | 4/116 [00:00<00:14,  7.57it/s, los[A
Train:   4%| | 5/116 [00:00<00:14,  7.54it/s, los[A
Train:   5%| | 6/116 [00:00<00:14,  7.52it/s, los[A
Train:   6%| | 7/116 [00:00<00:14,  7.52it/s, los[A
Train:   7%| | 8/116 [00:01<00:14,  7.48it/s, los[A
Train:   8%| | 9/116 [00:01<00:14,  7.48it/s, los[A
Train:   9%| | 10/116 [00:01<00:14,  7.51it/s, lo[A
Train:   9%| | 11/116 [00:01<00:14,  7.47it/s, lo[A
Train:  10%| | 12/116 [00:01<00:13,  7.48it/s, lo[A
Train:  11%| | 13/116 [00:01<00:13,  7.48it/s, lo[A
Train:  12%| | 14/116 [00:01<00:13,  7.50it/s, lo[A
Train:  13%|▏| 15/116 [00:01<00:13,  7.51it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:13,  7.51it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:13,  7.49it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:13,  7.47it/s, lo

Train loss tensor(0.0608, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0527, device='cuda:0')
F-1 score 0.9860667913732394 0.3091106213989574 0.9836799341409839
Epoch number 6


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:14,  7.76it/s, los[A
Train:   2%| | 2/116 [00:00<00:15,  7.58it/s, los[A
Train:   3%| | 3/116 [00:00<00:15,  7.51it/s, los[A
Train:   3%| | 4/116 [00:00<00:14,  7.52it/s, los[A
Train:   4%| | 5/116 [00:00<00:14,  7.47it/s, los[A
Train:   5%| | 6/116 [00:00<00:14,  7.42it/s, los[A
Train:   6%| | 7/116 [00:00<00:14,  7.44it/s, los[A
Train:   7%| | 8/116 [00:01<00:14,  7.42it/s, los[A
Train:   8%| | 9/116 [00:01<00:14,  7.44it/s, los[A
Train:   9%| | 10/116 [00:01<00:14,  7.47it/s, lo[A
Train:   9%| | 11/116 [00:01<00:14,  7.46it/s, lo[A
Train:  10%| | 12/116 [00:01<00:14,  7.41it/s, lo[A
Train:  11%| | 13/116 [00:01<00:13,  7.41it/s, lo[A
Train:  12%| | 14/116 [00:01<00:13,  7.42it/s, lo[A
Train:  13%|▏| 15/116 [00:02<00:13,  7.43it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:13,  7.48it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:13,  7.45it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:13,  7.43it/s, lo

Train loss tensor(0.0576, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0458, device='cuda:0')
F-1 score 0.9872909330985915 0.3115221168495404 0.9842438889969267
Epoch number 7


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:15,  7.50it/s, los[A
Train:   2%| | 2/116 [00:00<00:15,  7.42it/s, los[A
Train:   3%| | 3/116 [00:00<00:15,  7.41it/s, los[A
Train:   3%| | 4/116 [00:00<00:15,  7.41it/s, los[A
Train:   4%| | 5/116 [00:00<00:15,  7.38it/s, los[A
Train:   5%| | 6/116 [00:00<00:14,  7.36it/s, los[A
Train:   6%| | 7/116 [00:00<00:14,  7.34it/s, los[A
Train:   7%| | 8/116 [00:01<00:14,  7.34it/s, los[A
Train:   8%| | 9/116 [00:01<00:14,  7.37it/s, los[A
Train:   9%| | 10/116 [00:01<00:14,  7.39it/s, lo[A
Train:   9%| | 11/116 [00:01<00:14,  7.35it/s, lo[A
Train:  10%| | 12/116 [00:01<00:14,  7.36it/s, lo[A
Train:  11%| | 13/116 [00:01<00:14,  7.35it/s, lo[A
Train:  12%| | 14/116 [00:01<00:13,  7.35it/s, lo[A
Train:  13%|▏| 15/116 [00:02<00:13,  7.37it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:13,  7.39it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:13,  7.38it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:13,  7.36it/s, lo

Train loss tensor(0.0551, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0414, device='cuda:0')
F-1 score 0.9880474251760564 0.2977325071993225 0.9845536508448315
Epoch number 8


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:15,  7.54it/s, los[A
Train:   2%| | 2/116 [00:00<00:15,  7.38it/s, los[A
Train:   3%| | 3/116 [00:00<00:15,  7.35it/s, los[A
Train:   3%| | 4/116 [00:00<00:15,  7.34it/s, los[A
Train:   4%| | 5/116 [00:00<00:15,  7.30it/s, los[A
Train:   5%| | 6/116 [00:00<00:15,  7.28it/s, los[A
Train:   6%| | 7/116 [00:00<00:14,  7.28it/s, los[A
Train:   7%| | 8/116 [00:01<00:14,  7.29it/s, los[A
Train:   8%| | 9/116 [00:01<00:14,  7.32it/s, los[A
Train:   9%| | 10/116 [00:01<00:14,  7.35it/s, lo[A
Train:   9%| | 11/116 [00:01<00:14,  7.31it/s, lo[A
Train:  10%| | 12/116 [00:01<00:14,  7.31it/s, lo[A
Train:  11%| | 13/116 [00:01<00:14,  7.30it/s, lo[A
Train:  12%| | 14/116 [00:01<00:13,  7.33it/s, lo[A
Train:  13%|▏| 15/116 [00:02<00:13,  7.33it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:13,  7.37it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:13,  7.29it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:13,  7.22it/s, lo

Train loss tensor(0.0536, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0385, device='cuda:0')
F-1 score 0.9883775308098591 0.2904911510913202 0.9847264265213933
Epoch number 9


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:15,  7.65it/s, los[A
Train:   2%| | 2/116 [00:00<00:15,  7.47it/s, los[A
Train:   3%| | 3/116 [00:00<00:15,  7.44it/s, los[A
Train:   3%| | 4/116 [00:00<00:15,  7.42it/s, los[A
Train:   4%| | 5/116 [00:00<00:15,  7.35it/s, los[A
Train:   5%| | 6/116 [00:00<00:15,  7.33it/s, los[A
Train:   6%| | 7/116 [00:00<00:14,  7.31it/s, los[A
Train:   7%| | 8/116 [00:01<00:14,  7.28it/s, los[A
Train:   8%| | 9/116 [00:01<00:14,  7.32it/s, los[A
Train:   9%| | 10/116 [00:01<00:14,  7.34it/s, lo[A
Train:   9%| | 11/116 [00:01<00:14,  7.32it/s, lo[A
Train:  10%| | 12/116 [00:01<00:14,  7.32it/s, lo[A
Train:  11%| | 13/116 [00:01<00:14,  7.34it/s, lo[A
Train:  12%| | 14/116 [00:01<00:13,  7.35it/s, lo[A
Train:  13%|▏| 15/116 [00:02<00:13,  7.34it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:13,  7.37it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:13,  7.33it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:13,  7.32it/s, lo

Train loss tensor(0.0526, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0375, device='cuda:0')
F-1 score 0.9886251100352113 0.290957310677083 0.9849590257989894
Epoch number 10


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:15,  7.45it/s, los[A
Train:   2%| | 2/116 [00:00<00:15,  7.35it/s, los[A
Train:   3%| | 3/116 [00:00<00:15,  7.31it/s, los[A
Train:   3%| | 4/116 [00:00<00:15,  7.34it/s, los[A
Train:   4%| | 5/116 [00:00<00:15,  7.33it/s, los[A
Train:   5%| | 6/116 [00:00<00:15,  7.30it/s, los[A
Train:   6%| | 7/116 [00:00<00:15,  7.26it/s, los[A
Train:   7%| | 8/116 [00:01<00:14,  7.26it/s, los[A
Train:   8%| | 9/116 [00:01<00:14,  7.28it/s, los[A
Train:   9%| | 10/116 [00:01<00:14,  7.31it/s, lo[A
Train:   9%| | 11/116 [00:01<00:14,  7.28it/s, lo[A
Train:  10%| | 12/116 [00:01<00:14,  7.25it/s, lo[A
Train:  11%| | 13/116 [00:01<00:14,  7.27it/s, lo[A
Train:  12%| | 14/116 [00:01<00:14,  7.26it/s, lo[A
Train:  13%|▏| 15/116 [00:02<00:13,  7.26it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:13,  7.29it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:13,  7.24it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:13,  7.22it/s, lo

Train loss tensor(0.0520, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0374, device='cuda:0')
F-1 score 0.9886526188380281 0.28017332439806913 0.9847722997647393
Epoch number 11


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:15,  7.47it/s, los[A
Train:   2%| | 2/116 [00:00<00:15,  7.28it/s, los[A
Train:   3%| | 3/116 [00:00<00:15,  7.26it/s, los[A
Train:   3%| | 4/116 [00:00<00:15,  7.26it/s, los[A
Train:   4%| | 5/116 [00:00<00:15,  7.23it/s, los[A
Train:   5%| | 6/116 [00:00<00:15,  7.22it/s, los[A
Train:   6%| | 7/116 [00:00<00:15,  7.18it/s, los[A
Train:   7%| | 8/116 [00:01<00:15,  7.19it/s, los[A
Train:   8%| | 9/116 [00:01<00:14,  7.24it/s, los[A
Train:   9%| | 10/116 [00:01<00:14,  7.27it/s, lo[A
Train:   9%| | 11/116 [00:01<00:14,  7.26it/s, lo[A
Train:  10%| | 12/116 [00:01<00:14,  7.24it/s, lo[A
Train:  11%| | 13/116 [00:01<00:14,  7.23it/s, lo[A
Train:  12%| | 14/116 [00:01<00:14,  7.18it/s, lo[A
Train:  13%|▏| 15/116 [00:02<00:14,  7.19it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:13,  7.22it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:13,  7.19it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:13,  7.17it/s, lo

Train loss tensor(0.0541, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0484, device='cuda:0')
F-1 score 0.9863006161971831 0.3134474200293205 0.9839792863151211
Epoch number 12


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:15,  7.36it/s, los[A
Train:   2%| | 2/116 [00:00<00:15,  7.27it/s, los[A
Train:   3%| | 3/116 [00:00<00:15,  7.26it/s, los[A
Train:   3%| | 4/116 [00:00<00:15,  7.26it/s, los[A
Train:   4%| | 5/116 [00:00<00:15,  7.24it/s, los[A
Train:   5%| | 6/116 [00:00<00:15,  7.19it/s, los[A
Train:   6%| | 7/116 [00:00<00:15,  7.18it/s, los[A
Train:   7%| | 8/116 [00:01<00:15,  7.18it/s, los[A
Train:   8%| | 9/116 [00:01<00:14,  7.18it/s, los[A
Train:   9%| | 10/116 [00:01<00:14,  7.22it/s, lo[A
Train:   9%| | 11/116 [00:01<00:14,  7.21it/s, lo[A
Train:  10%| | 12/116 [00:01<00:14,  7.19it/s, lo[A
Train:  11%| | 13/116 [00:01<00:14,  7.20it/s, lo[A
Train:  12%| | 14/116 [00:01<00:14,  7.18it/s, lo[A
Train:  13%|▏| 15/116 [00:02<00:13,  7.22it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:13,  7.23it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:13,  7.17it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:13,  7.15it/s, lo

Train loss tensor(0.0541, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0519, device='cuda:0')
F-1 score 0.9849389304577465 0.3328756490856945 0.9833657133709856
Epoch number 13


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:15,  7.33it/s, los[A
Train:   2%| | 2/116 [00:00<00:15,  7.20it/s, los[A
Train:   3%| | 3/116 [00:00<00:15,  7.22it/s, los[A
Train:   3%| | 4/116 [00:00<00:15,  7.23it/s, los[A
Train:   4%| | 5/116 [00:00<00:15,  7.21it/s, los[A
Train:   5%| | 6/116 [00:00<00:15,  7.16it/s, los[A
Train:   6%| | 7/116 [00:00<00:15,  7.11it/s, los[A
Train:   7%| | 8/116 [00:01<00:15,  7.14it/s, los[A
Train:   8%| | 9/116 [00:01<00:14,  7.17it/s, los[A
Train:   9%| | 10/116 [00:01<00:14,  7.19it/s, lo[A
Train:   9%| | 11/116 [00:01<00:14,  7.16it/s, lo[A
Train:  10%| | 12/116 [00:01<00:14,  7.15it/s, lo[A
Train:  11%| | 13/116 [00:01<00:14,  7.17it/s, lo[A
Train:  12%| | 14/116 [00:01<00:14,  7.15it/s, lo[A
Train:  13%|▏| 15/116 [00:02<00:14,  7.15it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:13,  7.18it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:13,  7.16it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:13,  7.15it/s, lo

Train loss tensor(0.0523, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0521, device='cuda:0')
F-1 score 0.9847876320422535 0.3348095634014482 0.983379010245713
Epoch number 14


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:15,  7.27it/s, los[A
Train:   2%| | 2/116 [00:00<00:15,  7.16it/s, los[A
Train:   3%| | 3/116 [00:00<00:15,  7.13it/s, los[A
Train:   3%| | 4/116 [00:00<00:15,  7.15it/s, los[A
Train:   4%| | 5/116 [00:00<00:15,  7.14it/s, los[A
Train:   5%| | 6/116 [00:00<00:15,  7.16it/s, los[A
Train:   6%| | 7/116 [00:00<00:15,  7.12it/s, los[A
Train:   7%| | 8/116 [00:01<00:15,  7.12it/s, los[A
Train:   8%| | 9/116 [00:01<00:15,  7.10it/s, los[A
Train:   9%| | 10/116 [00:01<00:14,  7.12it/s, lo[A
Train:   9%| | 11/116 [00:01<00:14,  7.08it/s, lo[A
Train:  10%| | 12/116 [00:01<00:14,  7.06it/s, lo[A
Train:  11%| | 13/116 [00:01<00:14,  7.07it/s, lo[A
Train:  12%| | 14/116 [00:01<00:14,  7.05it/s, lo[A
Train:  13%|▏| 15/116 [00:02<00:14,  7.05it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:14,  7.07it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:14,  7.04it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:13,  7.05it/s, lo

Train loss tensor(0.0502, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0529, device='cuda:0')
F-1 score 0.9841411751760564 0.33139951597429584 0.982917256200541
Epoch number 15


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:15,  7.30it/s, los[A
Train:   2%| | 2/116 [00:00<00:15,  7.15it/s, los[A
Train:   3%| | 3/116 [00:00<00:15,  7.13it/s, los[A
Train:   3%| | 4/116 [00:00<00:15,  7.15it/s, los[A
Train:   4%| | 5/116 [00:00<00:15,  7.12it/s, los[A
Train:   5%| | 6/116 [00:00<00:15,  7.12it/s, los[A
Train:   6%| | 7/116 [00:00<00:15,  7.13it/s, los[A
Train:   7%| | 8/116 [00:01<00:15,  7.12it/s, los[A
Train:   8%| | 9/116 [00:01<00:14,  7.16it/s, los[A
Train:   9%| | 10/116 [00:01<00:14,  7.19it/s, lo[A
Train:   9%| | 11/116 [00:01<00:14,  7.17it/s, lo[A
Train:  10%| | 12/116 [00:01<00:14,  7.14it/s, lo[A
Train:  11%| | 13/116 [00:01<00:14,  7.12it/s, lo[A
Train:  12%| | 14/116 [00:01<00:14,  7.05it/s, lo[A
Train:  13%|▏| 15/116 [00:02<00:14,  7.09it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:14,  7.11it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:14,  7.07it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:13,  7.07it/s, lo

Train loss tensor(0.0488, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0515, device='cuda:0')
F-1 score 0.9844162632042254 0.329330813070751 0.9831569543934726
Epoch number 16


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:15,  7.43it/s, los[A
Train:   2%| | 2/116 [00:00<00:15,  7.27it/s, los[A
Train:   3%| | 3/116 [00:00<00:15,  7.19it/s, los[A
Train:   3%| | 4/116 [00:00<00:15,  7.19it/s, los[A
Train:   4%| | 5/116 [00:00<00:15,  7.17it/s, los[A
Train:   5%| | 6/116 [00:00<00:15,  7.17it/s, los[A
Train:   6%| | 7/116 [00:00<00:15,  7.09it/s, los[A
Train:   7%| | 8/116 [00:01<00:15,  7.11it/s, los[A
Train:   8%| | 9/116 [00:01<00:15,  7.13it/s, los[A
Train:   9%| | 10/116 [00:01<00:14,  7.19it/s, lo[A
Train:   9%| | 11/116 [00:01<00:14,  7.17it/s, lo[A
Train:  10%| | 12/116 [00:01<00:14,  7.14it/s, lo[A
Train:  11%| | 13/116 [00:01<00:14,  7.13it/s, lo[A
Train:  12%| | 14/116 [00:01<00:14,  7.10it/s, lo[A
Train:  13%|▏| 15/116 [00:02<00:14,  7.06it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:14,  7.08it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:13,  7.08it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:13,  7.06it/s, lo

Train loss tensor(0.0479, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0499, device='cuda:0')
F-1 score 0.9854615977112676 0.33108743339049923 0.9836282339209677
Epoch number 17


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:15,  7.35it/s, los[A
Train:   2%| | 2/116 [00:00<00:15,  7.22it/s, los[A
Train:   3%| | 3/116 [00:00<00:15,  7.18it/s, los[A
Train:   3%| | 4/116 [00:00<00:15,  7.17it/s, los[A
Train:   4%| | 5/116 [00:00<00:15,  7.16it/s, los[A
Train:   5%| | 6/116 [00:00<00:15,  7.16it/s, los[A
Train:   6%| | 7/116 [00:00<00:15,  7.14it/s, los[A
Train:   7%| | 8/116 [00:01<00:15,  7.15it/s, los[A
Train:   8%| | 9/116 [00:01<00:14,  7.17it/s, los[A
Train:   9%| | 10/116 [00:01<00:14,  7.22it/s, lo[A
Train:   9%| | 11/116 [00:01<00:14,  7.18it/s, lo[A
Train:  10%| | 12/116 [00:01<00:14,  7.13it/s, lo[A
Train:  11%| | 13/116 [00:01<00:14,  7.12it/s, lo[A
Train:  12%| | 14/116 [00:01<00:14,  7.10it/s, lo[A
Train:  13%|▏| 15/116 [00:02<00:14,  7.10it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:14,  7.11it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:13,  7.08it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:13,  7.07it/s, lo

Train loss tensor(0.0460, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0475, device='cuda:0')
F-1 score 0.9862731073943662 0.34780923059161156 0.9842293989522395
Epoch number 18


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:15,  7.37it/s, los[A
Train:   2%| | 2/116 [00:00<00:15,  7.19it/s, los[A
Train:   3%| | 3/116 [00:00<00:15,  7.21it/s, los[A
Train:   3%| | 4/116 [00:00<00:15,  7.23it/s, los[A
Train:   4%| | 5/116 [00:00<00:15,  7.21it/s, los[A
Train:   5%| | 6/116 [00:00<00:15,  7.22it/s, los[A
Train:   6%| | 7/116 [00:00<00:15,  7.14it/s, los[A
Train:   7%| | 8/116 [00:01<00:15,  7.15it/s, los[A
Train:   8%| | 9/116 [00:01<00:14,  7.14it/s, los[A
Train:   9%| | 10/116 [00:01<00:14,  7.16it/s, lo[A
Train:   9%| | 11/116 [00:01<00:14,  7.13it/s, lo[A
Train:  10%| | 12/116 [00:01<00:14,  7.11it/s, lo[A
Train:  11%| | 13/116 [00:01<00:14,  7.13it/s, lo[A
Train:  12%| | 14/116 [00:01<00:14,  7.13it/s, lo[A
Train:  13%|▏| 15/116 [00:02<00:14,  7.15it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:13,  7.18it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:13,  7.20it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:13,  7.19it/s, lo

Train loss tensor(0.0448, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0454, device='cuda:0')
F-1 score 0.986878301056338 0.34088120730654925 0.9843790194425938
Epoch number 19


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:15,  7.49it/s, los[A
Train:   2%| | 2/116 [00:00<00:15,  7.27it/s, los[A
Train:   3%| | 3/116 [00:00<00:15,  7.27it/s, los[A
Train:   3%| | 4/116 [00:00<00:15,  7.26it/s, los[A
Train:   4%| | 5/116 [00:00<00:15,  7.23it/s, los[A
Train:   5%| | 6/116 [00:00<00:15,  7.22it/s, los[A
Train:   6%| | 7/116 [00:00<00:15,  7.13it/s, los[A
Train:   7%| | 8/116 [00:01<00:15,  7.12it/s, los[A
Train:   8%| | 9/116 [00:01<00:14,  7.15it/s, los[A
Train:   9%| | 10/116 [00:01<00:14,  7.18it/s, lo[A
Train:   9%| | 11/116 [00:01<00:14,  7.18it/s, lo[A
Train:  10%| | 12/116 [00:01<00:14,  7.16it/s, lo[A
Train:  11%| | 13/116 [00:01<00:14,  7.17it/s, lo[A
Train:  12%| | 14/116 [00:01<00:14,  7.20it/s, lo[A
Train:  13%|▏| 15/116 [00:02<00:14,  7.20it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:13,  7.20it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:13,  7.17it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:13,  7.17it/s, lo

Train loss tensor(0.0434, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0432, device='cuda:0')
F-1 score 0.9874009683098591 0.34378795568405196 0.9846577630159027
Epoch number 20


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:15,  7.41it/s, los[A
Train:   2%| | 2/116 [00:00<00:15,  7.21it/s, los[A
Train:   3%| | 3/116 [00:00<00:15,  7.19it/s, los[A
Train:   3%| | 4/116 [00:00<00:15,  7.19it/s, los[A
Train:   4%| | 5/116 [00:00<00:15,  7.23it/s, los[A
Train:   5%| | 6/116 [00:00<00:15,  7.21it/s, los[A
Train:   6%| | 7/116 [00:00<00:15,  7.13it/s, los[A
Train:   7%| | 8/116 [00:01<00:15,  7.10it/s, los[A
Train:   8%| | 9/116 [00:01<00:15,  7.11it/s, los[A
Train:   9%| | 10/116 [00:01<00:14,  7.13it/s, lo[A
Train:   9%| | 11/116 [00:01<00:14,  7.15it/s, lo[A
Train:  10%| | 12/116 [00:01<00:14,  7.15it/s, lo[A
Train:  11%| | 13/116 [00:01<00:14,  7.13it/s, lo[A
Train:  12%| | 14/116 [00:01<00:14,  7.15it/s, lo[A
Train:  13%|▏| 15/116 [00:02<00:14,  7.19it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:13,  7.21it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:13,  7.16it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:13,  7.12it/s, lo

Train loss tensor(0.0425, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0414, device='cuda:0')
F-1 score 0.9879098811619719 0.3467487179325666 0.9849531783101876
Epoch number 21


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:15,  7.45it/s, los[A
Train:   2%| | 2/116 [00:00<00:15,  7.25it/s, los[A
Train:   3%| | 3/116 [00:00<00:15,  7.28it/s, los[A
Train:   3%| | 4/116 [00:00<00:15,  7.27it/s, los[A
Train:   4%| | 5/116 [00:00<00:15,  7.20it/s, los[A
Train:   5%| | 6/116 [00:00<00:15,  7.22it/s, los[A
Train:   6%| | 7/116 [00:00<00:15,  7.14it/s, los[A
Train:   7%| | 8/116 [00:01<00:15,  7.14it/s, los[A
Train:   8%| | 9/116 [00:01<00:14,  7.15it/s, los[A
Train:   9%| | 10/116 [00:01<00:14,  7.13it/s, lo[A
Train:   9%| | 11/116 [00:01<00:14,  7.10it/s, lo[A
Train:  10%| | 12/116 [00:01<00:14,  7.08it/s, lo[A
Train:  11%| | 13/116 [00:01<00:14,  7.07it/s, lo[A
Train:  12%| | 14/116 [00:01<00:14,  7.07it/s, lo[A
Train:  13%|▏| 15/116 [00:02<00:14,  7.08it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:13,  7.15it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:13,  7.16it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:13,  7.13it/s, lo

Train loss tensor(0.0412, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0402, device='cuda:0')
F-1 score 0.9879786531690141 0.32958664442181135 0.984896694530562
Epoch number 22


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:15,  7.57it/s, los[A
Train:   2%| | 2/116 [00:00<00:15,  7.30it/s, los[A
Train:   3%| | 3/116 [00:00<00:15,  7.26it/s, los[A
Train:   3%| | 4/116 [00:00<00:15,  7.27it/s, los[A
Train:   4%| | 5/116 [00:00<00:15,  7.23it/s, los[A
Train:   5%| | 6/116 [00:00<00:15,  7.17it/s, los[A
Train:   6%| | 7/116 [00:00<00:15,  7.09it/s, los[A
Train:   7%| | 8/116 [00:01<00:15,  7.10it/s, los[A
Train:   8%| | 9/116 [00:01<00:15,  7.10it/s, los[A
Train:   9%| | 10/116 [00:01<00:14,  7.11it/s, lo[A
Train:   9%| | 11/116 [00:01<00:14,  7.12it/s, lo[A
Train:  10%| | 12/116 [00:01<00:14,  7.14it/s, lo[A
Train:  11%| | 13/116 [00:01<00:14,  7.15it/s, lo[A
Train:  12%| | 14/116 [00:01<00:14,  7.15it/s, lo[A
Train:  13%|▏| 15/116 [00:02<00:14,  7.18it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:13,  7.19it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:13,  7.13it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:13,  7.11it/s, lo

Train loss tensor(0.0405, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0393, device='cuda:0')
F-1 score 0.9880474251760564 0.3254068243836373 0.9849069580971178
Epoch number 23


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:15,  7.35it/s, los[A
Train:   2%| | 2/116 [00:00<00:15,  7.20it/s, los[A
Train:   3%| | 3/116 [00:00<00:15,  7.17it/s, los[A
Train:   3%| | 4/116 [00:00<00:15,  7.17it/s, los[A
Train:   4%| | 5/116 [00:00<00:15,  7.14it/s, los[A
Train:   5%| | 6/116 [00:00<00:15,  7.14it/s, los[A
Train:   6%| | 7/116 [00:00<00:15,  7.18it/s, los[A
Train:   7%| | 8/116 [00:01<00:15,  7.14it/s, los[A
Train:   8%| | 9/116 [00:01<00:14,  7.15it/s, los[A
Train:   9%| | 10/116 [00:01<00:14,  7.19it/s, lo[A
Train:   9%| | 11/116 [00:01<00:14,  7.17it/s, lo[A
Train:  10%| | 12/116 [00:01<00:14,  7.14it/s, lo[A
Train:  11%| | 13/116 [00:01<00:14,  7.13it/s, lo[A
Train:  12%| | 14/116 [00:01<00:14,  7.10it/s, lo[A
Train:  13%|▏| 15/116 [00:02<00:14,  7.10it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:14,  7.12it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:13,  7.11it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:13,  7.09it/s, lo

Train loss tensor(0.0399, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0392, device='cuda:0')
F-1 score 0.9881024427816901 0.3260589267964276 0.9849256069375087
Epoch number 24


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:15,  7.37it/s, los[A
Train:   2%| | 2/116 [00:00<00:15,  7.22it/s, los[A
Train:   3%| | 3/116 [00:00<00:15,  7.20it/s, los[A
Train:   3%| | 4/116 [00:00<00:15,  7.21it/s, los[A
Train:   4%| | 5/116 [00:00<00:15,  7.19it/s, los[A
Train:   5%| | 6/116 [00:00<00:15,  7.18it/s, los[A
Train:   6%| | 7/116 [00:00<00:15,  7.10it/s, los[A
Train:   7%| | 8/116 [00:01<00:15,  7.15it/s, los[A
Train:   8%| | 9/116 [00:01<00:14,  7.17it/s, los[A
Train:   9%| | 10/116 [00:01<00:14,  7.21it/s, lo[A
Train:   9%| | 11/116 [00:01<00:14,  7.17it/s, lo[A
Train:  10%| | 12/116 [00:01<00:14,  7.15it/s, lo[A
Train:  11%| | 13/116 [00:01<00:14,  7.15it/s, lo[A
Train:  12%| | 14/116 [00:01<00:14,  7.17it/s, lo[A
Train:  13%|▏| 15/116 [00:02<00:14,  7.17it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:13,  7.19it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:13,  7.18it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:13,  7.16it/s, lo

Train loss tensor(0.0389, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0387, device='cuda:0')
F-1 score 0.9880886883802817 0.3169482679044482 0.9848539422780539
Epoch number 25


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:15,  7.44it/s, los[A
Train:   2%| | 2/116 [00:00<00:15,  7.25it/s, los[A
Train:   3%| | 3/116 [00:00<00:15,  7.21it/s, los[A
Train:   3%| | 4/116 [00:00<00:15,  7.23it/s, los[A
Train:   4%| | 5/116 [00:00<00:15,  7.19it/s, los[A
Train:   5%| | 6/116 [00:00<00:15,  7.17it/s, los[A
Train:   6%| | 7/116 [00:00<00:15,  7.11it/s, los[A
Train:   7%| | 8/116 [00:01<00:15,  7.11it/s, los[A
Train:   8%| | 9/116 [00:01<00:14,  7.15it/s, los[A
Train:   9%| | 10/116 [00:01<00:14,  7.20it/s, lo[A
Train:   9%| | 11/116 [00:01<00:14,  7.16it/s, lo[A
Train:  10%| | 12/116 [00:01<00:14,  7.13it/s, lo[A
Train:  11%| | 13/116 [00:01<00:14,  7.14it/s, lo[A
Train:  12%| | 14/116 [00:01<00:14,  7.14it/s, lo[A
Train:  13%|▏| 15/116 [00:02<00:14,  7.16it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:13,  7.18it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:13,  7.19it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:13,  7.18it/s, lo

Train loss tensor(0.0383, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0386, device='cuda:0')
F-1 score 0.9878823723591549 0.30658571492649184 0.9846749953009895
Epoch number 26


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:15,  7.34it/s, los[A
Train:   2%| | 2/116 [00:00<00:15,  7.22it/s, los[A
Train:   3%| | 3/116 [00:00<00:15,  7.19it/s, los[A
Train:   3%| | 4/116 [00:00<00:15,  7.20it/s, los[A
Train:   4%| | 5/116 [00:00<00:15,  7.18it/s, los[A
Train:   5%| | 6/116 [00:00<00:15,  7.17it/s, los[A
Train:   6%| | 7/116 [00:00<00:15,  7.11it/s, los[A
Train:   7%| | 8/116 [00:01<00:15,  7.11it/s, los[A
Train:   8%| | 9/116 [00:01<00:15,  7.13it/s, los[A
Train:   9%| | 10/116 [00:01<00:14,  7.20it/s, lo[A
Train:   9%| | 11/116 [00:01<00:14,  7.19it/s, lo[A
Train:  10%| | 12/116 [00:01<00:14,  7.16it/s, lo[A
Train:  11%| | 13/116 [00:01<00:14,  7.14it/s, lo[A
Train:  12%| | 14/116 [00:01<00:14,  7.14it/s, lo[A
Train:  13%|▏| 15/116 [00:02<00:14,  7.18it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:13,  7.17it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:13,  7.13it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:13,  7.09it/s, lo

Train loss tensor(0.0374, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0381, device='cuda:0')
F-1 score 0.9879098811619719 0.311869759425144 0.9847899886546713
Epoch number 27


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:15,  7.40it/s, los[A
Train:   2%| | 2/116 [00:00<00:15,  7.18it/s, los[A
Train:   3%| | 3/116 [00:00<00:15,  7.14it/s, los[A
Train:   3%| | 4/116 [00:00<00:15,  7.15it/s, los[A
Train:   4%| | 5/116 [00:00<00:15,  7.15it/s, los[A
Train:   5%| | 6/116 [00:00<00:15,  7.16it/s, los[A
Train:   6%| | 7/116 [00:00<00:15,  7.13it/s, los[A
Train:   7%| | 8/116 [00:01<00:15,  7.16it/s, los[A
Train:   8%| | 9/116 [00:01<00:14,  7.20it/s, los[A
Train:   9%| | 10/116 [00:01<00:14,  7.21it/s, lo[A
Train:   9%| | 11/116 [00:01<00:14,  7.16it/s, lo[A
Train:  10%| | 12/116 [00:01<00:14,  7.12it/s, lo[A
Train:  11%| | 13/116 [00:01<00:14,  7.09it/s, lo[A
Train:  12%| | 14/116 [00:01<00:14,  7.06it/s, lo[A
Train:  13%|▏| 15/116 [00:02<00:14,  7.08it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:14,  7.10it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:13,  7.08it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:13,  7.07it/s, lo

Train loss tensor(0.0371, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0376, device='cuda:0')
F-1 score 0.9879236355633803 0.3078792527317943 0.9848080505035233
Epoch number 28


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:15,  7.54it/s, los[A
Train:   2%| | 2/116 [00:00<00:15,  7.31it/s, los[A
Train:   3%| | 3/116 [00:00<00:15,  7.25it/s, los[A
Train:   3%| | 4/116 [00:00<00:15,  7.21it/s, los[A
Train:   4%| | 5/116 [00:00<00:15,  7.15it/s, los[A
Train:   5%| | 6/116 [00:00<00:15,  7.09it/s, los[A
Train:   6%| | 7/116 [00:00<00:15,  7.02it/s, los[A
Train:   7%| | 8/116 [00:01<00:15,  7.06it/s, los[A
Train:   8%| | 9/116 [00:01<00:15,  7.09it/s, los[A
Train:   9%| | 10/116 [00:01<00:14,  7.15it/s, lo[A
Train:   9%| | 11/116 [00:01<00:14,  7.15it/s, lo[A
Train:  10%| | 12/116 [00:01<00:14,  7.14it/s, lo[A
Train:  11%| | 13/116 [00:01<00:14,  7.17it/s, lo[A
Train:  12%| | 14/116 [00:01<00:14,  7.18it/s, lo[A
Train:  13%|▏| 15/116 [00:02<00:14,  7.18it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:13,  7.21it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:13,  7.19it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:13,  7.17it/s, lo

Train loss tensor(0.0367, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0375, device='cuda:0')
F-1 score 0.9879373899647887 0.31449831475068835 0.9849027516698374
Epoch number 29


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:15,  7.58it/s, los[A
Train:   2%| | 2/116 [00:00<00:15,  7.39it/s, los[A
Train:   3%| | 3/116 [00:00<00:15,  7.36it/s, los[A
Train:   3%| | 4/116 [00:00<00:15,  7.35it/s, los[A
Train:   4%| | 5/116 [00:00<00:15,  7.34it/s, los[A
Train:   5%| | 6/116 [00:00<00:14,  7.33it/s, los[A
Train:   6%| | 7/116 [00:00<00:14,  7.31it/s, los[A
Train:   7%| | 8/116 [00:01<00:14,  7.31it/s, los[A
Train:   8%| | 9/116 [00:01<00:14,  7.35it/s, los[A
Train:   9%| | 10/116 [00:01<00:14,  7.38it/s, lo[A
Train:   9%| | 11/116 [00:01<00:14,  7.38it/s, lo[A
Train:  10%| | 12/116 [00:01<00:14,  7.38it/s, lo[A
Train:  11%| | 13/116 [00:01<00:13,  7.39it/s, lo[A
Train:  12%| | 14/116 [00:01<00:13,  7.35it/s, lo[A
Train:  13%|▏| 15/116 [00:02<00:13,  7.37it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:13,  7.42it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:13,  7.38it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:13,  7.38it/s, lo

Train loss tensor(0.0360, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0376, device='cuda:0')
F-1 score 0.9878823723591549 0.30984447404963406 0.9848661324838917
Epoch number 30


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:14,  7.94it/s, los[A
Train:   2%| | 2/116 [00:00<00:14,  7.70it/s, los[A
Train:   3%| | 3/116 [00:00<00:14,  7.69it/s, los[A
Train:   3%| | 4/116 [00:00<00:14,  7.73it/s, los[A
Train:   4%| | 5/116 [00:00<00:14,  7.68it/s, los[A
Train:   5%| | 6/116 [00:00<00:14,  7.68it/s, los[A
Train:   6%| | 7/116 [00:00<00:14,  7.67it/s, los[A
Train:   7%| | 8/116 [00:01<00:14,  7.66it/s, los[A
Train:   8%| | 9/116 [00:01<00:13,  7.67it/s, los[A
Train:   9%| | 10/116 [00:01<00:13,  7.67it/s, lo[A
Train:   9%| | 11/116 [00:01<00:13,  7.64it/s, lo[A
Train:  10%| | 12/116 [00:01<00:13,  7.63it/s, lo[A
Train:  11%| | 13/116 [00:01<00:13,  7.62it/s, lo[A
Train:  12%| | 14/116 [00:01<00:13,  7.64it/s, lo[A
Train:  13%|▏| 15/116 [00:01<00:13,  7.63it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:13,  7.66it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:12,  7.65it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:12,  7.65it/s, lo

Train loss tensor(0.0360, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0376, device='cuda:0')
F-1 score 0.9878961267605634 0.30609441464927784 0.9848099769543252
Epoch number 31


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:14,  8.02it/s, los[A
Train:   2%| | 2/116 [00:00<00:14,  7.82it/s, los[A
Train:   3%| | 3/116 [00:00<00:14,  7.76it/s, los[A
Train:   3%| | 4/116 [00:00<00:14,  7.80it/s, los[A
Train:   4%| | 5/116 [00:00<00:14,  7.79it/s, los[A
Train:   5%| | 6/116 [00:00<00:14,  7.78it/s, los[A
Train:   6%| | 7/116 [00:00<00:13,  7.79it/s, los[A
Train:   7%| | 8/116 [00:01<00:13,  7.76it/s, los[A
Train:   8%| | 9/116 [00:01<00:13,  7.76it/s, los[A
Train:   9%| | 10/116 [00:01<00:13,  7.78it/s, lo[A
Train:   9%| | 11/116 [00:01<00:13,  7.74it/s, lo[A
Train:  10%| | 12/116 [00:01<00:13,  7.70it/s, lo[A
Train:  11%| | 13/116 [00:01<00:13,  7.72it/s, lo[A
Train:  12%| | 14/116 [00:01<00:13,  7.74it/s, lo[A
Train:  13%|▏| 15/116 [00:01<00:13,  7.75it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:12,  7.78it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:12,  7.73it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:12,  7.74it/s, lo

Train loss tensor(0.0355, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0376, device='cuda:0')
F-1 score 0.988157460387324 0.30727826803547875 0.9849672837421192
Epoch number 32


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:14,  8.03it/s, los[A
Train:   2%| | 2/116 [00:00<00:14,  7.91it/s, los[A
Train:   3%| | 3/116 [00:00<00:14,  7.85it/s, los[A
Train:   3%| | 4/116 [00:00<00:14,  7.87it/s, los[A
Train:   4%| | 5/116 [00:00<00:14,  7.84it/s, los[A
Train:   5%| | 6/116 [00:00<00:14,  7.83it/s, los[A
Train:   6%| | 7/116 [00:00<00:13,  7.84it/s, los[A
Train:   7%| | 8/116 [00:01<00:13,  7.81it/s, los[A
Train:   8%| | 9/116 [00:01<00:13,  7.85it/s, los[A
Train:   9%| | 10/116 [00:01<00:13,  7.85it/s, lo[A
Train:   9%| | 11/116 [00:01<00:13,  7.81it/s, lo[A
Train:  10%| | 12/116 [00:01<00:13,  7.79it/s, lo[A
Train:  11%| | 13/116 [00:01<00:13,  7.78it/s, lo[A
Train:  12%| | 14/116 [00:01<00:13,  7.78it/s, lo[A
Train:  13%|▏| 15/116 [00:01<00:12,  7.80it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:12,  7.80it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:12,  7.76it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:12,  7.77it/s, lo

Train loss tensor(0.0356, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0376, device='cuda:0')
F-1 score 0.9881437059859155 0.30113843277532004 0.9848747078631788
Epoch number 33


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:14,  7.97it/s, los[A
Train:   2%| | 2/116 [00:00<00:14,  7.84it/s, los[A
Train:   3%| | 3/116 [00:00<00:14,  7.84it/s, los[A
Train:   3%| | 4/116 [00:00<00:14,  7.84it/s, los[A
Train:   4%| | 5/116 [00:00<00:14,  7.84it/s, los[A
Train:   5%| | 6/116 [00:00<00:14,  7.81it/s, los[A
Train:   6%| | 7/116 [00:00<00:13,  7.85it/s, los[A
Train:   7%| | 8/116 [00:01<00:13,  7.81it/s, los[A
Train:   8%| | 9/116 [00:01<00:13,  7.85it/s, los[A
Train:   9%| | 10/116 [00:01<00:13,  7.87it/s, lo[A
Train:   9%| | 11/116 [00:01<00:13,  7.81it/s, lo[A
Train:  10%| | 12/116 [00:01<00:13,  7.83it/s, lo[A
Train:  11%| | 13/116 [00:01<00:13,  7.80it/s, lo[A
Train:  12%| | 14/116 [00:01<00:13,  7.79it/s, lo[A
Train:  13%|▏| 15/116 [00:01<00:12,  7.82it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:12,  7.85it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:12,  7.81it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:12,  7.80it/s, lo

Train loss tensor(0.0350, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0376, device='cuda:0')
F-1 score 0.9881712147887324 0.3059188992545403 0.9849121658121958
Epoch number 34


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:14,  7.97it/s, los[A
Train:   2%| | 2/116 [00:00<00:14,  7.82it/s, los[A
Train:   3%| | 3/116 [00:00<00:14,  7.86it/s, los[A
Train:   3%| | 4/116 [00:00<00:14,  7.86it/s, los[A
Train:   4%| | 5/116 [00:00<00:14,  7.81it/s, los[A
Train:   5%| | 6/116 [00:00<00:14,  7.80it/s, los[A
Train:   6%| | 7/116 [00:00<00:13,  7.85it/s, los[A
Train:   7%| | 8/116 [00:01<00:13,  7.80it/s, los[A
Train:   8%| | 9/116 [00:01<00:13,  7.79it/s, los[A
Train:   9%| | 10/116 [00:01<00:13,  7.80it/s, lo[A
Train:   9%| | 11/116 [00:01<00:13,  7.80it/s, lo[A
Train:  10%| | 12/116 [00:01<00:13,  7.77it/s, lo[A
Train:  11%| | 13/116 [00:01<00:13,  7.76it/s, lo[A
Train:  12%| | 14/116 [00:01<00:13,  7.80it/s, lo[A
Train:  13%|▏| 15/116 [00:01<00:12,  7.80it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:12,  7.81it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:12,  7.80it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:12,  7.79it/s, lo

Train loss tensor(0.0366, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0448, device='cuda:0')
F-1 score 0.9864931778169014 0.3274679779469491 0.9841919996842406
Epoch number 35


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:14,  8.02it/s, los[A
Train:   2%| | 2/116 [00:00<00:14,  7.84it/s, los[A
Train:   3%| | 3/116 [00:00<00:14,  7.83it/s, los[A
Train:   3%| | 4/116 [00:00<00:14,  7.83it/s, los[A
Train:   4%| | 5/116 [00:00<00:14,  7.83it/s, los[A
Train:   5%| | 6/116 [00:00<00:14,  7.77it/s, los[A
Train:   6%| | 7/116 [00:00<00:13,  7.80it/s, los[A
Train:   7%| | 8/116 [00:01<00:13,  7.84it/s, los[A
Train:   8%| | 9/116 [00:01<00:13,  7.86it/s, los[A
Train:   9%| | 10/116 [00:01<00:13,  7.88it/s, lo[A
Train:   9%| | 11/116 [00:01<00:13,  7.82it/s, lo[A
Train:  10%| | 12/116 [00:01<00:13,  7.78it/s, lo[A
Train:  11%| | 13/116 [00:01<00:13,  7.80it/s, lo[A
Train:  12%| | 14/116 [00:01<00:13,  7.83it/s, lo[A
Train:  13%|▏| 15/116 [00:01<00:12,  7.83it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:12,  7.86it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:12,  7.84it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:12,  7.81it/s, lo

Train loss tensor(0.0393, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0483, device='cuda:0')
F-1 score 0.9850352112676056 0.33264600582260345 0.9837125758963269
Epoch number 36


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:14,  8.07it/s, los[A
Train:   2%| | 2/116 [00:00<00:14,  7.85it/s, los[A
Train:   3%| | 3/116 [00:00<00:14,  7.82it/s, los[A
Train:   3%| | 4/116 [00:00<00:14,  7.85it/s, los[A
Train:   4%| | 5/116 [00:00<00:14,  7.80it/s, los[A
Train:   5%| | 6/116 [00:00<00:14,  7.79it/s, los[A
Train:   6%| | 7/116 [00:00<00:13,  7.81it/s, los[A
Train:   7%| | 8/116 [00:01<00:13,  7.80it/s, los[A
Train:   8%| | 9/116 [00:01<00:13,  7.83it/s, los[A
Train:   9%| | 10/116 [00:01<00:13,  7.82it/s, lo[A
Train:   9%| | 11/116 [00:01<00:13,  7.79it/s, lo[A
Train:  10%| | 12/116 [00:01<00:13,  7.76it/s, lo[A
Train:  11%| | 13/116 [00:01<00:13,  7.75it/s, lo[A
Train:  12%| | 14/116 [00:01<00:13,  7.79it/s, lo[A
Train:  13%|▏| 15/116 [00:01<00:12,  7.81it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:12,  7.83it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:12,  7.79it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:12,  7.76it/s, lo

Train loss tensor(0.0375, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0479, device='cuda:0')
F-1 score 0.9853653169014085 0.3355658264020864 0.9838642006858167
Epoch number 37


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:14,  8.02it/s, los[A
Train:   2%| | 2/116 [00:00<00:14,  7.85it/s, los[A
Train:   3%| | 3/116 [00:00<00:14,  7.85it/s, los[A
Train:   3%| | 4/116 [00:00<00:14,  7.83it/s, los[A
Train:   4%| | 5/116 [00:00<00:14,  7.85it/s, los[A
Train:   5%| | 6/116 [00:00<00:14,  7.82it/s, los[A
Train:   6%| | 7/116 [00:00<00:13,  7.84it/s, los[A
Train:   7%| | 8/116 [00:01<00:13,  7.82it/s, los[A
Train:   8%| | 9/116 [00:01<00:13,  7.83it/s, los[A
Train:   9%| | 10/116 [00:01<00:13,  7.87it/s, lo[A
Train:   9%| | 11/116 [00:01<00:13,  7.87it/s, lo[A
Train:  10%| | 12/116 [00:01<00:13,  7.84it/s, lo[A
Train:  11%| | 13/116 [00:01<00:13,  7.83it/s, lo[A
Train:  12%| | 14/116 [00:01<00:13,  7.84it/s, lo[A
Train:  13%|▏| 15/116 [00:01<00:12,  7.84it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:12,  7.86it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:12,  7.82it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:12,  7.80it/s, lo

Train loss tensor(0.0363, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0471, device='cuda:0')
F-1 score 0.9858192121478874 0.32237570327326115 0.9839405528091599
Epoch number 38


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:14,  8.05it/s, los[A
Train:   2%| | 2/116 [00:00<00:14,  7.87it/s, los[A
Train:   3%| | 3/116 [00:00<00:14,  7.84it/s, los[A
Train:   3%| | 4/116 [00:00<00:14,  7.82it/s, los[A
Train:   4%| | 5/116 [00:00<00:14,  7.81it/s, los[A
Train:   5%| | 6/116 [00:00<00:14,  7.80it/s, los[A
Train:   6%| | 7/116 [00:00<00:13,  7.82it/s, los[A
Train:   7%| | 8/116 [00:01<00:13,  7.81it/s, los[A
Train:   8%| | 9/116 [00:01<00:13,  7.81it/s, los[A
Train:   9%| | 10/116 [00:01<00:13,  7.85it/s, lo[A
Train:   9%| | 11/116 [00:01<00:13,  7.85it/s, lo[A
Train:  10%| | 12/116 [00:01<00:13,  7.81it/s, lo[A
Train:  11%| | 13/116 [00:01<00:13,  7.83it/s, lo[A
Train:  12%| | 14/116 [00:01<00:13,  7.83it/s, lo[A
Train:  13%|▏| 15/116 [00:01<00:12,  7.84it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:12,  7.89it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:12,  7.83it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:12,  7.81it/s, lo

Train loss tensor(0.0353, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0492, device='cuda:0')
F-1 score 0.9848288952464789 0.3413335593520321 0.983596154909807
Epoch number 39


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:14,  8.09it/s, los[A
Train:   2%| | 2/116 [00:00<00:14,  7.89it/s, los[A
Train:   3%| | 3/116 [00:00<00:14,  7.82it/s, los[A
Train:   3%| | 4/116 [00:00<00:14,  7.85it/s, los[A
Train:   4%| | 5/116 [00:00<00:14,  7.85it/s, los[A
Train:   5%| | 6/116 [00:00<00:14,  7.83it/s, los[A
Train:   6%| | 7/116 [00:00<00:13,  7.88it/s, los[A
Train:   7%| | 8/116 [00:01<00:13,  7.85it/s, los[A
Train:   8%| | 9/116 [00:01<00:13,  7.86it/s, los[A
Train:   9%| | 10/116 [00:01<00:13,  7.87it/s, lo[A
Train:   9%| | 11/116 [00:01<00:13,  7.87it/s, lo[A
Train:  10%| | 12/116 [00:01<00:13,  7.85it/s, lo[A
Train:  11%| | 13/116 [00:01<00:13,  7.84it/s, lo[A
Train:  12%| | 14/116 [00:01<00:12,  7.86it/s, lo[A
Train:  13%|▏| 15/116 [00:01<00:12,  7.88it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:12,  7.89it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:12,  7.88it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:12,  7.87it/s, lo

Train loss tensor(0.0343, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0479, device='cuda:0')
F-1 score 0.9854891065140845 0.34585249305774174 0.9839249243025987
Epoch number 40


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:14,  7.97it/s, los[A
Train:   2%| | 2/116 [00:00<00:14,  7.83it/s, los[A
Train:   3%| | 3/116 [00:00<00:14,  7.84it/s, los[A
Train:   3%| | 4/116 [00:00<00:14,  7.86it/s, los[A
Train:   4%| | 5/116 [00:00<00:14,  7.83it/s, los[A
Train:   5%| | 6/116 [00:00<00:14,  7.80it/s, los[A
Train:   6%| | 7/116 [00:00<00:13,  7.85it/s, los[A
Train:   7%| | 8/116 [00:01<00:13,  7.83it/s, los[A
Train:   8%| | 9/116 [00:01<00:13,  7.85it/s, los[A
Train:   9%| | 10/116 [00:01<00:13,  7.86it/s, lo[A
Train:   9%| | 11/116 [00:01<00:13,  7.84it/s, lo[A
Train:  10%| | 12/116 [00:01<00:13,  7.78it/s, lo[A
Train:  11%| | 13/116 [00:01<00:13,  7.81it/s, lo[A
Train:  12%| | 14/116 [00:01<00:13,  7.82it/s, lo[A
Train:  13%|▏| 15/116 [00:01<00:12,  7.83it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:12,  7.86it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:12,  7.85it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:12,  7.80it/s, lo

Train loss tensor(0.0331, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0476, device='cuda:0')
F-1 score 0.9859705105633803 0.35500028528906946 0.98409131095162
Epoch number 41


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:14,  8.08it/s, los[A
Train:   2%| | 2/116 [00:00<00:14,  7.89it/s, los[A
Train:   3%| | 3/116 [00:00<00:14,  7.86it/s, los[A
Train:   3%| | 4/116 [00:00<00:14,  7.87it/s, los[A
Train:   4%| | 5/116 [00:00<00:14,  7.83it/s, los[A
Train:   5%| | 6/116 [00:00<00:14,  7.81it/s, los[A
Train:   6%| | 7/116 [00:00<00:13,  7.86it/s, los[A
Train:   7%| | 8/116 [00:01<00:13,  7.82it/s, los[A
Train:   8%| | 9/116 [00:01<00:13,  7.83it/s, los[A
Train:   9%| | 10/116 [00:01<00:13,  7.85it/s, lo[A
Train:   9%| | 11/116 [00:01<00:13,  7.85it/s, lo[A
Train:  10%| | 12/116 [00:01<00:13,  7.81it/s, lo[A
Train:  11%| | 13/116 [00:01<00:13,  7.85it/s, lo[A
Train:  12%| | 14/116 [00:01<00:13,  7.84it/s, lo[A
Train:  13%|▏| 15/116 [00:01<00:12,  7.87it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:12,  7.87it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:12,  7.85it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:12,  7.83it/s, lo

Train loss tensor(0.0322, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0470, device='cuda:0')
F-1 score 0.9859980193661971 0.35345448371003596 0.984292864178719
Epoch number 42


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:14,  8.18it/s, los[A
Train:   2%| | 2/116 [00:00<00:14,  7.91it/s, los[A
Train:   3%| | 3/116 [00:00<00:14,  7.91it/s, los[A
Train:   3%| | 4/116 [00:00<00:14,  7.89it/s, los[A
Train:   4%| | 5/116 [00:00<00:14,  7.86it/s, los[A
Train:   5%| | 6/116 [00:00<00:13,  7.86it/s, los[A
Train:   6%| | 7/116 [00:00<00:13,  7.92it/s, los[A
Train:   7%| | 8/116 [00:01<00:13,  7.86it/s, los[A
Train:   8%| | 9/116 [00:01<00:13,  7.86it/s, los[A
Train:   9%| | 10/116 [00:01<00:13,  7.89it/s, lo[A
Train:   9%| | 11/116 [00:01<00:13,  7.88it/s, lo[A
Train:  10%| | 12/116 [00:01<00:13,  7.83it/s, lo[A
Train:  11%| | 13/116 [00:01<00:13,  7.84it/s, lo[A
Train:  12%| | 14/116 [00:01<00:13,  7.83it/s, lo[A
Train:  13%|▏| 15/116 [00:01<00:12,  7.84it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:12,  7.87it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:12,  7.84it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:12,  7.81it/s, lo

Train loss tensor(0.0313, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0463, device='cuda:0')
F-1 score 0.986176826584507 0.34378308119111456 0.9843365409476065
Epoch number 43


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:14,  8.10it/s, los[A
Train:   2%| | 2/116 [00:00<00:14,  7.91it/s, los[A
Train:   3%| | 3/116 [00:00<00:14,  7.87it/s, los[A
Train:   3%| | 4/116 [00:00<00:14,  7.88it/s, los[A
Train:   4%| | 5/116 [00:00<00:14,  7.88it/s, los[A
Train:   5%| | 6/116 [00:00<00:14,  7.84it/s, los[A
Train:   6%| | 7/116 [00:00<00:13,  7.87it/s, los[A
Train:   7%| | 8/116 [00:01<00:13,  7.87it/s, los[A
Train:   8%| | 9/116 [00:01<00:13,  7.87it/s, los[A
Train:   9%| | 10/116 [00:01<00:13,  7.90it/s, lo[A
Train:   9%| | 11/116 [00:01<00:13,  7.87it/s, lo[A
Train:  10%| | 12/116 [00:01<00:13,  7.85it/s, lo[A
Train:  11%| | 13/116 [00:01<00:13,  7.87it/s, lo[A
Train:  12%| | 14/116 [00:01<00:12,  7.85it/s, lo[A
Train:  13%|▏| 15/116 [00:01<00:12,  7.84it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:12,  7.91it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:12,  7.87it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:12,  7.84it/s, lo

Train loss tensor(0.0307, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0461, device='cuda:0')
F-1 score 0.9864244058098591 0.3269367582591571 0.9841222564044307
Epoch number 44


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:14,  8.00it/s, los[A
Train:   2%| | 2/116 [00:00<00:14,  7.88it/s, los[A
Train:   3%| | 3/116 [00:00<00:14,  7.86it/s, los[A
Train:   3%| | 4/116 [00:00<00:14,  7.86it/s, los[A
Train:   4%| | 5/116 [00:00<00:14,  7.85it/s, los[A
Train:   5%| | 6/116 [00:00<00:14,  7.78it/s, los[A
Train:   6%| | 7/116 [00:00<00:13,  7.83it/s, los[A
Train:   7%| | 8/116 [00:01<00:13,  7.83it/s, los[A
Train:   8%| | 9/116 [00:01<00:13,  7.84it/s, los[A
Train:   9%| | 10/116 [00:01<00:13,  7.87it/s, lo[A
Train:   9%| | 11/116 [00:01<00:13,  7.83it/s, lo[A
Train:  10%| | 12/116 [00:01<00:13,  7.82it/s, lo[A
Train:  11%| | 13/116 [00:01<00:13,  7.83it/s, lo[A
Train:  12%| | 14/116 [00:01<00:12,  7.85it/s, lo[A
Train:  13%|▏| 15/116 [00:01<00:12,  7.85it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:12,  7.89it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:12,  7.88it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:12,  7.84it/s, lo

Train loss tensor(0.0302, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0470, device='cuda:0')
F-1 score 0.986176826584507 0.3199978641146269 0.9840690425622605
Epoch number 45


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:14,  8.06it/s, los[A
Train:   2%| | 2/116 [00:00<00:14,  7.88it/s, los[A
Train:   3%| | 3/116 [00:00<00:14,  7.88it/s, los[A
Train:   3%| | 4/116 [00:00<00:14,  7.88it/s, los[A
Train:   4%| | 5/116 [00:00<00:14,  7.89it/s, los[A
Train:   5%| | 6/116 [00:00<00:13,  7.89it/s, los[A
Train:   6%| | 7/116 [00:00<00:13,  7.87it/s, los[A
Train:   7%| | 8/116 [00:01<00:13,  7.86it/s, los[A
Train:   8%| | 9/116 [00:01<00:13,  7.84it/s, los[A
Train:   9%| | 10/116 [00:01<00:13,  7.87it/s, lo[A
Train:   9%| | 11/116 [00:01<00:13,  7.84it/s, lo[A
Train:  10%| | 12/116 [00:01<00:13,  7.82it/s, lo[A
Train:  11%| | 13/116 [00:01<00:13,  7.84it/s, lo[A
Train:  12%| | 14/116 [00:01<00:13,  7.83it/s, lo[A
Train:  13%|▏| 15/116 [00:01<00:12,  7.86it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:12,  7.89it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:12,  7.86it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:12,  7.82it/s, lo

Train loss tensor(0.0293, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0473, device='cuda:0')
F-1 score 0.9859017385563379 0.32637870709898775 0.9839990129390377
Epoch number 46


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:14,  8.04it/s, los[A
Train:   2%| | 2/116 [00:00<00:14,  7.83it/s, los[A
Train:   3%| | 3/116 [00:00<00:14,  7.85it/s, los[A
Train:   3%| | 4/116 [00:00<00:14,  7.86it/s, los[A
Train:   4%| | 5/116 [00:00<00:14,  7.82it/s, los[A
Train:   5%| | 6/116 [00:00<00:14,  7.83it/s, los[A
Train:   6%| | 7/116 [00:00<00:13,  7.85it/s, los[A
Train:   7%| | 8/116 [00:01<00:13,  7.83it/s, los[A
Train:   8%| | 9/116 [00:01<00:13,  7.81it/s, los[A
Train:   9%| | 10/116 [00:01<00:13,  7.84it/s, lo[A
Train:   9%| | 11/116 [00:01<00:13,  7.82it/s, lo[A
Train:  10%| | 12/116 [00:01<00:13,  7.81it/s, lo[A
Train:  11%| | 13/116 [00:01<00:13,  7.83it/s, lo[A
Train:  12%| | 14/116 [00:01<00:13,  7.80it/s, lo[A
Train:  13%|▏| 15/116 [00:01<00:12,  7.83it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:12,  7.87it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:12,  7.83it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:12,  7.80it/s, lo

Train loss tensor(0.0283, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0464, device='cuda:0')
F-1 score 0.9867270026408451 0.3497390837757381 0.9844360882523978
Epoch number 47


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:14,  8.12it/s, los[A
Train:   2%| | 2/116 [00:00<00:14,  7.93it/s, los[A
Train:   3%| | 3/116 [00:00<00:14,  7.89it/s, los[A
Train:   3%| | 4/116 [00:00<00:14,  7.90it/s, los[A
Train:   4%| | 5/116 [00:00<00:14,  7.87it/s, los[A
Train:   5%| | 6/116 [00:00<00:14,  7.84it/s, los[A
Train:   6%| | 7/116 [00:00<00:13,  7.87it/s, los[A
Train:   7%| | 8/116 [00:01<00:13,  7.83it/s, los[A
Train:   8%| | 9/116 [00:01<00:13,  7.85it/s, los[A
Train:   9%| | 10/116 [00:01<00:13,  7.85it/s, lo[A
Train:   9%| | 11/116 [00:01<00:13,  7.84it/s, lo[A
Train:  10%| | 12/116 [00:01<00:13,  7.83it/s, lo[A
Train:  11%| | 13/116 [00:01<00:13,  7.80it/s, lo[A
Train:  12%| | 14/116 [00:01<00:13,  7.82it/s, lo[A
Train:  13%|▏| 15/116 [00:01<00:12,  7.84it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:12,  7.85it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:12,  7.81it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:12,  7.80it/s, lo

Train loss tensor(0.0275, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0470, device='cuda:0')
F-1 score 0.9863968970070423 0.3305332300854658 0.9841701151640534
Epoch number 48


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:14,  8.10it/s, los[A
Train:   2%| | 2/116 [00:00<00:14,  7.86it/s, los[A
Train:   3%| | 3/116 [00:00<00:14,  7.84it/s, los[A
Train:   3%| | 4/116 [00:00<00:14,  7.88it/s, los[A
Train:   4%| | 5/116 [00:00<00:14,  7.85it/s, los[A
Train:   5%| | 6/116 [00:00<00:14,  7.83it/s, los[A
Train:   6%| | 7/116 [00:00<00:13,  7.85it/s, los[A
Train:   7%| | 8/116 [00:01<00:13,  7.84it/s, los[A
Train:   8%| | 9/116 [00:01<00:13,  7.82it/s, los[A
Train:   9%| | 10/116 [00:01<00:13,  7.84it/s, lo[A
Train:   9%| | 11/116 [00:01<00:13,  7.79it/s, lo[A
Train:  10%| | 12/116 [00:01<00:13,  7.80it/s, lo[A
Train:  11%| | 13/116 [00:01<00:13,  7.82it/s, lo[A
Train:  12%| | 14/116 [00:01<00:13,  7.81it/s, lo[A
Train:  13%|▏| 15/116 [00:01<00:12,  7.82it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:12,  7.85it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:12,  7.81it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:12,  7.80it/s, lo

Train loss tensor(0.0272, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0468, device='cuda:0')
F-1 score 0.9868645466549296 0.3431175454758118 0.9844464728425832
Epoch number 49


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:14,  8.01it/s, los[A
Train:   2%| | 2/116 [00:00<00:14,  7.89it/s, los[A
Train:   3%| | 3/116 [00:00<00:14,  7.87it/s, los[A
Train:   3%| | 4/116 [00:00<00:14,  7.87it/s, los[A
Train:   4%| | 5/116 [00:00<00:14,  7.81it/s, los[A
Train:   5%| | 6/116 [00:00<00:14,  7.81it/s, los[A
Train:   6%| | 7/116 [00:00<00:13,  7.88it/s, los[A
Train:   7%| | 8/116 [00:01<00:13,  7.84it/s, los[A
Train:   8%| | 9/116 [00:01<00:13,  7.82it/s, los[A
Train:   9%| | 10/116 [00:01<00:13,  7.85it/s, lo[A
Train:   9%| | 11/116 [00:01<00:13,  7.85it/s, lo[A
Train:  10%| | 12/116 [00:01<00:13,  7.77it/s, lo[A
Train:  11%| | 13/116 [00:01<00:13,  7.79it/s, lo[A
Train:  12%| | 14/116 [00:01<00:13,  7.80it/s, lo[A
Train:  13%|▏| 15/116 [00:01<00:12,  7.80it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:12,  7.84it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:12,  7.82it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:12,  7.81it/s, lo

Train loss tensor(0.0265, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0457, device='cuda:0')
F-1 score 0.9871258802816901 0.34340066760416 0.984581454096369
Epoch number 50


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:14,  8.04it/s, los[A
Train:   2%| | 2/116 [00:00<00:14,  7.90it/s, los[A
Train:   3%| | 3/116 [00:00<00:14,  7.85it/s, los[A
Train:   3%| | 4/116 [00:00<00:14,  7.85it/s, los[A
Train:   4%| | 5/116 [00:00<00:14,  7.79it/s, los[A
Train:   5%| | 6/116 [00:00<00:14,  7.77it/s, los[A
Train:   6%| | 7/116 [00:00<00:13,  7.79it/s, los[A
Train:   7%| | 8/116 [00:01<00:13,  7.74it/s, los[A
Train:   8%| | 9/116 [00:01<00:13,  7.75it/s, los[A
Train:   9%| | 10/116 [00:01<00:13,  7.80it/s, lo[A
Train:   9%| | 11/116 [00:01<00:13,  7.77it/s, lo[A
Train:  10%| | 12/116 [00:01<00:13,  7.77it/s, lo[A
Train:  11%| | 13/116 [00:01<00:13,  7.78it/s, lo[A
Train:  12%| | 14/116 [00:01<00:13,  7.79it/s, lo[A
Train:  13%|▏| 15/116 [00:01<00:12,  7.78it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:12,  7.81it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:12,  7.80it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:12,  7.76it/s, lo

Train loss tensor(0.0256, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0470, device='cuda:0')
F-1 score 0.9871258802816901 0.3173805553758085 0.9844184960521292
Epoch number 51


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:14,  7.98it/s, los[A
Train:   2%| | 2/116 [00:00<00:14,  7.79it/s, los[A
Train:   3%| | 3/116 [00:00<00:14,  7.85it/s, los[A
Train:   3%| | 4/116 [00:00<00:14,  7.83it/s, los[A
Train:   4%| | 5/116 [00:00<00:14,  7.79it/s, los[A
Train:   5%| | 6/116 [00:00<00:13,  7.86it/s, los[A
Train:   6%| | 7/116 [00:00<00:13,  7.88it/s, los[A
Train:   7%| | 8/116 [00:01<00:13,  7.80it/s, los[A
Train:   8%| | 9/116 [00:01<00:13,  7.80it/s, los[A
Train:   9%| | 10/116 [00:01<00:13,  7.82it/s, lo[A
Train:   9%| | 11/116 [00:01<00:13,  7.81it/s, lo[A
Train:  10%| | 12/116 [00:01<00:13,  7.78it/s, lo[A
Train:  11%| | 13/116 [00:01<00:13,  7.77it/s, lo[A
Train:  12%| | 14/116 [00:01<00:13,  7.79it/s, lo[A
Train:  13%|▏| 15/116 [00:01<00:12,  7.78it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:12,  7.80it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:12,  7.79it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:12,  7.75it/s, lo

Train loss tensor(0.0247, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0471, device='cuda:0')
F-1 score 0.9871671434859155 0.34852433281835943 0.9847205862934425
Epoch number 52


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:14,  7.99it/s, los[A
Train:   2%| | 2/116 [00:00<00:14,  7.76it/s, los[A
Train:   3%| | 3/116 [00:00<00:14,  7.76it/s, los[A
Train:   3%| | 4/116 [00:00<00:14,  7.76it/s, los[A
Train:   4%| | 5/116 [00:00<00:14,  7.74it/s, los[A
Train:   5%| | 6/116 [00:00<00:14,  7.75it/s, los[A
Train:   6%| | 7/116 [00:00<00:14,  7.77it/s, los[A
Train:   7%| | 8/116 [00:01<00:13,  7.76it/s, los[A
Train:   8%| | 9/116 [00:01<00:13,  7.76it/s, los[A
Train:   9%| | 10/116 [00:01<00:13,  7.81it/s, lo[A
Train:   9%| | 11/116 [00:01<00:13,  7.80it/s, lo[A
Train:  10%| | 12/116 [00:01<00:13,  7.76it/s, lo[A
Train:  11%| | 13/116 [00:01<00:13,  7.75it/s, lo[A
Train:  12%| | 14/116 [00:01<00:13,  7.78it/s, lo[A
Train:  13%|▏| 15/116 [00:01<00:12,  7.79it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:12,  7.80it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:12,  7.76it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:12,  7.74it/s, lo

Train loss tensor(0.0246, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0474, device='cuda:0')
F-1 score 0.9872771786971831 0.33180471665578126 0.984531382628801
Epoch number 53


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:14,  8.01it/s, los[A
Train:   2%| | 2/116 [00:00<00:14,  7.81it/s, los[A
Train:   3%| | 3/116 [00:00<00:14,  7.79it/s, los[A
Train:   3%| | 4/116 [00:00<00:14,  7.80it/s, los[A
Train:   4%| | 5/116 [00:00<00:14,  7.76it/s, los[A
Train:   5%| | 6/116 [00:00<00:14,  7.73it/s, los[A
Train:   6%| | 7/116 [00:00<00:14,  7.76it/s, los[A
Train:   7%| | 8/116 [00:01<00:13,  7.75it/s, los[A
Train:   8%| | 9/116 [00:01<00:13,  7.73it/s, los[A
Train:   9%| | 10/116 [00:01<00:13,  7.75it/s, lo[A
Train:   9%| | 11/116 [00:01<00:13,  7.73it/s, lo[A
Train:  10%| | 12/116 [00:01<00:13,  7.74it/s, lo[A
Train:  11%| | 13/116 [00:01<00:13,  7.76it/s, lo[A
Train:  12%| | 14/116 [00:01<00:13,  7.77it/s, lo[A
Train:  13%|▏| 15/116 [00:01<00:12,  7.78it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:12,  7.79it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:12,  7.78it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:12,  7.77it/s, lo

Train loss tensor(0.0241, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0473, device='cuda:0')
F-1 score 0.9872771786971831 0.31648233017088534 0.9843998265766227
Epoch number 54


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:14,  7.98it/s, los[A
Train:   2%| | 2/116 [00:00<00:14,  7.75it/s, los[A
Train:   3%| | 3/116 [00:00<00:14,  7.74it/s, los[A
Train:   3%| | 4/116 [00:00<00:14,  7.75it/s, los[A
Train:   4%| | 5/116 [00:00<00:14,  7.73it/s, los[A
Train:   5%| | 6/116 [00:00<00:14,  7.71it/s, los[A
Train:   6%| | 7/116 [00:00<00:14,  7.75it/s, los[A
Train:   7%| | 8/116 [00:01<00:13,  7.75it/s, los[A
Train:   8%| | 9/116 [00:01<00:13,  7.74it/s, los[A
Train:   9%| | 10/116 [00:01<00:13,  7.77it/s, lo[A
Train:   9%| | 11/116 [00:01<00:13,  7.73it/s, lo[A
Train:  10%| | 12/116 [00:01<00:13,  7.73it/s, lo[A
Train:  11%| | 13/116 [00:01<00:13,  7.74it/s, lo[A
Train:  12%| | 14/116 [00:01<00:13,  7.71it/s, lo[A
Train:  13%|▏| 15/116 [00:01<00:13,  7.71it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:12,  7.76it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:12,  7.72it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:12,  7.72it/s, lo

Train loss tensor(0.0233, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0474, device='cuda:0')
F-1 score 0.9875247579225352 0.31915274619752476 0.9846335636353339
Epoch number 55


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:14,  7.97it/s, los[A
Train:   2%| | 2/116 [00:00<00:14,  7.74it/s, los[A
Train:   3%| | 3/116 [00:00<00:14,  7.74it/s, los[A
Train:   3%| | 4/116 [00:00<00:14,  7.76it/s, los[A
Train:   4%| | 5/116 [00:00<00:14,  7.71it/s, los[A
Train:   5%| | 6/116 [00:00<00:14,  7.72it/s, los[A
Train:   6%| | 7/116 [00:00<00:14,  7.72it/s, los[A
Train:   7%| | 8/116 [00:01<00:14,  7.66it/s, los[A
Train:   8%| | 9/116 [00:01<00:13,  7.65it/s, los[A
Train:   9%| | 10/116 [00:01<00:13,  7.69it/s, lo[A
Train:   9%| | 11/116 [00:01<00:13,  7.67it/s, lo[A
Train:  10%| | 12/116 [00:01<00:13,  7.69it/s, lo[A
Train:  11%| | 13/116 [00:01<00:13,  7.69it/s, lo[A
Train:  12%| | 14/116 [00:01<00:13,  7.68it/s, lo[A
Train:  13%|▏| 15/116 [00:01<00:13,  7.68it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:13,  7.69it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:12,  7.67it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:12,  7.64it/s, lo

Train loss tensor(0.0231, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0484, device='cuda:0')
F-1 score 0.9869608274647887 0.3309177621601268 0.9844680298056343
Epoch number 56


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:14,  7.89it/s, los[A
Train:   2%| | 2/116 [00:00<00:14,  7.71it/s, los[A
Train:   3%| | 3/116 [00:00<00:14,  7.69it/s, los[A
Train:   3%| | 4/116 [00:00<00:14,  7.67it/s, los[A
Train:   4%| | 5/116 [00:00<00:14,  7.64it/s, los[A
Train:   5%| | 6/116 [00:00<00:14,  7.63it/s, los[A
Train:   6%| | 7/116 [00:00<00:14,  7.67it/s, los[A
Train:   7%| | 8/116 [00:01<00:14,  7.62it/s, los[A
Train:   8%| | 9/116 [00:01<00:14,  7.63it/s, los[A
Train:   9%| | 10/116 [00:01<00:13,  7.66it/s, lo[A
Train:   9%| | 11/116 [00:01<00:13,  7.64it/s, lo[A
Train:  10%| | 12/116 [00:01<00:13,  7.62it/s, lo[A
Train:  11%| | 13/116 [00:01<00:13,  7.66it/s, lo[A
Train:  12%| | 14/116 [00:01<00:13,  7.65it/s, lo[A
Train:  13%|▏| 15/116 [00:01<00:13,  7.66it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:13,  7.69it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:12,  7.65it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:12,  7.60it/s, lo

Train loss tensor(0.0221, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0480, device='cuda:0')
F-1 score 0.9875522667253521 0.3327946027084165 0.9847392471757358
Epoch number 57


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:14,  7.78it/s, los[A
Train:   2%| | 2/116 [00:00<00:14,  7.64it/s, los[A
Train:   3%| | 3/116 [00:00<00:14,  7.59it/s, los[A
Train:   3%| | 4/116 [00:00<00:14,  7.58it/s, los[A
Train:   4%| | 5/116 [00:00<00:14,  7.57it/s, los[A
Train:   5%| | 6/116 [00:00<00:14,  7.55it/s, los[A
Train:   6%| | 7/116 [00:00<00:14,  7.60it/s, los[A
Train:   7%| | 8/116 [00:01<00:14,  7.56it/s, los[A
Train:   8%| | 9/116 [00:01<00:14,  7.59it/s, los[A
Train:   9%| | 10/116 [00:01<00:13,  7.58it/s, lo[A
Train:   9%| | 11/116 [00:01<00:13,  7.58it/s, lo[A
Train:  10%| | 12/116 [00:01<00:13,  7.55it/s, lo[A
Train:  11%| | 13/116 [00:01<00:13,  7.57it/s, lo[A
Train:  12%| | 14/116 [00:01<00:13,  7.57it/s, lo[A
Train:  13%|▏| 15/116 [00:01<00:13,  7.57it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:13,  7.61it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:13,  7.57it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:13,  7.53it/s, lo

Train loss tensor(0.0221, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0478, device='cuda:0')
F-1 score 0.9875110035211268 0.3299959208805993 0.9846556919061501
Epoch number 58


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:14,  7.74it/s, los[A
Train:   2%| | 2/116 [00:00<00:15,  7.56it/s, los[A
Train:   3%| | 3/116 [00:00<00:14,  7.58it/s, los[A
Train:   3%| | 4/116 [00:00<00:14,  7.57it/s, los[A
Train:   4%| | 5/116 [00:00<00:14,  7.50it/s, los[A
Train:   5%| | 6/116 [00:00<00:14,  7.48it/s, los[A
Train:   6%| | 7/116 [00:00<00:14,  7.48it/s, los[A
Train:   7%| | 8/116 [00:01<00:14,  7.47it/s, los[A
Train:   8%| | 9/116 [00:01<00:14,  7.51it/s, los[A
Train:   9%| | 10/116 [00:01<00:14,  7.54it/s, lo[A
Train:   9%| | 11/116 [00:01<00:13,  7.52it/s, lo[A
Train:  10%| | 12/116 [00:01<00:13,  7.49it/s, lo[A
Train:  11%| | 13/116 [00:01<00:13,  7.49it/s, lo[A
Train:  12%| | 14/116 [00:01<00:13,  7.49it/s, lo[A
Train:  13%|▏| 15/116 [00:01<00:13,  7.51it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:13,  7.53it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:13,  7.51it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:13,  7.48it/s, lo

Train loss tensor(0.0215, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0478, device='cuda:0')
F-1 score 0.9872221610915493 0.32922720424308616 0.9845182319039356
Epoch number 59


Train:   0%|             | 0/116 [00:00<?, ?it/s]
Train:   1%| | 1/116 [00:00<00:15,  7.55it/s, los[A
Train:   2%| | 2/116 [00:00<00:15,  7.47it/s, los[A
Train:   3%| | 3/116 [00:00<00:15,  7.48it/s, los[A
Train:   3%| | 4/116 [00:00<00:14,  7.54it/s, los[A
Train:   4%| | 5/116 [00:00<00:14,  7.49it/s, los[A
Train:   5%| | 6/116 [00:00<00:14,  7.50it/s, los[A
Train:   6%| | 7/116 [00:00<00:14,  7.53it/s, los[A
Train:   7%| | 8/116 [00:01<00:14,  7.53it/s, los[A
Train:   8%| | 9/116 [00:01<00:14,  7.52it/s, los[A
Train:   9%| | 10/116 [00:01<00:14,  7.55it/s, lo[A
Train:   9%| | 11/116 [00:01<00:14,  7.49it/s, lo[A
Train:  10%| | 12/116 [00:01<00:13,  7.46it/s, lo[A
Train:  11%| | 13/116 [00:01<00:13,  7.48it/s, lo[A
Train:  12%| | 14/116 [00:01<00:13,  7.48it/s, lo[A
Train:  13%|▏| 15/116 [00:01<00:13,  7.50it/s, lo[A
Train:  14%|▏| 16/116 [00:02<00:13,  7.52it/s, lo[A
Train:  15%|▏| 17/116 [00:02<00:13,  7.50it/s, lo[A
Train:  16%|▏| 18/116 [00:02<00:13,  7.47it/s, lo

Train loss tensor(0.0210, device='cuda:0', grad_fn=<DivBackward0>)


                                                 

Val loss tensor(0.0482, device='cuda:0')
F-1 score 0.9874697403169014 0.32299296804784944 0.984632166351046




In [14]:
torch.save(model, 'model.pth')

## Test the model

In [153]:
def test(test_dataloader):
    total_test_loss = 0
    model.eval()
    all_words = []
    all_word_predictions = []

#     batch_bar = tqdm(total=len(test_dataloader), dynamic_ncols=True, leave=False, position=0, desc='Test')
    for i, batch in enumerate(test_dataloader):
        torch.cuda.empty_cache()

        input_ids = batch['input_ids'].cuda()
        
        attention_mask = batch['attention_mask'].cuda()
        all_word_ids_list = batch['word_ids']

        with torch.no_grad():
            output= model(input_ids = input_ids, attention_mask = attention_mask, output_hidden_states = True, labels = None, requires_grad=False)
            predictions = output[0]

        for j, document_wise_predictions in enumerate(predictions):
            one_document_prediction = document_wise_predictions.argmax(-1)
            document_input_ids = input_ids[j]
            prev_word_id = -1
            temp_word_ids = []
            current_label = 'O'
            k = -1
            for word_id, prediction in zip(all_word_ids_list[j], one_document_prediction):
                if word_id == None:
                    k+=1
                    continue
                
                if prev_word_id == word_id:
                    k += 1
                    temp_word_ids.append(document_input_ids[k].item())
                else:
                    if len(temp_word_ids) > 0:
                        all_words.append(tokenizer.decode(temp_word_ids, skip_special_tokens = True))
                        all_word_predictions.append(current_label)
                        temp_word_ids = []
                        
                    current_label = id_to_class_mapping[prediction.item()]
                    k += 1
                    temp_word_ids.append(document_input_ids[k].item())
                    prev_word_id = word_id
                    
    return all_words, all_word_predictions

In [154]:
all_words, all_word_predictions = test(test_dataloader)

with open('predictions.conll', 'w') as f:
    for i,j in zip(all_words, all_word_predictions):
        if i.strip() == '':
            f.write('\n')
        else:
            f.write(i.strip() + ' ' + j + '\n')

In [150]:
!pip3 install --upgrade --force-reinstall explainaboard_client==0.0.9 explainaboard_api_client==0.2.8

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Defaulting to user installation because normal site-packages is not writeable
Collecting explainaboard_client==0.0.9
  Using cached explainaboard_client-0.0.9-py2.py3-none-any.whl (19 kB)
Collecting explainaboard_api_client==0.2.8
  Using cached explainaboard_api_client-0.2.8-py2.py3-none-any.whl (173 kB)
Collecting tqdm
  Using cached tqdm-4.64.1-py2.py3-none-any.whl (78 kB)
Collecting python-dateutil
  Using cached python_dateutil-2.8.2-py2.py3-none-any.whl (247 kB)
Collecting urllib3>=1.25.3
  Using cached urllib3-1.26.12-py2.py3-none-any.whl (140 kB)
Collecting six>=1.5
  Using cached six-1.16.0-py2.py3-none-any.whl (11 kB)
Installing collected packages: six, python-dateutil, urllib3, explainaboard-api-c

In [144]:
!export EB_USERNAME="pkarnani@andrew.cmu.edu"
!export EB_API_KEY="CT2Mr-yEXXek124uuO8sHg"

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [146]:
!python3 -m explainaboard_client.cli.evaluate_system \
  --username $EB_USERNAME \
  --api-key $EB_API_KEY \
  --task named-entity-recognition \
  --system-name robertamlmner \
  --dataset cmu_anlp \
  --sub-dataset sciner \
  --split test \
  --system-output-file anlp_pkarnani_robertamlmner.conll \
  --system-output-file-type conll \
  --shared-users neubig@gmail.com \
  --source-language en

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Traceback (most recent call last):
  File "/usr/lib64/python3.7/runpy.py", line 183, in _run_module_as_main
    mod_name, mod_spec, code = _get_module_details(mod_name, _Error)
  File "/usr/lib64/python3.7/runpy.py", line 109, in _get_module_details
    __import__(pkg_name)
  File "/home/ec2-user/.local/lib/python3.7/site-packages/explainaboard_client/__init__.py", line 3, in <module>
    from typing import Literal
ImportError: cannot import name 'Literal' from 'typing' (/usr/lib64/python3.7/typing.py)


In [None]:
for i in 