<a href="https://colab.research.google.com/github/mandar33/BI-LSTM-CRF-FC/blob/main/BIDIR_LSTM_CRF_FC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset,DataLoader
import numpy as np
import random
import math
from tqdm import tqdm
import os
import re
import io
import matplotlib.pyplot as plt
from datetime import datetime

In [3]:

import numpy as np



class dictionary():
    def __init__(self):
        self.word_freq={}
        self.id2word={}
        self.word2id={}
    
    def add_word(self,word):
        if word in self.word_freq:
            self.word_freq[word]+=1
        else:
            self.word_freq[word]=1
        
    def create_mapping(self):
        self.word_freq['[PAD]']=1000001
        self.word_freq['[UNK]']=1000000
        c_unk=0
        dic_items=[]
        for k in self.word_freq.keys():
            if self.word_freq[k]>1 or np.random.uniform()>0.5:
                dic_items.append((k,self.word_freq[k]))
            else:
                c_unk+=1
        ordered_lis=sorted( dic_items, key=lambda x: (-x[1],x[0]))
        assert ordered_lis[0][0]=='[PAD]'
        self.id2word=dict([(i,ordered_lis[i][0]) for i in range(len(ordered_lis))])
        self.word2id=dict([(ordered_lis[i][0],i) for i in range(len(ordered_lis))])
        self.ordered_lis=ordered_lis
        return c_unk

    def get_id(self,word):
        if word in self.word2id:
            return self.word2id[word]
        else:
            return 1
    
    def get_word(self,idx):
        return self.id2word[idx]
    
    def get_len(self):
        return len(self.id2word)

In [4]:
import numpy as np
class I2B2DatasetReader(Dataset):
    def __init__(self,data_path,dic_word,dic_char,training=False):
        super(I2B2DatasetReader,self).__init__()
        #read data -> X:[[word,word,word,...],[...],[...]] Y:[[0,1,2,3...],...]
        f=open(data_path,encoding='utf-8')
        X=[[]]
        Y=[[]]

        line=f.readline()
        self.label_map=["O","B-problem","I-problem","B-test","I-test","B-treatment","I-treatment","B-MISC","I-MISC"]
        self.label_num=len(self.label_map)

        while line:
            if line=='\n':
                if len(X[-1])>0:
                    X.append([])
                    Y.append([])
            else:
                word,ner=line.split()
                assert ner in self.label_map
                word=re.sub('\d','0',word)      #replace all the digits with 0, this helps
                X[-1].append(word)
                Y[-1].append(self.label_map.index(ner))
            line=f.readline()
                
        f.close()
        if len(X[-1])==0:
            X=X[:-1]
            Y=Y[:-1]

        self.label=Y
        self.data_num=len(X)

        #get word dictionary
        if training:
            dic_word=dictionary()
            for sentence in X:
                for word in sentence:
                    dic_word.add_word(word)
            dic_word.create_mapping()

        #get word_ids: list of lists
        #encode words str->id
        self.word_ids=[]
        for i in range(len(X)):
            self.word_ids.append(list(map(lambda x:dic_word.get_id(x), X[i])))

        #get character dictionary
        if training:
            dic_char=dictionary()
            for sentence in X:
                text=''.join(sentence)
                for char in text:
                    dic_char.add_word(char)
            dic_char.create_mapping()

        #get char_ids: list of lists of lists
        self.char_ids=[]
        for sentence in X:
            s=[]
            for word in sentence:
                s.append(list(map(lambda x:dic_char.get_id(x), word)))
            self.char_ids.append(s)

        self.dic_word=dic_word
        self.dic_char=dic_char

    def __getitem__(self,index):
        return self.word_ids[index],self.char_ids[index],self.label[index]

    def __len__(self):
        return self.data_num
     



In [5]:
def expand_dic(dictionary,embedding_path,paths):
            f=open(embedding_path,encoding="utf-8")
            line=f.readline()
            word2emb={}
            while line:
                line=line.split()
                word2emb[line[0]]=torch.from_numpy(np.array(line[1:],dtype=np.str).astype(np.float))
                line=f.readline()

            words=[]
            for data_path in paths:
                f=open(data_path,encoding='utf-8')
                line=f.readline()
                while line:
                    if line=='\n':
                        pass
                    else:
                        word=line.split()[0]
                        words.append(word)
                    line=f.readline()
                f.close()

            train_len=dictionary.get_len()
            for word in words:
                if word not in dictionary.word2id and any([x in word2emb for x in [word,word.lower(),re.sub('\d','0',word.lower())]]):
                    dictionary.word2id[word]=dictionary.get_len()
                    dictionary.id2word[dictionary.get_len()]=word
                    dictionary.ordered_lis.append((word,0))
            num_add=dictionary.get_len()-train_len
            print("original word num: %d  expand num: %d"%(train_len,num_add)) 
            return dictionary,word2emb

def collate_batch(batch):
            #input is a list of tuples
            word_num=list(map(lambda x:len(x[0]),batch))
            max_word_num=max(word_num)
            word_ids=list(map(lambda x:x[0]+[0]*(max_word_num-len(x[0])),batch))
            label_ids=list(map(lambda x:x[2]+[0]*(max_word_num-len(x[2])),batch))

            max_word_length=max(list(map(lambda x:max([len(i) for i in x[1]]),batch)))
            char_ids=[]
            for tuple in batch:
                s=[]
                for word in tuple[1]:
                    s.append(word+[0]*(max_word_length-len(word)))
                s=s+[[0]*max_word_length for i in range((max_word_num-len(s)))]
                char_ids.append(s)

            word_num=torch.LongTensor(word_num)
            word_ids=torch.LongTensor(word_ids)
            char_ids=torch.LongTensor(char_ids)
            label_ids=torch.LongTensor(label_ids)

            return word_num,word_ids,char_ids,label_ids

def log_sum_exp(matrix,dim):
                maximum,_=matrix.max(dim=dim,keepdim=True)  #to avoid NaN
                return (maximum+torch.log(torch.exp(matrix-maximum).sum(dim=dim,keepdim=True))).squeeze(1)


def forward_alg(observation,transition,word_num):            
            observation=observation.transpose(1,2)
            transition=transition.unsqueeze(0).expand(observation.size(0),-1,-1)
            alpha=torch.zeros_like(observation)
            alpha[:,:,0:1]=observation[:,:,0:1]
            for i in range(1,observation.size(2)):
                alpha[:,:,i:i+1]=(observation[:,:,i]+log_sum_exp(alpha[:,:,i-1:i]+transition,dim=1)).unsqueeze(2)
            end_label=alpha[:,10,1:]     #(batch_size, sequence_len)
            return end_label.gather(1,word_num.unsqueeze(1)).squeeze(1)

def list_batch(pred,word_num,word_ids,label_ids,dic_word,label_map):
    pred=pred.tolist()
    word_num=word_num.tolist()
    label_ids=label_ids.tolist()
    word_ids=word_ids.tolist()

    outputs=[]
    for i in range(len(word_num)):
        seq_len=word_num[i]
        prediction=pred[i][:seq_len]
        target=label_ids[i][:seq_len]
        words=word_ids[i][:seq_len]
        prediction=list(map(lambda x: label_map[x], prediction))
        target=list(map(lambda x: label_map[x], target))
        words=list(map(lambda x: dic_word.get_word(x), words))
        for j in range(seq_len):
            outputs.append(' '.join([words[j],target[j],prediction[j]]))
        outputs.append('')
    
    return outputs  

In [6]:
class LSTM_CRF(nn.Module):
    def __init__(self,word2emb,dic_word,dic_char):
        super(LSTM_CRF, self).__init__()
        word_emb_dim=300
        word_lstm_dim=300
        char_emb_dim=25
        char_lstm_dim=25
        label_num=9
        dropout_rate=0.4
            
        word_emb=nn.Embedding(dic_word.get_len(),word_emb_dim,padding_idx=0)
        for i in range(dic_word.get_len()):
            word=dic_word.ordered_lis[i][0]
            if word in word2emb:
                word_emb.weight.data[i]=word2emb[word]
            elif word.lower() in word2emb:
                word_emb.weight.data[i]=word2emb[word.lower()]
            elif re.sub('\d','0',word.lower()) in word2emb:
                word_emb.weight.data[i]=word2emb[re.sub('\d','0',word.lower())]
        #print(word_emb.weight.data[0])


        char_emb=nn.Embedding(dic_char.get_len(),char_emb_dim,padding_idx=0)

        self.char_emb=char_emb
        self.char_lstm=nn.LSTM(char_emb_dim,char_lstm_dim,batch_first=True,bidirectional=True)
        self.word_emb=word_emb
        self.dropout=nn.Dropout(dropout_rate)
        self.word_lstm1=nn.LSTM(word_emb_dim+char_lstm_dim*2, word_lstm_dim,batch_first=True,bidirectional=True)
        self.word_lstm2=nn.LSTM(word_lstm_dim*2, word_lstm_dim,batch_first=True,bidirectional=True)
        self.word_lstm3=nn.LSTM(word_lstm_dim*2, word_lstm_dim,batch_first=True,bidirectional=True)
        self.word_lstm4=nn.LSTM(word_lstm_dim*2, word_lstm_dim,batch_first=True,bidirectional=True)
        self.fc = nn.Linear(word_lstm_dim*2, label_num)
        
        #crf transition matrix parameter
        self.transition=nn.Parameter(torch.full((label_num+2,label_num+2),math.log(1/label_num)))
        
        self.char_lstm_dim=char_lstm_dim
        self.char_emb_dim=char_emb_dim
        self.word_lstm_dim=word_lstm_dim
        self.word_emb_dim=word_emb_dim
        self.label_num=label_num
        
    def get_feature(self,word_num,word_ids,char_ids):
        batch_size=word_ids.size(0)
        sequence_len=word_ids.size(1)
        char_input=self.char_emb(char_ids)
        #print(char_input.size())    #4 dimensional
        char_emb_dim=char_input.size(3)
        word_len=char_input.size(2)
        char_input=char_input.view(batch_size*sequence_len,word_len,char_emb_dim)
        char_hidden,_=self.char_lstm(char_input)    #second output "_" is equal to char_output below
        forward_=char_hidden[:,-1,:self.char_lstm_dim]
        backward_=char_hidden[:,0,self.char_lstm_dim:]
        char_output=torch.cat((forward_,backward_),dim=-1)
        char_output=char_output.view(batch_size,sequence_len,self.char_lstm_dim*2)

        index=torch.LongTensor(list(range(sequence_len))).cuda().unsqueeze(0).expand(batch_size,sequence_len)
        condition=word_num.unsqueeze(1).expand(batch_size,sequence_len)>index
        mask=torch.where(condition,torch.ones(1,).cuda(),torch.zeros(1,).cuda()).unsqueeze(2)
        char_output*=mask   #to mask all the padding tokens

        word_feature=self.word_emb(word_ids)
        word_feature=torch.cat((word_feature,char_output),dim=-1)
        word_feature=self.dropout(word_feature)
        h1,(h1_T,c1_T)=self.word_lstm1(word_feature)
        h2,(h2_T,c2_T)=self.word_lstm2(h1)
        h3,(h3_T,c3_T)=self.word_lstm3(h2)
        h4,(h4_T,c4_T)=self.word_lstm4(h3)        
        word_feature = self.fc(h4)
        
        word_feature*=mask
      
        return word_feature,mask
    
    def forward(self,word_num,word_ids,char_ids,label_ids):    
        batch_size=word_ids.size(0)
        sequence_len=word_ids.size(1)
        word_feature,mask=self.get_feature(word_num,word_ids,char_ids)
        
        #compute numerator: the score of target label sequence
        numerator=word_feature.gather(2,label_ids.unsqueeze(2)).squeeze(2).sum(dim=1)
        #print(numerator.size())
        padded_label=torch.cat((torch.full((batch_size,1),9,dtype=torch.long).cuda(), label_ids), dim=1)

        #print(self.transition[(padded_label[:,:-1],padded_label[:,1:])].size()) 
        #a tensor can be indexed by several LongTensors or lists, each of them corresponds with an axis
        trans_score=self.transition[(padded_label[:,:-1],padded_label[:,1:])]   #size(batch_size,sequence_len)
        trans_score*=mask.squeeze(2)
        numerator+=trans_score.sum(dim=1)
        last_label=(padded_label.gather(1,word_num.unsqueeze(1))).squeeze()
        numerator+=self.transition[(last_label,torch.full((batch_size,),10,dtype=torch.long).cuda())]
        
        #prepare the observation matrix
        small=-1000
        se_label=torch.full((batch_size,sequence_len,2),small,dtype=torch.float).cuda()*mask
        observation=torch.cat((word_feature,se_label),dim=2)
        observation=torch.cat((torch.full((batch_size,1,self.label_num+2),small,dtype=torch.float).cuda(),
                                observation,
                                torch.full((batch_size,1,self.label_num+2),small,dtype=torch.float).cuda()),dim=1)
        
       
        observation[:,0,9]=0
        observation[:,-1,10]=0

        denominator=forward_alg(observation,self.transition,word_num)   #the score of all the label sequences
        loss=-(numerator-denominator)

        return torch.mean(loss)
    
    #viterbi decoder
    def decode(self,word_num,word_ids,char_ids):
        batch_size=word_ids.size(0)
        sequence_len=word_ids.size(1)
        word_feature,mask=self.get_feature(word_num,word_ids,char_ids)

        index=torch.LongTensor(list(range(sequence_len))).cuda().unsqueeze(0).expand(batch_size,sequence_len)
        condition=word_num.unsqueeze(1).expand(batch_size,sequence_len)==index
        end_mask=torch.where(condition,torch.ones(1,).cuda(),torch.zeros(1,).cuda()).unsqueeze(2)

        small=-1000
        constrain=torch.full((batch_size,sequence_len,self.label_num+2),small,dtype=torch.float).cuda()*end_mask
        constrain[:,:,10]=0         #tensor "constrain" is used to make sure all the paths finish at [END] state

        se_label=torch.full((batch_size,sequence_len,2),small,dtype=torch.float).cuda()*mask    #correspond with Start and End label
        observation=torch.cat((word_feature,se_label),dim=2)
        observation+=constrain
        observation=torch.cat((torch.full((batch_size,1,self.label_num+2),small,dtype=torch.float).cuda(),
                                observation,
                                torch.full((batch_size,1,self.label_num+2),small,dtype=torch.float).cuda()),dim=1)
        observation[:,0,9]=0
        observation[:,-1,10]=0
       
        observation=observation.transpose(1,2)
        path=torch.zeros_like(observation).long().cuda()
        transition=self.transition.unsqueeze(0).expand(batch_size,-1,-1)
        z=observation[:,:,0:1]
        for i in range(1,observation.size(2)):
            values,indices=(z+transition).max(dim=1)
            path[:,:,i]=indices
            values+=observation[:,:,i]
            z=values.unsqueeze(2)

        last=path[:,10,-1:]
        pred=last
        for i in range(path.size(2)-2,1,-1):
            last=path[:,:,i].gather(1,last)
            pred=torch.cat((last,pred),dim=1)   
        
       
        #validation     this step is unnecessary, just to make sure there is nothing wrong
        pred_=torch.cat((pred,torch.full((batch_size,1),10,dtype=torch.long).cuda()),dim=1)
        condition=pred_.gather(1,word_num.unsqueeze(1)).squeeze(1)==10
        assert condition.size(0)==(condition.sum().item())
        #print("validation passed")

        return pred


In [7]:
pip install conlleval

Collecting conlleval
  Downloading conlleval-0.2-py3-none-any.whl (5.4 kB)
Installing collected packages: conlleval
Successfully installed conlleval-0.2


In [None]:
import re


import matplotlib.pyplot as plt

def my_plot(epochs, datalist):
    plt.plot(epochs, datalist)
    # Add title and axis names
    plt.title('Mean Loss by Epochs')
    plt.xlabel('Epochs')
    plt.ylabel('Mean Loss') 
    #legend = plt.legend(loc='upper right')
    plt.show()



BATCH_SIZE=64
LR=0.001
CLIP=5.
num_epochs = 200
train_dataset=I2B2DatasetReader('./datafiles/train.txt',dictionary(),dictionary(),training=True)
train_dataset.dic_word,word2emb=expand_dic(train_dataset.dic_word,"datafiles/glove.6B.300d.txt",['./datafiles/dev.txt','./datafiles/test.txt'])
dev_dataset=I2B2DatasetReader('./datafiles/dev.txt',train_dataset.dic_word,train_dataset.dic_char)
test_dataset=I2B2DatasetReader('./datafiles/test.txt',train_dataset.dic_word,train_dataset.dic_char)

train_loader=DataLoader(train_dataset,BATCH_SIZE,shuffle=True,num_workers=2,collate_fn=collate_batch)
dev_loader=DataLoader(dev_dataset,BATCH_SIZE,shuffle=False,num_workers=2,collate_fn=collate_batch)
test_loader=DataLoader(test_dataset,BATCH_SIZE,shuffle=False,num_workers=2,collate_fn=collate_batch)


model = LSTM_CRF(word2emb,train_dataset.dic_word,train_dataset.dic_char).cuda()
criterion = nn.CrossEntropyLoss()
optimizer=torch.optim.Adam(model.parameters(),LR)
precision_list = []
accuracy_list = []
recall_list = []
best_score=0
best_f1score_validation = 0
loss_vals=  []
loss_vals_validation =  []
for epoch in range(num_epochs):
    model.train()
    epoch_loss=[]
    pbar=tqdm(total=len(train_loader))
    for i,(word_num,word_ids,char_ids,label_ids) in enumerate(train_loader):
        loss=model(word_num.cuda(),word_ids.cuda(),char_ids.cuda(),label_ids.cuda())
        optimizer.zero_grad()
        loss.backward()
        #nn.utils.clip_grad_norm_(lstm_crf.parameters(),CLIP)
        optimizer.step()
        epoch_loss.append(loss.item())
        pbar.update(1)
    pbar.close()
    loss_vals.append(sum(epoch_loss)/len(epoch_loss))
    mean_loss=torch.mean(torch.tensor(epoch_loss)).item()
   

    model.eval()
    f1_score=0

    
    #for loader in (test_loader):
    testloader=test_loader

    outputs=[]
    for i,(word_num,word_ids,char_ids,label_ids) in enumerate(testloader):
        word_num,word_ids,char_ids,label_ids=word_num.cuda(),word_ids.cuda(),char_ids.cuda(),label_ids.cuda()
        pred=model.decode(word_num,word_ids,char_ids)
        outputs+=list_batch(pred,word_num,word_ids,label_ids, train_dataset.dic_word, train_dataset.label_map)

    f=open('outputs.txt','w',encoding='utf-8')
    f.write('\n'.join(outputs))
    f.close()
    
    os.system("python -m conlleval outputs.txt > results")
    #os.system("./conlleval < outputs.txt > results")
    f=open('results','r',encoding='utf-8')
    
    elems = re.split('[:;]+', f.readlines()[1])
    f1_score=float(elems[7])
    # recall_list.append(elems[5])
    # precision_list.append(elems[3])
    # accuracy_list.append(elems[1])
    f.close()
    best_score=max(best_score,f1_score)
    #print('epoch %d:  mean accuracy: %.4f  mean precision: %.2f  mean recall: %.2f'%(epoch,mean_accuracy,mean_precision,mean_recall))

    print('epoch %d:  mean loss: %.4f  f1 score: %.2f  best: %.2f'%(epoch,mean_loss,f1_score,best_score))

my_plot(np.linspace(1, num_epochs, num_epochs).astype(int), loss_vals)  



Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  import sys
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  import sys


original word num: 7687  expand num: 1776


100%|██████████| 183/183 [00:14<00:00, 12.23it/s]


epoch 0:  mean loss: 6.0512  f1 score: 52.72  best: 52.72


100%|██████████| 183/183 [00:14<00:00, 12.35it/s]


epoch 1:  mean loss: 2.6521  f1 score: 69.94  best: 69.94


100%|██████████| 183/183 [00:15<00:00, 12.06it/s]


epoch 2:  mean loss: 1.7879  f1 score: 73.28  best: 73.28


100%|██████████| 183/183 [00:15<00:00, 12.04it/s]


epoch 3:  mean loss: 1.3297  f1 score: 75.37  best: 75.37


100%|██████████| 183/183 [00:15<00:00, 12.07it/s]


epoch 4:  mean loss: 1.0093  f1 score: 76.34  best: 76.34


100%|██████████| 183/183 [00:15<00:00, 12.18it/s]


epoch 5:  mean loss: 0.7970  f1 score: 76.76  best: 76.76


100%|██████████| 183/183 [00:15<00:00, 11.93it/s]


epoch 6:  mean loss: 0.6413  f1 score: 77.03  best: 77.03


100%|██████████| 183/183 [00:15<00:00, 11.96it/s]


epoch 7:  mean loss: 0.5549  f1 score: 78.75  best: 78.75


100%|██████████| 183/183 [00:15<00:00, 11.95it/s]


epoch 8:  mean loss: 0.4590  f1 score: 78.71  best: 78.75


100%|██████████| 183/183 [00:15<00:00, 11.86it/s]


epoch 9:  mean loss: 0.3515  f1 score: 78.25  best: 78.75


100%|██████████| 183/183 [00:15<00:00, 12.00it/s]


epoch 10:  mean loss: 0.3063  f1 score: 78.25  best: 78.75


100%|██████████| 183/183 [00:16<00:00, 11.03it/s]


epoch 11:  mean loss: 0.2738  f1 score: 78.57  best: 78.75


100%|██████████| 183/183 [00:15<00:00, 11.87it/s]


epoch 12:  mean loss: 0.2271  f1 score: 78.71  best: 78.75


100%|██████████| 183/183 [00:15<00:00, 11.83it/s]


epoch 13:  mean loss: 0.1953  f1 score: 78.35  best: 78.75


100%|██████████| 183/183 [00:15<00:00, 11.63it/s]


epoch 14:  mean loss: 0.1922  f1 score: 78.74  best: 78.75


100%|██████████| 183/183 [00:15<00:00, 11.76it/s]


epoch 15:  mean loss: 0.1669  f1 score: 78.75  best: 78.75


100%|██████████| 183/183 [00:15<00:00, 11.76it/s]


epoch 16:  mean loss: 0.1435  f1 score: 78.75  best: 78.75


100%|██████████| 183/183 [00:15<00:00, 11.52it/s]


epoch 17:  mean loss: 0.1200  f1 score: 79.98  best: 79.98


100%|██████████| 183/183 [00:15<00:00, 11.75it/s]


epoch 18:  mean loss: 0.1249  f1 score: 76.37  best: 79.98


100%|██████████| 183/183 [00:15<00:00, 11.71it/s]


epoch 19:  mean loss: 0.1244  f1 score: 78.20  best: 79.98


100%|██████████| 183/183 [00:15<00:00, 11.75it/s]


epoch 20:  mean loss: 0.1344  f1 score: 78.26  best: 79.98


100%|██████████| 183/183 [00:15<00:00, 11.62it/s]


epoch 21:  mean loss: 0.1055  f1 score: 79.52  best: 79.98


100%|██████████| 183/183 [00:15<00:00, 11.65it/s]


epoch 22:  mean loss: 0.1122  f1 score: 78.43  best: 79.98


100%|██████████| 183/183 [00:15<00:00, 11.58it/s]


epoch 23:  mean loss: 0.0845  f1 score: 78.62  best: 79.98


100%|██████████| 183/183 [00:15<00:00, 11.89it/s]


epoch 24:  mean loss: 0.0828  f1 score: 78.93  best: 79.98


100%|██████████| 183/183 [00:15<00:00, 11.67it/s]


epoch 25:  mean loss: 0.0845  f1 score: 78.83  best: 79.98


100%|██████████| 183/183 [00:16<00:00, 11.37it/s]


epoch 26:  mean loss: 0.0835  f1 score: 78.70  best: 79.98


100%|██████████| 183/183 [00:15<00:00, 11.78it/s]


epoch 27:  mean loss: 0.0875  f1 score: 78.62  best: 79.98


100%|██████████| 183/183 [00:15<00:00, 11.47it/s]


epoch 28:  mean loss: 0.0886  f1 score: 79.37  best: 79.98


100%|██████████| 183/183 [00:15<00:00, 11.89it/s]


epoch 29:  mean loss: 0.0901  f1 score: 79.30  best: 79.98


100%|██████████| 183/183 [00:15<00:00, 11.80it/s]


epoch 30:  mean loss: 0.0745  f1 score: 79.60  best: 79.98


100%|██████████| 183/183 [00:15<00:00, 11.86it/s]


epoch 31:  mean loss: 0.0567  f1 score: 78.38  best: 79.98


100%|██████████| 183/183 [00:15<00:00, 11.85it/s]


epoch 32:  mean loss: 0.0709  f1 score: 79.20  best: 79.98


100%|██████████| 183/183 [00:15<00:00, 11.81it/s]


epoch 33:  mean loss: 0.0622  f1 score: 78.45  best: 79.98


100%|██████████| 183/183 [00:15<00:00, 11.85it/s]


epoch 34:  mean loss: 0.0617  f1 score: 78.80  best: 79.98


100%|██████████| 183/183 [00:15<00:00, 11.78it/s]


epoch 35:  mean loss: 0.0681  f1 score: 78.76  best: 79.98


100%|██████████| 183/183 [00:15<00:00, 11.83it/s]


epoch 36:  mean loss: 0.0550  f1 score: 79.44  best: 79.98


100%|██████████| 183/183 [00:15<00:00, 11.85it/s]


epoch 37:  mean loss: 0.0605  f1 score: 79.14  best: 79.98


100%|██████████| 183/183 [00:15<00:00, 11.65it/s]


epoch 38:  mean loss: 0.0490  f1 score: 77.36  best: 79.98


100%|██████████| 183/183 [00:15<00:00, 11.78it/s]


epoch 39:  mean loss: 0.0508  f1 score: 77.79  best: 79.98


100%|██████████| 183/183 [00:15<00:00, 11.76it/s]


epoch 40:  mean loss: 0.0569  f1 score: 78.39  best: 79.98


100%|██████████| 183/183 [00:15<00:00, 11.87it/s]


epoch 41:  mean loss: 0.0543  f1 score: 78.56  best: 79.98


100%|██████████| 183/183 [00:15<00:00, 11.76it/s]


epoch 42:  mean loss: 0.0495  f1 score: 78.42  best: 79.98


100%|██████████| 183/183 [00:15<00:00, 11.82it/s]


epoch 43:  mean loss: 0.0527  f1 score: 78.05  best: 79.98


100%|██████████| 183/183 [00:15<00:00, 11.71it/s]


epoch 44:  mean loss: 0.0445  f1 score: 78.83  best: 79.98


100%|██████████| 183/183 [00:15<00:00, 11.78it/s]


epoch 45:  mean loss: 0.0485  f1 score: 78.86  best: 79.98


100%|██████████| 183/183 [00:15<00:00, 11.66it/s]


epoch 46:  mean loss: 0.0465  f1 score: 79.15  best: 79.98


100%|██████████| 183/183 [00:15<00:00, 11.75it/s]


epoch 47:  mean loss: 0.0496  f1 score: 79.21  best: 79.98


100%|██████████| 183/183 [00:15<00:00, 11.80it/s]


epoch 48:  mean loss: 0.0405  f1 score: 79.29  best: 79.98


100%|██████████| 183/183 [00:15<00:00, 11.77it/s]


epoch 49:  mean loss: 0.0339  f1 score: 79.52  best: 79.98


100%|██████████| 183/183 [00:15<00:00, 11.69it/s]


epoch 50:  mean loss: 0.0425  f1 score: 80.01  best: 80.01


100%|██████████| 183/183 [00:15<00:00, 11.86it/s]


epoch 51:  mean loss: 0.0459  f1 score: 80.06  best: 80.06


100%|██████████| 183/183 [00:15<00:00, 11.73it/s]


epoch 52:  mean loss: 0.0419  f1 score: 78.68  best: 80.06


100%|██████████| 183/183 [00:15<00:00, 11.81it/s]


epoch 53:  mean loss: 0.0365  f1 score: 79.08  best: 80.06


100%|██████████| 183/183 [00:15<00:00, 11.84it/s]


epoch 54:  mean loss: 0.0429  f1 score: 79.09  best: 80.06


100%|██████████| 183/183 [00:15<00:00, 11.80it/s]


epoch 55:  mean loss: 0.0432  f1 score: 78.74  best: 80.06


100%|██████████| 183/183 [00:15<00:00, 11.75it/s]


epoch 56:  mean loss: 0.0423  f1 score: 79.32  best: 80.06


100%|██████████| 183/183 [00:15<00:00, 11.84it/s]


epoch 57:  mean loss: 0.0380  f1 score: 78.60  best: 80.06


100%|██████████| 183/183 [00:15<00:00, 11.85it/s]


epoch 58:  mean loss: 0.0306  f1 score: 79.02  best: 80.06


100%|██████████| 183/183 [00:15<00:00, 11.83it/s]


epoch 59:  mean loss: 0.0323  f1 score: 79.09  best: 80.06


100%|██████████| 183/183 [00:15<00:00, 11.84it/s]


epoch 60:  mean loss: 0.0351  f1 score: 78.72  best: 80.06


100%|██████████| 183/183 [00:15<00:00, 11.82it/s]


epoch 61:  mean loss: 0.0308  f1 score: 79.43  best: 80.06


100%|██████████| 183/183 [00:15<00:00, 11.93it/s]


epoch 62:  mean loss: 0.0333  f1 score: 79.78  best: 80.06


100%|██████████| 183/183 [00:15<00:00, 11.66it/s]


epoch 63:  mean loss: 0.0420  f1 score: 79.15  best: 80.06


100%|██████████| 183/183 [00:15<00:00, 11.67it/s]


epoch 64:  mean loss: 0.0372  f1 score: 79.00  best: 80.06


100%|██████████| 183/183 [00:15<00:00, 11.75it/s]


epoch 65:  mean loss: 0.0355  f1 score: 79.93  best: 80.06


100%|██████████| 183/183 [00:15<00:00, 11.91it/s]


epoch 66:  mean loss: 0.0287  f1 score: 78.98  best: 80.06


100%|██████████| 183/183 [00:15<00:00, 11.77it/s]


epoch 67:  mean loss: 0.0264  f1 score: 79.18  best: 80.06


100%|██████████| 183/183 [00:15<00:00, 11.82it/s]


epoch 68:  mean loss: 0.0294  f1 score: 79.10  best: 80.06


100%|██████████| 183/183 [00:15<00:00, 11.81it/s]


epoch 69:  mean loss: 0.0262  f1 score: 79.75  best: 80.06


100%|██████████| 183/183 [00:15<00:00, 11.96it/s]


epoch 70:  mean loss: 0.0311  f1 score: 79.40  best: 80.06


100%|██████████| 183/183 [00:15<00:00, 11.87it/s]


epoch 71:  mean loss: 0.0270  f1 score: 79.61  best: 80.06


100%|██████████| 183/183 [00:15<00:00, 11.81it/s]


epoch 72:  mean loss: 0.0283  f1 score: 78.94  best: 80.06


100%|██████████| 183/183 [00:15<00:00, 11.79it/s]


epoch 73:  mean loss: 0.0277  f1 score: 79.75  best: 80.06


100%|██████████| 183/183 [00:15<00:00, 11.74it/s]


epoch 74:  mean loss: 0.0302  f1 score: 79.27  best: 80.06


100%|██████████| 183/183 [00:15<00:00, 12.01it/s]


epoch 75:  mean loss: 0.0295  f1 score: 78.75  best: 80.06


100%|██████████| 183/183 [00:15<00:00, 11.63it/s]


epoch 76:  mean loss: 0.0299  f1 score: 79.17  best: 80.06


100%|██████████| 183/183 [00:15<00:00, 11.99it/s]


epoch 77:  mean loss: 0.0266  f1 score: 79.30  best: 80.06


100%|██████████| 183/183 [00:15<00:00, 11.83it/s]


epoch 78:  mean loss: 0.0269  f1 score: 79.34  best: 80.06


100%|██████████| 183/183 [00:15<00:00, 11.72it/s]


epoch 79:  mean loss: 0.0255  f1 score: 79.24  best: 80.06


100%|██████████| 183/183 [00:15<00:00, 11.86it/s]


epoch 80:  mean loss: 0.0251  f1 score: 79.38  best: 80.06


100%|██████████| 183/183 [00:15<00:00, 11.62it/s]


epoch 81:  mean loss: 0.0260  f1 score: 79.44  best: 80.06


100%|██████████| 183/183 [00:15<00:00, 11.78it/s]


epoch 82:  mean loss: 0.0235  f1 score: 79.39  best: 80.06


100%|██████████| 183/183 [00:15<00:00, 11.84it/s]


epoch 83:  mean loss: 0.0301  f1 score: 79.91  best: 80.06


100%|██████████| 183/183 [00:15<00:00, 11.72it/s]


epoch 84:  mean loss: 0.0247  f1 score: 79.85  best: 80.06


100%|██████████| 183/183 [00:15<00:00, 11.77it/s]


epoch 85:  mean loss: 0.0226  f1 score: 79.46  best: 80.06


100%|██████████| 183/183 [00:15<00:00, 12.02it/s]


epoch 86:  mean loss: 0.0249  f1 score: 79.44  best: 80.06


100%|██████████| 183/183 [00:15<00:00, 11.72it/s]


epoch 87:  mean loss: 0.0219  f1 score: 79.44  best: 80.06


100%|██████████| 183/183 [00:15<00:00, 11.80it/s]


epoch 88:  mean loss: 0.0253  f1 score: 79.42  best: 80.06


100%|██████████| 183/183 [00:15<00:00, 11.84it/s]


epoch 89:  mean loss: 0.0280  f1 score: 79.85  best: 80.06


100%|██████████| 183/183 [00:15<00:00, 11.91it/s]


epoch 90:  mean loss: 0.0244  f1 score: 79.49  best: 80.06


100%|██████████| 183/183 [00:15<00:00, 11.86it/s]


epoch 91:  mean loss: 0.0244  f1 score: 79.53  best: 80.06


100%|██████████| 183/183 [00:15<00:00, 11.99it/s]


epoch 92:  mean loss: 0.0196  f1 score: 79.46  best: 80.06


100%|██████████| 183/183 [00:15<00:00, 12.01it/s]


epoch 93:  mean loss: 0.0191  f1 score: 79.45  best: 80.06


  8%|▊         | 15/183 [00:01<00:13, 12.60it/s]