In [1]:
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torch import optim
from torch.optim import lr_scheduler

import transformers
from transformers.optimization import Adafactor, AdafactorSchedule

from sklearn.metrics import fbeta_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

import pandas as pd
import numpy as np
import os
import random
import time
from tqdm.notebook import tqdm
import datetime as dt
import copy
import matplotlib.pyplot as plt


In [2]:
os.environ["CUDA_VISIBLE_DEVICES"]='0,1,6,7'

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using {device}")

Using cuda


In [3]:
model_name_dict = {
    "PubMedBERT": "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext",
    "biomed_roberta_base": "allenai/biomed_roberta_base",
    "Bio_ClinicalBERT":"emilyalsentzer/Bio_ClinicalBERT",
}

class Hparams:
    def __init__(self):
        self.random_seed = 0 # BAD:2021
        self.data_dir = './data'
        self.output_dir = './outputs'
        self.batch_size = 128
        self.token_max_length = 512
        self.model_name = model_name_dict['PubMedBERT']
        self.num_epochs = 5
        self.class_1_weight = 150
        self.initial_lr = 2e-5  # 2e-5
        self.model_type = 'lstm_ex'  # cnn, lstm, lstm_ex
        self.upsample_pos_n = 1
        self.use_col = 'title_abstract'  # title, abstract, title_abstract
        self.train_argument = True
        self.test_size = 0.0  # 0.2
        self.cv_n = 5
    

hps = Hparams()


def seed_torch(seed:int):
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True

seed_torch(hps.random_seed)

## DataFrame

In [43]:
orig_df = pd.read_csv(os.path.join(hps.data_dir, 'train.csv'), index_col=0)
submit_df = pd.read_csv(os.path.join(hps.data_dir, 'test.csv'), index_col=0)
sample_submit_df = pd.read_csv(os.path.join(hps.data_dir, 'sample_submit.csv'), index_col=0, header=None, names=['judgement'])

# 修正
orig_df.loc[2488, 'judgement'] = 0
orig_df.loc[7708, 'judgement'] = 0

# 補完
orig_df['abstract'].fillna('', inplace=True)
orig_df['title_abstract'] = orig_df.title + orig_df.abstract

submit_df['abstract'].fillna('', inplace=True)
submit_df['title_abstract'] = submit_df.title + submit_df.abstract
submit_df['judgement'] = -1
submit_df.reset_index(inplace=True, drop=False)

## Cross Validations SetUp

In [6]:
if hps.test_size > 0.0:
    train_df, test_df = train_test_split(orig_df, test_size=hps.test_size, random_state=hps.random_seed, shuffle=True, stratify=orig_df.judgement)
else:
    train_df = orig_df.copy()
    test_df = orig_df.copy()

In [7]:
def get_cv_number(df, cv_n):

    df['cv_id'] = 0

    neg_idx = df.loc[df.judgement==0].index.tolist()
    pos_idx = df.loc[df.judgement==1].index.tolist()

    neg_idx = [list(a) for a in list(np.array_split(random.sample(neg_idx, len(neg_idx)), cv_n))]
    pos_idx = [list(a) for a in list(np.array_split(random.sample(pos_idx, len(pos_idx)), cv_n))]

    for i in range(cv_n):
        n_id = neg_idx[i]
        p_id = pos_idx[i]
        df.loc[n_id, 'cv_id'] = i
        df.loc[p_id, 'cv_id'] = i

    df = df.sort_index()

    for i in range(cv_n):
        tmp_df = df.loc[df.cv_id==i]
        print('cv_id:', i, '->  pos:', len(tmp_df.loc[tmp_df.judgement==1]), ' / neg:', len(tmp_df.loc[tmp_df.judgement==0]), ' / all:', len(tmp_df))
        
    return df


train_df = get_cv_number(train_df, cv_n=hps.cv_n)

cv_id: 0 ->  pos: 126  / neg: 5303  / all: 5429
cv_id: 1 ->  pos: 126  / neg: 5303  / all: 5429
cv_id: 2 ->  pos: 126  / neg: 5303  / all: 5429
cv_id: 3 ->  pos: 126  / neg: 5303  / all: 5429
cv_id: 4 ->  pos: 126  / neg: 5303  / all: 5429


## Hugging Face

In [8]:
base_tokenizer = transformers.AutoTokenizer.from_pretrained(hps.model_name)

bert_config = transformers.AutoConfig.from_pretrained(hps.model_name)
bert_config.output_hidden_states = True

## DataSet / DataLoader

In [9]:
class TextClassificationDataset(Dataset):
    def __init__(self, df, tokenizer, use_col='title_abstract', token_max_length=512, argument=False, upsample_pos_n=1):

        if upsample_pos_n > 1:
            df_pos = df.loc[df.judgement==1]
            df_pos = pd.concat([df_pos for i in range(int(upsample_pos_n))], axis=0).reset_index(drop=True)
            df_neg = df.loc[df.judgement==0]
            self.df = pd.concat([df_pos, df_neg], axis=0).reset_index(drop=True)
        else:
            self.df = df
        
        self.tokenizer = tokenizer
        self.argument = argument
        self.use_col = use_col

    def text_argument(self, text, drop_min_seq=3, seq_sort=True):
        seq_list = text.split('. ')
        seq_len = len(seq_list)
        if seq_len >= drop_min_seq:
            orig_idx_list = list(range(0, seq_len))
            idx_list = random.sample(orig_idx_list, random.randint(round(seq_len * 0.7), seq_len))
            if seq_sort:
                idx_list = sorted(idx_list)
            insert_idx_list = random.sample(orig_idx_list, random.randint(0, seq_len//3))
            for x in insert_idx_list:
                idx = random.randint(0, len(idx_list))
                idx_list.insert(idx, x)
            seq_list = [seq_list[i] for i in idx_list]
        text = '. '.join(seq_list)
        return text

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        
        text = self.df.loc[idx, self.use_col]

        if self.argument:
            text = self.text_argument(text, drop_min_seq=3, seq_sort=True)

        token = self.tokenizer.encode_plus(
            text,
            padding = 'max_length', max_length = hps.token_max_length, truncation = True,
            return_attention_mask=True, return_tensors='pt'
        )

        sample = dict(
            input_ids=token['input_ids'][0],
            attention_mask=token['attention_mask'][0]
        )
        
        label = torch.tensor(self.df.loc[idx, 'judgement'], dtype=torch.float32)
        return sample, label
        

## Model

In [10]:
class BertLstmModel(nn.Module):
    def __init__(self, hidden_size):
        super().__init__()
        self.bert = transformers.AutoModel.from_pretrained(hps.model_name, config=bert_config)
        self.lstm = nn.LSTM(hidden_size, hidden_size, batch_first=True, bidirectional=True)
        self.leakyrelu = nn.LeakyReLU()
        self.dropout = nn.Dropout(p=0.5)
        self.regressor = nn.Linear(hidden_size*2, 1)
    
    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        self.lstm.flatten_parameters()
        out, _ = self.lstm(outputs['last_hidden_state'], None)
        out = self.leakyrelu(out)
        sequence_output = out[:, -1, :]
        output = self.dropout(sequence_output)
        logits = torch.flatten(self.regressor(output))
        return logits

In [11]:
class BertLstmExModel(nn.Module):
    def __init__(self, hidden_size, config, use_hidden_n=10):
        super().__init__()
        
        self.bert = transformers.AutoModel.from_pretrained(hps.model_name, config=bert_config)
        self.hidden_size = hidden_size
        self.use_hidden_n = use_hidden_n
        self.lstm = nn.LSTM(self.hidden_size, self.hidden_size, batch_first=True, bidirectional=True)
        self.leakyrelu = nn.LeakyReLU()
        self.dropout = nn.Dropout(p=0.3)
        self.conv1d = nn.Conv1d(in_channels=self.use_hidden_n, out_channels=1, kernel_size=3, padding='same')
        self.regressor = nn.Linear(self.hidden_size*2, 1)
        
    
    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        hidden_states_list = [outputs['hidden_states'][-1*i] for i in range(1, self.use_hidden_n+1)]
        self.lstm.flatten_parameters()
        out_list = [
            self.dropout(
                self.leakyrelu(
                    self.lstm(hidden_state, None)[0]
                )[:, -1, :]
            ).view(-1, 1, self.hidden_size*2)  # (batch, use_hidden_n, hidden_size*2)
        for hidden_state in hidden_states_list]

        out = torch.cat(out_list, dim=1)

        out = self.dropout(self.leakyrelu(self.conv1d(out)))
        out = out.view(out.size(0), -1)

        logits = torch.flatten(self.regressor(out))
        return logits

## Checkpoint

In [12]:
class ModelCheckpoint:
    def __init__(self, save_dir:str, save_name:str, cv_id:int):
        os.makedirs(save_dir, exist_ok=True)
        self.cv_id = cv_id
        self.save_dir = save_dir
        self.save_name = save_name
        self.best_loss = self.best_acc = self.best_fbeta_score = 0.0

    def get_checkpoint_name(self):
        checkpoint_name = f"{self.save_name.replace('/', '_')}_cv{self.cv_id}.pth"
        checkpoint_name = os.path.join(self.save_dir, checkpoint_name)
        return checkpoint_name

    def save_checkpoint(self, model):
        torch.save(model.state_dict(), self.get_checkpoint_name())

    def load_checkpoint(self, model=None, manual_name=None):
        if manual_name is None:
            checkpoint_name = self.get_checkpoint_name()
        else:
            checkpoint_name = manual_name
        print(checkpoint_name)
        model.load_state_dict(torch.load(checkpoint_name))
        return model

## Fit

In [13]:
def fit(dataloaders, model, optimizer, num_epochs, device, batch_size, lr_scheduler, cv_id):

    seed_torch(hps.random_seed)

    history = {
        'train':{'loss':[], 'acc':[], 'fbscore':[]},
        'val':{'loss':[], 'acc':[], 'fbscore':[]},
        'lr':[],
    }

    checkpoint = ModelCheckpoint(save_dir='cross_validation_weights', save_name='bert_text_classification', cv_id=cv_id)
    best_model_wts = copy.deepcopy(model.state_dict())

    print(f"Using device : {device}")
    for epoch in range(num_epochs):
        print(f"【Epoch {epoch+1: 3}/{num_epochs: 3}】   LR -> ", end='')
        for i, params in enumerate(optimizer.param_groups):
            print(f"Group{i}: {params['lr']:.4e}", end=' / ')
        print('')

        for phase in ['train', 'val']:
            running_loss = 0.0
            running_corrects = 0
            running_fbeta_score = 0.0
            if phase == 'train':
                model.train()
            else:
                model.eval()
            for i, (inputs, labels) in enumerate(tqdm(dataloaders[phase])):
                input_ids = inputs['input_ids']
                attention_mask = inputs['attention_mask']
                input_ids = input_ids.to(device)
                attention_mask = attention_mask.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()
                
                with torch.set_grad_enabled(phase == 'train'):
                    logits_outputs = model(input_ids=input_ids, attention_mask=attention_mask)
                    pos_weight = torch.tensor([hps.class_1_weight for i in range(input_ids.size(0))]).to(device)
                    criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
                    loss = criterion(logits_outputs, labels)

                    outputs = torch.sigmoid(logits_outputs)
                    preds = torch.where(outputs >= 0.5, 1, 0)
                    
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                        lr_scheduler.step()

                running_loss += loss.item() * input_ids.size(0)
                running_corrects += torch.sum(preds == labels)
                running_fbeta_score += fbeta_score(labels.to('cpu').detach().numpy(), preds.to('cpu').detach().numpy(), beta=7.0, zero_division=0) * input_ids.size(0)    

                if phase == 'train':
                    if i % 10 == 9:
                        total_num = float((i * batch_size) + input_ids.size(0))
                        print(f"{i+1: 4}/{len(dataloaders[phase]): 4}  <{phase}> Loss:{(running_loss/total_num):.4f}  Acc:{(running_corrects/total_num):.4f}  fbScore:{(running_fbeta_score/total_num):.4f}   LR -> ", end='')
                        for i, params in enumerate(optimizer.param_groups):
                            print(f"Group{i}: {params['lr']:.4e}", end=' / ')
                            if isinstance(optimizer.param_groups[0]['lr'], float):
                                history['lr'].append(optimizer.param_groups[0]['lr'])
                            else:
                                history['lr'].append(optimizer.param_groups[0]['lr'].item())
                        print('')

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects / len(dataloaders[phase].dataset)
            epoch_fbscore = running_fbeta_score / len(dataloaders[phase].dataset)
            
            print(f"<{phase}> Loss:{epoch_loss:.4f}  Acc:{epoch_acc:.4f}  fbScore:{epoch_fbscore:.4f}")

            history[phase]['loss'].append(epoch_loss)
            history[phase]['acc'].append(epoch_acc.item())
            history[phase]['fbscore'].append(epoch_fbscore)


            if phase == 'val' and epoch_fbscore > checkpoint.best_fbeta_score:
                print(f"Checkpoints have been updated to the epoch {epoch+1} weights.")
                checkpoint.best_loss = epoch_loss
                checkpoint.best_acc = epoch_acc
                checkpoint.best_fbeta_score = epoch_fbscore
                checkpoint.best_epoch = epoch+1
                best_model_wts = copy.deepcopy(model.state_dict())

        print('-' * 150)

    model.load_state_dict(best_model_wts)
    checkpoint.save_checkpoint(model)
    torch.cuda.empty_cache()

    return model, history

## Inference

In [14]:
def inference(model, dataloader, device, evaluate=True):
    
    running_loss = 0.0
    running_corrects = 0
    running_fbeta_score = 0.0

    preds_labels_dict = dict(preds = np.empty(0), labels = np.empty(0))

    model.eval()

    for i, (inputs, labels) in enumerate(tqdm(dataloader)):
        input_ids = inputs['input_ids']
        attention_mask = inputs['attention_mask']
        input_ids = input_ids.to(device)
        attention_mask = attention_mask.to(device)
        labels = labels.to(device)

        with torch.no_grad():
            logits_outputs = model(input_ids=input_ids, attention_mask=attention_mask)

            if evaluate:
                pos_weight = torch.tensor([hps.class_1_weight for i in range(input_ids.size(0))]).to(device)
                criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
                loss = criterion(logits_outputs, labels)

            outputs = torch.sigmoid(logits_outputs)
            preds = torch.where(outputs >= 0.5, 1, 0)
            
            if evaluate:
                running_loss += loss.item() * input_ids.size(0)
                running_corrects += torch.sum(preds == labels)
                running_fbeta_score += fbeta_score(labels.to('cpu').detach().numpy(), preds.to('cpu').detach().numpy(), beta=7.0, zero_division=0) * input_ids.size(0)

            preds_labels_dict['preds']  = np.hstack([preds_labels_dict['preds'], preds.to('cpu').detach().numpy().copy()])
            preds_labels_dict['labels']  = np.hstack([preds_labels_dict['labels'], labels.to('cpu').detach().numpy().copy()])
    
    if evaluate:
        loss = running_loss / len(dataloader.dataset)
        acc = running_corrects / len(dataloader.dataset)
        fbscore = running_fbeta_score / len(dataloader.dataset)
        print(f"Loss:{loss:.4f}  Acc:{acc:.4f}  fbScore:{fbscore:.4f}")
    return preds_labels_dict

## CrossValidation Loop

In [15]:
def model_setup(model, dataloaders):

    optimizer = optim.AdamW(
        params=[
            {'params': model.bert.embeddings.parameters(), 'lr': 1e-5},
            {'params': model.bert.encoder.parameters(), 'lr': 2e-5},
            {'params': model.bert.pooler.parameters(), 'lr': 3e-5},
            {'params': model.lstm.parameters(), 'lr': 5e-4},
            {'params': model.conv1d.parameters(), 'lr': 5e-4},
            {'params': model.regressor.parameters(), 'lr': 5e-4}
        ]
    )
    num_warmup_steps = round(hps.num_epochs * len(dataloaders['train']) * 0.1)
    num_training_steps = round(hps.num_epochs * len(dataloaders['train']))
    print(f"InitLR:{hps.initial_lr} / num_warmup_steps:{num_warmup_steps} / num_training_steps:{num_training_steps}")
    lr_scheduler = transformers.get_linear_schedule_with_warmup(optimizer=optimizer, num_warmup_steps=num_warmup_steps, 
                                                                num_training_steps=num_training_steps, last_epoch=-1)

    return (optimizer, lr_scheduler)

In [16]:
def cross_validation(cv_n, orig_df, test_df):

    logs = {
        'fit_history':[],
        'test_preds_labels':[],
        'test_fb_score':[],
    }

    for i in range(cv_n):
        print('\033[32m' + f"Cross-validation loop : {i+1}/{cv_n}" + '\033[0m')

        # DataFrame
        train_df = orig_df.loc[orig_df.cv_id != i].copy().reset_index(drop=True)
        valid_df = orig_df.loc[orig_df.cv_id == i].copy().reset_index(drop=True)
        test_df = test_df.reset_index(drop=True)
        print(f"Train  ->  label_1:{train_df.judgement.sum()} / all:{train_df.judgement.count()}   ({train_df.judgement.sum() / train_df.judgement.count() * 100:.3f}%)")
        print(f"Valid  ->  label_1:{valid_df.judgement.sum()} / all:{valid_df.judgement.count()}   ({valid_df.judgement.sum() / valid_df.judgement.count() * 100:.3f}%)")

        # Dataset / Dataloader
        phase_param = {
            "df":{'train': train_df, 'val': valid_df, 'test': test_df, 'submit': submit_df},
            "argument":{'train': hps.train_argument, 'val': False, 'test': False, 'submit': False},
            "batch_size":{'train':hps.batch_size, 'val':hps.batch_size*4, 'test':hps.batch_size*4, 'submit': hps.batch_size*4},
            "shuffle":{'train': True, 'val': False, 'test': False, 'submit': False},
            "upsample_pos_n":{'train': hps.upsample_pos_n, 'val': 1, 'test': 1, 'submit': 1},
        }
        datasets = {phase:TextClassificationDataset(df=phase_param['df'][phase], tokenizer=base_tokenizer, use_col=hps.use_col,\
                                                    token_max_length=hps.token_max_length, argument=phase_param['argument'][phase],\
                                                    upsample_pos_n=phase_param['upsample_pos_n'][phase]) for phase in ['train', 'val', 'test', 'submit']}
        dataloaders = {phase: DataLoader(datasets[phase], batch_size=phase_param['batch_size'][phase], \
                                        shuffle=phase_param['shuffle'][phase]) for phase in ['train', 'val', 'test', 'submit']}
        
        # Model / Optimizer
        if hps.model_type == 'lstm':
            print(f"Choosed BertLstmModel")
            model = BertLstmModel(hidden_size=bert_config.hidden_size)
        elif hps.model_type == 'lstm_ex':
            print(f"Choosed BertLstmExModel")
            model = BertLstmExModel(hidden_size=bert_config.hidden_size, config=bert_config, use_hidden_n=4)

        optimizer, lr_scheduler = model_setup(model, dataloaders)
        model = model.to(device)
        device_num = torch.cuda.device_count()
        if device_num > 1:
            print(f"Use {device_num} GPUs")
            model = nn.DataParallel(model)

        # Training / Validation
        model, fit_history = fit(dataloaders=dataloaders, model=model, optimizer=optimizer, num_epochs=hps.num_epochs, 
                             device=device, batch_size=hps.batch_size, lr_scheduler=lr_scheduler, cv_id=i)

        # Evaluate
        print(f"Evaluate Test Dataset")
        test_preds_labels_dict = inference(model, dataloader=dataloaders['test'], device=device)
        test_fb_score = fbeta_score(y_true=test_preds_labels_dict['labels'], y_pred=test_preds_labels_dict['preds'], beta=7.0)
        print(f"fb_score : {test_fb_score}")   

        logs['fit_history'].append(fit_history)
        logs['test_preds_labels'].append(test_preds_labels_dict)
        logs['test_fb_score'].append(test_fb_score)

        del model, datasets, dataloaders
        torch.cuda.empty_cache()
        print()

    return logs


In [17]:
logs = cross_validation(cv_n=hps.cv_n, orig_df=train_df, test_df=test_df)

[32mCross-validation loop : 1/5[0m
Train  ->  label_1:504 / all:21716   (2.321%)
Valid  ->  label_1:126 / all:5429   (2.321%)
Choosed BertLstmExModel


Some weights of the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


InitLR:2e-05 / num_warmup_steps:85 / num_training_steps:850
Use 4 GPUs
Using device : cuda
【Epoch   1/  5】   LR -> Group0: 0.0000e+00 / Group1: 0.0000e+00 / Group2: 0.0000e+00 / Group3: 0.0000e+00 / Group4: 0.0000e+00 / Group5: 0.0000e+00 / 


  0%|          | 0/170 [00:00<?, ?it/s]

  10/ 170  <train> Loss:2.5485  Acc:0.0563  fbScore:0.4570   LR -> Group0: 1.1765e-06 / Group1: 2.3529e-06 / Group2: 3.5294e-06 / Group3: 5.8824e-05 / Group4: 5.8824e-05 / Group5: 5.8824e-05 / 
  20/ 170  <train> Loss:2.6329  Acc:0.0410  fbScore:0.5095   LR -> Group0: 2.3529e-06 / Group1: 4.7059e-06 / Group2: 7.0588e-06 / Group3: 1.1765e-04 / Group4: 1.1765e-04 / Group5: 1.1765e-04 / 
  30/ 170  <train> Loss:2.5131  Acc:0.0346  fbScore:0.5044   LR -> Group0: 3.5294e-06 / Group1: 7.0588e-06 / Group2: 1.0588e-05 / Group3: 1.7647e-04 / Group4: 1.7647e-04 / Group5: 1.7647e-04 / 
  40/ 170  <train> Loss:2.5331  Acc:0.0340  fbScore:0.5242   LR -> Group0: 4.7059e-06 / Group1: 9.4118e-06 / Group2: 1.4118e-05 / Group3: 2.3529e-04 / Group4: 2.3529e-04 / Group5: 2.3529e-04 / 
  50/ 170  <train> Loss:2.4598  Acc:0.0316  fbScore:0.5166   LR -> Group0: 5.8824e-06 / Group1: 1.1765e-05 / Group2: 1.7647e-05 / Group3: 2.9412e-04 / Group4: 2.9412e-04 / Group5: 2.9412e-04 / 
  60/ 170  <train> Loss:2.4030

  0%|          | 0/11 [00:00<?, ?it/s]

<val> Loss:0.9037  Acc:0.7220  fbScore:0.7972
Checkpoints have been updated to the epoch 1 weights.
------------------------------------------------------------------------------------------------------------------------------------------------------
【Epoch   2/  5】   LR -> Group0: 8.8889e-06 / Group1: 1.7778e-05 / Group2: 2.6667e-05 / Group3: 4.4444e-04 / Group4: 4.4444e-04 / Group5: 4.4444e-04 / 


  0%|          | 0/170 [00:00<?, ?it/s]

  10/ 170  <train> Loss:0.8034  Acc:0.7359  fbScore:0.7949   LR -> Group0: 8.7582e-06 / Group1: 1.7516e-05 / Group2: 2.6275e-05 / Group3: 4.3791e-04 / Group4: 4.3791e-04 / Group5: 4.3791e-04 / 
  20/ 170  <train> Loss:0.8134  Acc:0.8074  fbScore:0.7451   LR -> Group0: 8.6275e-06 / Group1: 1.7255e-05 / Group2: 2.5882e-05 / Group3: 4.3137e-04 / Group4: 4.3137e-04 / Group5: 4.3137e-04 / 
  30/ 170  <train> Loss:0.7570  Acc:0.8146  fbScore:0.7515   LR -> Group0: 8.4967e-06 / Group1: 1.6993e-05 / Group2: 2.5490e-05 / Group3: 4.2484e-04 / Group4: 4.2484e-04 / Group5: 4.2484e-04 / 
  40/ 170  <train> Loss:0.7185  Acc:0.8309  fbScore:0.7891   LR -> Group0: 8.3660e-06 / Group1: 1.6732e-05 / Group2: 2.5098e-05 / Group3: 4.1830e-04 / Group4: 4.1830e-04 / Group5: 4.1830e-04 / 
  50/ 170  <train> Loss:0.6941  Acc:0.8473  fbScore:0.8056   LR -> Group0: 8.2353e-06 / Group1: 1.6471e-05 / Group2: 2.4706e-05 / Group3: 4.1176e-04 / Group4: 4.1176e-04 / Group5: 4.1176e-04 / 
  60/ 170  <train> Loss:0.6782

  0%|          | 0/11 [00:00<?, ?it/s]

<val> Loss:0.6527  Acc:0.8589  fbScore:0.8611
Checkpoints have been updated to the epoch 2 weights.
------------------------------------------------------------------------------------------------------------------------------------------------------
【Epoch   3/  5】   LR -> Group0: 6.6667e-06 / Group1: 1.3333e-05 / Group2: 2.0000e-05 / Group3: 3.3333e-04 / Group4: 3.3333e-04 / Group5: 3.3333e-04 / 


  0%|          | 0/170 [00:00<?, ?it/s]

  10/ 170  <train> Loss:0.4471  Acc:0.8938  fbScore:0.7744   LR -> Group0: 6.5359e-06 / Group1: 1.3072e-05 / Group2: 1.9608e-05 / Group3: 3.2680e-04 / Group4: 3.2680e-04 / Group5: 3.2680e-04 / 
  20/ 170  <train> Loss:0.4330  Acc:0.9070  fbScore:0.8088   LR -> Group0: 6.4052e-06 / Group1: 1.2810e-05 / Group2: 1.9216e-05 / Group3: 3.2026e-04 / Group4: 3.2026e-04 / Group5: 3.2026e-04 / 
  30/ 170  <train> Loss:0.4260  Acc:0.9063  fbScore:0.8100   LR -> Group0: 6.2745e-06 / Group1: 1.2549e-05 / Group2: 1.8824e-05 / Group3: 3.1373e-04 / Group4: 3.1373e-04 / Group5: 3.1373e-04 / 
  40/ 170  <train> Loss:0.3881  Acc:0.9133  fbScore:0.8432   LR -> Group0: 6.1438e-06 / Group1: 1.2288e-05 / Group2: 1.8431e-05 / Group3: 3.0719e-04 / Group4: 3.0719e-04 / Group5: 3.0719e-04 / 
  50/ 170  <train> Loss:0.3899  Acc:0.9161  fbScore:0.8416   LR -> Group0: 6.0131e-06 / Group1: 1.2026e-05 / Group2: 1.8039e-05 / Group3: 3.0065e-04 / Group4: 3.0065e-04 / Group5: 3.0065e-04 / 
  60/ 170  <train> Loss:0.4464

  0%|          | 0/11 [00:00<?, ?it/s]

<val> Loss:0.7365  Acc:0.8876  fbScore:0.8670
Checkpoints have been updated to the epoch 3 weights.
------------------------------------------------------------------------------------------------------------------------------------------------------
【Epoch   4/  5】   LR -> Group0: 4.4444e-06 / Group1: 8.8889e-06 / Group2: 1.3333e-05 / Group3: 2.2222e-04 / Group4: 2.2222e-04 / Group5: 2.2222e-04 / 


  0%|          | 0/170 [00:00<?, ?it/s]

  10/ 170  <train> Loss:0.3223  Acc:0.9148  fbScore:0.8839   LR -> Group0: 4.3137e-06 / Group1: 8.6275e-06 / Group2: 1.2941e-05 / Group3: 2.1569e-04 / Group4: 2.1569e-04 / Group5: 2.1569e-04 / 
  20/ 170  <train> Loss:0.2851  Acc:0.9332  fbScore:0.8723   LR -> Group0: 4.1830e-06 / Group1: 8.3660e-06 / Group2: 1.2549e-05 / Group3: 2.0915e-04 / Group4: 2.0915e-04 / Group5: 2.0915e-04 / 
  30/ 170  <train> Loss:0.2617  Acc:0.9393  fbScore:0.9031   LR -> Group0: 4.0523e-06 / Group1: 8.1046e-06 / Group2: 1.2157e-05 / Group3: 2.0261e-04 / Group4: 2.0261e-04 / Group5: 2.0261e-04 / 
  40/ 170  <train> Loss:0.2461  Acc:0.9434  fbScore:0.8903   LR -> Group0: 3.9216e-06 / Group1: 7.8431e-06 / Group2: 1.1765e-05 / Group3: 1.9608e-04 / Group4: 1.9608e-04 / Group5: 1.9608e-04 / 
  50/ 170  <train> Loss:0.2383  Acc:0.9461  fbScore:0.9038   LR -> Group0: 3.7908e-06 / Group1: 7.5817e-06 / Group2: 1.1373e-05 / Group3: 1.8954e-04 / Group4: 1.8954e-04 / Group5: 1.8954e-04 / 
  60/ 170  <train> Loss:0.2409

  0%|          | 0/11 [00:00<?, ?it/s]

<val> Loss:0.8264  Acc:0.9379  fbScore:0.8893
Checkpoints have been updated to the epoch 4 weights.
------------------------------------------------------------------------------------------------------------------------------------------------------
【Epoch   5/  5】   LR -> Group0: 2.2222e-06 / Group1: 4.4444e-06 / Group2: 6.6667e-06 / Group3: 1.1111e-04 / Group4: 1.1111e-04 / Group5: 1.1111e-04 / 


  0%|          | 0/170 [00:00<?, ?it/s]

  10/ 170  <train> Loss:0.3128  Acc:0.9328  fbScore:0.9442   LR -> Group0: 2.0915e-06 / Group1: 4.1830e-06 / Group2: 6.2745e-06 / Group3: 1.0458e-04 / Group4: 1.0458e-04 / Group5: 1.0458e-04 / 
  20/ 170  <train> Loss:0.2988  Acc:0.9348  fbScore:0.9204   LR -> Group0: 1.9608e-06 / Group1: 3.9216e-06 / Group2: 5.8824e-06 / Group3: 9.8039e-05 / Group4: 9.8039e-05 / Group5: 9.8039e-05 / 
  30/ 170  <train> Loss:0.2806  Acc:0.9388  fbScore:0.9321   LR -> Group0: 1.8301e-06 / Group1: 3.6601e-06 / Group2: 5.4902e-06 / Group3: 9.1503e-05 / Group4: 9.1503e-05 / Group5: 9.1503e-05 / 
  40/ 170  <train> Loss:0.2550  Acc:0.9438  fbScore:0.9356   LR -> Group0: 1.6993e-06 / Group1: 3.3987e-06 / Group2: 5.0980e-06 / Group3: 8.4967e-05 / Group4: 8.4967e-05 / Group5: 8.4967e-05 / 
  50/ 170  <train> Loss:0.2542  Acc:0.9464  fbScore:0.9417   LR -> Group0: 1.5686e-06 / Group1: 3.1373e-06 / Group2: 4.7059e-06 / Group3: 7.8431e-05 / Group4: 7.8431e-05 / Group5: 7.8431e-05 / 
  60/ 170  <train> Loss:0.2469

  0%|          | 0/11 [00:00<?, ?it/s]

<val> Loss:1.2436  Acc:0.9547  fbScore:0.8606
------------------------------------------------------------------------------------------------------------------------------------------------------
Evaluate Test Dataset


  0%|          | 0/54 [00:00<?, ?it/s]

Loss:0.3533  Acc:0.9391  fbScore:0.9295
fb_score : 0.9385562530178659

[32mCross-validation loop : 2/5[0m
Train  ->  label_1:504 / all:21716   (2.321%)
Valid  ->  label_1:126 / all:5429   (2.321%)
Choosed BertLstmExModel


Some weights of the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


InitLR:2e-05 / num_warmup_steps:85 / num_training_steps:850
Use 4 GPUs
Using device : cuda
【Epoch   1/  5】   LR -> Group0: 0.0000e+00 / Group1: 0.0000e+00 / Group2: 0.0000e+00 / Group3: 0.0000e+00 / Group4: 0.0000e+00 / Group5: 0.0000e+00 / 


  0%|          | 0/170 [00:00<?, ?it/s]

  10/ 170  <train> Loss:3.9551  Acc:0.0320  fbScore:0.6000   LR -> Group0: 1.1765e-06 / Group1: 2.3529e-06 / Group2: 3.5294e-06 / Group3: 5.8824e-05 / Group4: 5.8824e-05 / Group5: 5.8824e-05 / 
  20/ 170  <train> Loss:3.7548  Acc:0.0301  fbScore:0.5629   LR -> Group0: 2.3529e-06 / Group1: 4.7059e-06 / Group2: 7.0588e-06 / Group3: 1.1765e-04 / Group4: 1.1765e-04 / Group5: 1.1765e-04 / 
  30/ 170  <train> Loss:3.4486  Acc:0.0271  fbScore:0.5409   LR -> Group0: 3.5294e-06 / Group1: 7.0588e-06 / Group2: 1.0588e-05 / Group3: 1.7647e-04 / Group4: 1.7647e-04 / Group5: 1.7647e-04 / 
  40/ 170  <train> Loss:3.3881  Acc:0.0266  fbScore:0.5391   LR -> Group0: 4.7059e-06 / Group1: 9.4118e-06 / Group2: 1.4118e-05 / Group3: 2.3529e-04 / Group4: 2.3529e-04 / Group5: 2.3529e-04 / 
  50/ 170  <train> Loss:3.2418  Acc:0.0253  fbScore:0.5183   LR -> Group0: 5.8824e-06 / Group1: 1.1765e-05 / Group2: 1.7647e-05 / Group3: 2.9412e-04 / Group4: 2.9412e-04 / Group5: 2.9412e-04 / 
  60/ 170  <train> Loss:3.2260

  0%|          | 0/11 [00:00<?, ?it/s]

<val> Loss:0.6420  Acc:0.8847  fbScore:0.8884
Checkpoints have been updated to the epoch 1 weights.
------------------------------------------------------------------------------------------------------------------------------------------------------
【Epoch   2/  5】   LR -> Group0: 8.8889e-06 / Group1: 1.7778e-05 / Group2: 2.6667e-05 / Group3: 4.4444e-04 / Group4: 4.4444e-04 / Group5: 4.4444e-04 / 


  0%|          | 0/170 [00:00<?, ?it/s]

  10/ 170  <train> Loss:1.0381  Acc:0.9070  fbScore:0.6925   LR -> Group0: 8.7582e-06 / Group1: 1.7516e-05 / Group2: 2.6275e-05 / Group3: 4.3791e-04 / Group4: 4.3791e-04 / Group5: 4.3791e-04 / 
  20/ 170  <train> Loss:1.0028  Acc:0.8438  fbScore:0.7547   LR -> Group0: 8.6275e-06 / Group1: 1.7255e-05 / Group2: 2.5882e-05 / Group3: 4.3137e-04 / Group4: 4.3137e-04 / Group5: 4.3137e-04 / 
  30/ 170  <train> Loss:1.0466  Acc:0.8500  fbScore:0.7727   LR -> Group0: 8.4967e-06 / Group1: 1.6993e-05 / Group2: 2.5490e-05 / Group3: 4.2484e-04 / Group4: 4.2484e-04 / Group5: 4.2484e-04 / 
  40/ 170  <train> Loss:0.9932  Acc:0.8568  fbScore:0.7957   LR -> Group0: 8.3660e-06 / Group1: 1.6732e-05 / Group2: 2.5098e-05 / Group3: 4.1830e-04 / Group4: 4.1830e-04 / Group5: 4.1830e-04 / 
  50/ 170  <train> Loss:0.9600  Acc:0.8644  fbScore:0.7858   LR -> Group0: 8.2353e-06 / Group1: 1.6471e-05 / Group2: 2.4706e-05 / Group3: 4.1176e-04 / Group4: 4.1176e-04 / Group5: 4.1176e-04 / 
  60/ 170  <train> Loss:0.9515

  0%|          | 0/11 [00:00<?, ?it/s]

<val> Loss:0.4465  Acc:0.9057  fbScore:0.8988
Checkpoints have been updated to the epoch 2 weights.
------------------------------------------------------------------------------------------------------------------------------------------------------
【Epoch   3/  5】   LR -> Group0: 6.6667e-06 / Group1: 1.3333e-05 / Group2: 2.0000e-05 / Group3: 3.3333e-04 / Group4: 3.3333e-04 / Group5: 3.3333e-04 / 


  0%|          | 0/170 [00:00<?, ?it/s]

  10/ 170  <train> Loss:0.2647  Acc:0.9336  fbScore:0.8353   LR -> Group0: 6.5359e-06 / Group1: 1.3072e-05 / Group2: 1.9608e-05 / Group3: 3.2680e-04 / Group4: 3.2680e-04 / Group5: 3.2680e-04 / 
  20/ 170  <train> Loss:0.3435  Acc:0.9500  fbScore:0.8885   LR -> Group0: 6.4052e-06 / Group1: 1.2810e-05 / Group2: 1.9216e-05 / Group3: 3.2026e-04 / Group4: 3.2026e-04 / Group5: 3.2026e-04 / 
  30/ 170  <train> Loss:0.4621  Acc:0.9289  fbScore:0.8710   LR -> Group0: 6.2745e-06 / Group1: 1.2549e-05 / Group2: 1.8824e-05 / Group3: 3.1373e-04 / Group4: 3.1373e-04 / Group5: 3.1373e-04 / 
  40/ 170  <train> Loss:0.5225  Acc:0.8939  fbScore:0.8516   LR -> Group0: 6.1438e-06 / Group1: 1.2288e-05 / Group2: 1.8431e-05 / Group3: 3.0719e-04 / Group4: 3.0719e-04 / Group5: 3.0719e-04 / 
  50/ 170  <train> Loss:0.4878  Acc:0.8973  fbScore:0.8294   LR -> Group0: 6.0131e-06 / Group1: 1.2026e-05 / Group2: 1.8039e-05 / Group3: 3.0065e-04 / Group4: 3.0065e-04 / Group5: 3.0065e-04 / 
  60/ 170  <train> Loss:0.4662

  0%|          | 0/11 [00:00<?, ?it/s]

<val> Loss:0.6760  Acc:0.9552  fbScore:0.9037
Checkpoints have been updated to the epoch 3 weights.
------------------------------------------------------------------------------------------------------------------------------------------------------
【Epoch   4/  5】   LR -> Group0: 4.4444e-06 / Group1: 8.8889e-06 / Group2: 1.3333e-05 / Group3: 2.2222e-04 / Group4: 2.2222e-04 / Group5: 2.2222e-04 / 


  0%|          | 0/170 [00:00<?, ?it/s]

  10/ 170  <train> Loss:0.2696  Acc:0.9570  fbScore:0.8622   LR -> Group0: 4.3137e-06 / Group1: 8.6275e-06 / Group2: 1.2941e-05 / Group3: 2.1569e-04 / Group4: 2.1569e-04 / Group5: 2.1569e-04 / 
  20/ 170  <train> Loss:0.2300  Acc:0.9594  fbScore:0.9137   LR -> Group0: 4.1830e-06 / Group1: 8.3660e-06 / Group2: 1.2549e-05 / Group3: 2.0915e-04 / Group4: 2.0915e-04 / Group5: 2.0915e-04 / 
  30/ 170  <train> Loss:0.3789  Acc:0.9516  fbScore:0.8239   LR -> Group0: 4.0523e-06 / Group1: 8.1046e-06 / Group2: 1.2157e-05 / Group3: 2.0261e-04 / Group4: 2.0261e-04 / Group5: 2.0261e-04 / 
  40/ 170  <train> Loss:0.3401  Acc:0.9508  fbScore:0.8312   LR -> Group0: 3.9216e-06 / Group1: 7.8431e-06 / Group2: 1.1765e-05 / Group3: 1.9608e-04 / Group4: 1.9608e-04 / Group5: 1.9608e-04 / 
  50/ 170  <train> Loss:0.3202  Acc:0.9512  fbScore:0.8539   LR -> Group0: 3.7908e-06 / Group1: 7.5817e-06 / Group2: 1.1373e-05 / Group3: 1.8954e-04 / Group4: 1.8954e-04 / Group5: 1.8954e-04 / 
  60/ 170  <train> Loss:0.3353

  0%|          | 0/11 [00:00<?, ?it/s]

<val> Loss:0.7037  Acc:0.9449  fbScore:0.9022
------------------------------------------------------------------------------------------------------------------------------------------------------
【Epoch   5/  5】   LR -> Group0: 2.2222e-06 / Group1: 4.4444e-06 / Group2: 6.6667e-06 / Group3: 1.1111e-04 / Group4: 1.1111e-04 / Group5: 1.1111e-04 / 


  0%|          | 0/170 [00:00<?, ?it/s]

  10/ 170  <train> Loss:0.3569  Acc:0.9406  fbScore:0.7379   LR -> Group0: 2.0915e-06 / Group1: 4.1830e-06 / Group2: 6.2745e-06 / Group3: 1.0458e-04 / Group4: 1.0458e-04 / Group5: 1.0458e-04 / 
  20/ 170  <train> Loss:0.2916  Acc:0.9426  fbScore:0.8337   LR -> Group0: 1.9608e-06 / Group1: 3.9216e-06 / Group2: 5.8824e-06 / Group3: 9.8039e-05 / Group4: 9.8039e-05 / Group5: 9.8039e-05 / 
  30/ 170  <train> Loss:0.2793  Acc:0.9430  fbScore:0.8751   LR -> Group0: 1.8301e-06 / Group1: 3.6601e-06 / Group2: 5.4902e-06 / Group3: 9.1503e-05 / Group4: 9.1503e-05 / Group5: 9.1503e-05 / 
  40/ 170  <train> Loss:0.2724  Acc:0.9418  fbScore:0.8920   LR -> Group0: 1.6993e-06 / Group1: 3.3987e-06 / Group2: 5.0980e-06 / Group3: 8.4967e-05 / Group4: 8.4967e-05 / Group5: 8.4967e-05 / 
  50/ 170  <train> Loss:0.2619  Acc:0.9422  fbScore:0.8840   LR -> Group0: 1.5686e-06 / Group1: 3.1373e-06 / Group2: 4.7059e-06 / Group3: 7.8431e-05 / Group4: 7.8431e-05 / Group5: 7.8431e-05 / 
  60/ 170  <train> Loss:0.2564

  0%|          | 0/11 [00:00<?, ?it/s]

<val> Loss:0.6828  Acc:0.9556  fbScore:0.9066
Checkpoints have been updated to the epoch 5 weights.
------------------------------------------------------------------------------------------------------------------------------------------------------
Evaluate Test Dataset


  0%|          | 0/54 [00:00<?, ?it/s]

Loss:0.2851  Acc:0.9585  fbScore:0.9447
fb_score : 0.9507161039040696

[32mCross-validation loop : 3/5[0m
Train  ->  label_1:504 / all:21716   (2.321%)
Valid  ->  label_1:126 / all:5429   (2.321%)
Choosed BertLstmExModel


Some weights of the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


InitLR:2e-05 / num_warmup_steps:85 / num_training_steps:850
Use 4 GPUs
Using device : cuda
【Epoch   1/  5】   LR -> Group0: 0.0000e+00 / Group1: 0.0000e+00 / Group2: 0.0000e+00 / Group3: 0.0000e+00 / Group4: 0.0000e+00 / Group5: 0.0000e+00 / 


  0%|          | 0/170 [00:00<?, ?it/s]

  10/ 170  <train> Loss:3.3194  Acc:0.0258  fbScore:0.5281   LR -> Group0: 1.1765e-06 / Group1: 2.3529e-06 / Group2: 3.5294e-06 / Group3: 5.8824e-05 / Group4: 5.8824e-05 / Group5: 5.8824e-05 / 
  20/ 170  <train> Loss:3.1994  Acc:0.0246  fbScore:0.5227   LR -> Group0: 2.3529e-06 / Group1: 4.7059e-06 / Group2: 7.0588e-06 / Group3: 1.1765e-04 / Group4: 1.1765e-04 / Group5: 1.1765e-04 / 
  30/ 170  <train> Loss:3.1828  Acc:0.0245  fbScore:0.5306   LR -> Group0: 3.5294e-06 / Group1: 7.0588e-06 / Group2: 1.0588e-05 / Group3: 1.7647e-04 / Group4: 1.7647e-04 / Group5: 1.7647e-04 / 
  40/ 170  <train> Loss:3.0156  Acc:0.0229  fbScore:0.4965   LR -> Group0: 4.7059e-06 / Group1: 9.4118e-06 / Group2: 1.4118e-05 / Group3: 2.3529e-04 / Group4: 2.3529e-04 / Group5: 2.3529e-04 / 
  50/ 170  <train> Loss:2.9582  Acc:0.0223  fbScore:0.4917   LR -> Group0: 5.8824e-06 / Group1: 1.1765e-05 / Group2: 1.7647e-05 / Group3: 2.9412e-04 / Group4: 2.9412e-04 / Group5: 2.9412e-04 / 
  60/ 170  <train> Loss:2.9663

  0%|          | 0/11 [00:00<?, ?it/s]

<val> Loss:0.9835  Acc:0.7908  fbScore:0.8336
Checkpoints have been updated to the epoch 1 weights.
------------------------------------------------------------------------------------------------------------------------------------------------------
【Epoch   2/  5】   LR -> Group0: 8.8889e-06 / Group1: 1.7778e-05 / Group2: 2.6667e-05 / Group3: 4.4444e-04 / Group4: 4.4444e-04 / Group5: 4.4444e-04 / 


  0%|          | 0/170 [00:00<?, ?it/s]

  10/ 170  <train> Loss:1.1646  Acc:0.6484  fbScore:0.7822   LR -> Group0: 8.7582e-06 / Group1: 1.7516e-05 / Group2: 2.6275e-05 / Group3: 4.3791e-04 / Group4: 4.3791e-04 / Group5: 4.3791e-04 / 
  20/ 170  <train> Loss:1.0100  Acc:0.7578  fbScore:0.7355   LR -> Group0: 8.6275e-06 / Group1: 1.7255e-05 / Group2: 2.5882e-05 / Group3: 4.3137e-04 / Group4: 4.3137e-04 / Group5: 4.3137e-04 / 
  30/ 170  <train> Loss:0.9446  Acc:0.7865  fbScore:0.7707   LR -> Group0: 8.4967e-06 / Group1: 1.6993e-05 / Group2: 2.5490e-05 / Group3: 4.2484e-04 / Group4: 4.2484e-04 / Group5: 4.2484e-04 / 
  40/ 170  <train> Loss:0.8938  Acc:0.7869  fbScore:0.7791   LR -> Group0: 8.3660e-06 / Group1: 1.6732e-05 / Group2: 2.5098e-05 / Group3: 4.1830e-04 / Group4: 4.1830e-04 / Group5: 4.1830e-04 / 
  50/ 170  <train> Loss:0.8911  Acc:0.8020  fbScore:0.7895   LR -> Group0: 8.2353e-06 / Group1: 1.6471e-05 / Group2: 2.4706e-05 / Group3: 4.1176e-04 / Group4: 4.1176e-04 / Group5: 4.1176e-04 / 
  60/ 170  <train> Loss:0.8154

  0%|          | 0/11 [00:00<?, ?it/s]

<val> Loss:0.7101  Acc:0.7425  fbScore:0.8106
------------------------------------------------------------------------------------------------------------------------------------------------------
【Epoch   3/  5】   LR -> Group0: 6.6667e-06 / Group1: 1.3333e-05 / Group2: 2.0000e-05 / Group3: 3.3333e-04 / Group4: 3.3333e-04 / Group5: 3.3333e-04 / 


  0%|          | 0/170 [00:00<?, ?it/s]

  10/ 170  <train> Loss:0.8906  Acc:0.8016  fbScore:0.7551   LR -> Group0: 6.5359e-06 / Group1: 1.3072e-05 / Group2: 1.9608e-05 / Group3: 3.2680e-04 / Group4: 3.2680e-04 / Group5: 3.2680e-04 / 
  20/ 170  <train> Loss:0.7295  Acc:0.8285  fbScore:0.8173   LR -> Group0: 6.4052e-06 / Group1: 1.2810e-05 / Group2: 1.9216e-05 / Group3: 3.2026e-04 / Group4: 3.2026e-04 / Group5: 3.2026e-04 / 
  30/ 170  <train> Loss:0.6717  Acc:0.8448  fbScore:0.8481   LR -> Group0: 6.2745e-06 / Group1: 1.2549e-05 / Group2: 1.8824e-05 / Group3: 3.1373e-04 / Group4: 3.1373e-04 / Group5: 3.1373e-04 / 
  40/ 170  <train> Loss:0.6372  Acc:0.8537  fbScore:0.8567   LR -> Group0: 6.1438e-06 / Group1: 1.2288e-05 / Group2: 1.8431e-05 / Group3: 3.0719e-04 / Group4: 3.0719e-04 / Group5: 3.0719e-04 / 
  50/ 170  <train> Loss:0.6070  Acc:0.8584  fbScore:0.8649   LR -> Group0: 6.0131e-06 / Group1: 1.2026e-05 / Group2: 1.8039e-05 / Group3: 3.0065e-04 / Group4: 3.0065e-04 / Group5: 3.0065e-04 / 
  60/ 170  <train> Loss:0.5823

  0%|          | 0/11 [00:00<?, ?it/s]

<val> Loss:0.5763  Acc:0.9429  fbScore:0.9187
Checkpoints have been updated to the epoch 3 weights.
------------------------------------------------------------------------------------------------------------------------------------------------------
【Epoch   4/  5】   LR -> Group0: 4.4444e-06 / Group1: 8.8889e-06 / Group2: 1.3333e-05 / Group3: 2.2222e-04 / Group4: 2.2222e-04 / Group5: 2.2222e-04 / 


  0%|          | 0/170 [00:00<?, ?it/s]

  10/ 170  <train> Loss:0.5105  Acc:0.9242  fbScore:0.8248   LR -> Group0: 4.3137e-06 / Group1: 8.6275e-06 / Group2: 1.2941e-05 / Group3: 2.1569e-04 / Group4: 2.1569e-04 / Group5: 2.1569e-04 / 
  20/ 170  <train> Loss:0.4505  Acc:0.9012  fbScore:0.8563   LR -> Group0: 4.1830e-06 / Group1: 8.3660e-06 / Group2: 1.2549e-05 / Group3: 2.0915e-04 / Group4: 2.0915e-04 / Group5: 2.0915e-04 / 
  30/ 170  <train> Loss:0.4133  Acc:0.8982  fbScore:0.8408   LR -> Group0: 4.0523e-06 / Group1: 8.1046e-06 / Group2: 1.2157e-05 / Group3: 2.0261e-04 / Group4: 2.0261e-04 / Group5: 2.0261e-04 / 
  40/ 170  <train> Loss:0.4194  Acc:0.8996  fbScore:0.8530   LR -> Group0: 3.9216e-06 / Group1: 7.8431e-06 / Group2: 1.1765e-05 / Group3: 1.9608e-04 / Group4: 1.9608e-04 / Group5: 1.9608e-04 / 
  50/ 170  <train> Loss:0.3884  Acc:0.9044  fbScore:0.8663   LR -> Group0: 3.7908e-06 / Group1: 7.5817e-06 / Group2: 1.1373e-05 / Group3: 1.8954e-04 / Group4: 1.8954e-04 / Group5: 1.8954e-04 / 
  60/ 170  <train> Loss:0.3685

  0%|          | 0/11 [00:00<?, ?it/s]

<val> Loss:0.4177  Acc:0.9190  fbScore:0.9182
------------------------------------------------------------------------------------------------------------------------------------------------------
【Epoch   5/  5】   LR -> Group0: 2.2222e-06 / Group1: 4.4444e-06 / Group2: 6.6667e-06 / Group3: 1.1111e-04 / Group4: 1.1111e-04 / Group5: 1.1111e-04 / 


  0%|          | 0/170 [00:00<?, ?it/s]

  10/ 170  <train> Loss:0.3077  Acc:0.9141  fbScore:0.8333   LR -> Group0: 2.0915e-06 / Group1: 4.1830e-06 / Group2: 6.2745e-06 / Group3: 1.0458e-04 / Group4: 1.0458e-04 / Group5: 1.0458e-04 / 
  20/ 170  <train> Loss:0.3213  Acc:0.9191  fbScore:0.7936   LR -> Group0: 1.9608e-06 / Group1: 3.9216e-06 / Group2: 5.8824e-06 / Group3: 9.8039e-05 / Group4: 9.8039e-05 / Group5: 9.8039e-05 / 
  30/ 170  <train> Loss:0.4812  Acc:0.9190  fbScore:0.8313   LR -> Group0: 1.8301e-06 / Group1: 3.6601e-06 / Group2: 5.4902e-06 / Group3: 9.1503e-05 / Group4: 9.1503e-05 / Group5: 9.1503e-05 / 
  40/ 170  <train> Loss:0.4357  Acc:0.9170  fbScore:0.8346   LR -> Group0: 1.6993e-06 / Group1: 3.3987e-06 / Group2: 5.0980e-06 / Group3: 8.4967e-05 / Group4: 8.4967e-05 / Group5: 8.4967e-05 / 
  50/ 170  <train> Loss:0.3919  Acc:0.9220  fbScore:0.8534   LR -> Group0: 1.5686e-06 / Group1: 3.1373e-06 / Group2: 4.7059e-06 / Group3: 7.8431e-05 / Group4: 7.8431e-05 / Group5: 7.8431e-05 / 
  60/ 170  <train> Loss:0.3741

  0%|          | 0/11 [00:00<?, ?it/s]

<val> Loss:0.4997  Acc:0.9425  fbScore:0.9128
------------------------------------------------------------------------------------------------------------------------------------------------------
Evaluate Test Dataset


  0%|          | 0/54 [00:00<?, ?it/s]

Loss:0.3516  Acc:0.9451  fbScore:0.9327
fb_score : 0.9416795560003639

[32mCross-validation loop : 4/5[0m
Train  ->  label_1:504 / all:21716   (2.321%)
Valid  ->  label_1:126 / all:5429   (2.321%)
Choosed BertLstmExModel


Some weights of the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


InitLR:2e-05 / num_warmup_steps:85 / num_training_steps:850
Use 4 GPUs
Using device : cuda
【Epoch   1/  5】   LR -> Group0: 0.0000e+00 / Group1: 0.0000e+00 / Group2: 0.0000e+00 / Group3: 0.0000e+00 / Group4: 0.0000e+00 / Group5: 0.0000e+00 / 


  0%|          | 0/170 [00:00<?, ?it/s]

  10/ 170  <train> Loss:3.0050  Acc:0.0227  fbScore:0.4951   LR -> Group0: 1.1765e-06 / Group1: 2.3529e-06 / Group2: 3.5294e-06 / Group3: 5.8824e-05 / Group4: 5.8824e-05 / Group5: 5.8824e-05 / 
  20/ 170  <train> Loss:2.9244  Acc:0.0219  fbScore:0.4805   LR -> Group0: 2.3529e-06 / Group1: 4.7059e-06 / Group2: 7.0588e-06 / Group3: 1.1765e-04 / Group4: 1.1765e-04 / Group5: 1.1765e-04 / 
  30/ 170  <train> Loss:3.0544  Acc:0.0232  fbScore:0.5049   LR -> Group0: 3.5294e-06 / Group1: 7.0588e-06 / Group2: 1.0588e-05 / Group3: 1.7647e-04 / Group4: 1.7647e-04 / Group5: 1.7647e-04 / 
  40/ 170  <train> Loss:2.9985  Acc:0.0227  fbScore:0.4978   LR -> Group0: 4.7059e-06 / Group1: 9.4118e-06 / Group2: 1.4118e-05 / Group3: 2.3529e-04 / Group4: 2.3529e-04 / Group5: 2.3529e-04 / 
  50/ 170  <train> Loss:2.9760  Acc:0.0225  fbScore:0.5002   LR -> Group0: 5.8824e-06 / Group1: 1.1765e-05 / Group2: 1.7647e-05 / Group3: 2.9412e-04 / Group4: 2.9412e-04 / Group5: 2.9412e-04 / 
  60/ 170  <train> Loss:2.9311

  0%|          | 0/11 [00:00<?, ?it/s]

<val> Loss:1.5045  Acc:0.7508  fbScore:0.7094
Checkpoints have been updated to the epoch 1 weights.
------------------------------------------------------------------------------------------------------------------------------------------------------
【Epoch   2/  5】   LR -> Group0: 8.8889e-06 / Group1: 1.7778e-05 / Group2: 2.6667e-05 / Group3: 4.4444e-04 / Group4: 4.4444e-04 / Group5: 4.4444e-04 / 


  0%|          | 0/170 [00:00<?, ?it/s]

  10/ 170  <train> Loss:1.6849  Acc:0.6398  fbScore:0.5181   LR -> Group0: 8.7582e-06 / Group1: 1.7516e-05 / Group2: 2.6275e-05 / Group3: 4.3791e-04 / Group4: 4.3791e-04 / Group5: 4.3791e-04 / 
  20/ 170  <train> Loss:1.4675  Acc:0.6566  fbScore:0.5973   LR -> Group0: 8.6275e-06 / Group1: 1.7255e-05 / Group2: 2.5882e-05 / Group3: 4.3137e-04 / Group4: 4.3137e-04 / Group5: 4.3137e-04 / 
  30/ 170  <train> Loss:1.2927  Acc:0.6638  fbScore:0.6498   LR -> Group0: 8.4967e-06 / Group1: 1.6993e-05 / Group2: 2.5490e-05 / Group3: 4.2484e-04 / Group4: 4.2484e-04 / Group5: 4.2484e-04 / 
  40/ 170  <train> Loss:1.1618  Acc:0.7031  fbScore:0.6722   LR -> Group0: 8.3660e-06 / Group1: 1.6732e-05 / Group2: 2.5098e-05 / Group3: 4.1830e-04 / Group4: 4.1830e-04 / Group5: 4.1830e-04 / 
  50/ 170  <train> Loss:1.1228  Acc:0.7080  fbScore:0.6703   LR -> Group0: 8.2353e-06 / Group1: 1.6471e-05 / Group2: 2.4706e-05 / Group3: 4.1176e-04 / Group4: 4.1176e-04 / Group5: 4.1176e-04 / 
  60/ 170  <train> Loss:1.0704

  0%|          | 0/11 [00:00<?, ?it/s]

<val> Loss:0.6379  Acc:0.8379  fbScore:0.8465
Checkpoints have been updated to the epoch 2 weights.
------------------------------------------------------------------------------------------------------------------------------------------------------
【Epoch   3/  5】   LR -> Group0: 6.6667e-06 / Group1: 1.3333e-05 / Group2: 2.0000e-05 / Group3: 3.3333e-04 / Group4: 3.3333e-04 / Group5: 3.3333e-04 / 


  0%|          | 0/170 [00:00<?, ?it/s]

  10/ 170  <train> Loss:0.7132  Acc:0.8156  fbScore:0.7738   LR -> Group0: 6.5359e-06 / Group1: 1.3072e-05 / Group2: 1.9608e-05 / Group3: 3.2680e-04 / Group4: 3.2680e-04 / Group5: 3.2680e-04 / 
  20/ 170  <train> Loss:0.5709  Acc:0.8395  fbScore:0.7789   LR -> Group0: 6.4052e-06 / Group1: 1.2810e-05 / Group2: 1.9216e-05 / Group3: 3.2026e-04 / Group4: 3.2026e-04 / Group5: 3.2026e-04 / 
  30/ 170  <train> Loss:0.5032  Acc:0.8677  fbScore:0.8300   LR -> Group0: 6.2745e-06 / Group1: 1.2549e-05 / Group2: 1.8824e-05 / Group3: 3.1373e-04 / Group4: 3.1373e-04 / Group5: 3.1373e-04 / 
  40/ 170  <train> Loss:0.4687  Acc:0.8861  fbScore:0.8588   LR -> Group0: 6.1438e-06 / Group1: 1.2288e-05 / Group2: 1.8431e-05 / Group3: 3.0719e-04 / Group4: 3.0719e-04 / Group5: 3.0719e-04 / 
  50/ 170  <train> Loss:0.4404  Acc:0.8983  fbScore:0.8674   LR -> Group0: 6.0131e-06 / Group1: 1.2026e-05 / Group2: 1.8039e-05 / Group3: 3.0065e-04 / Group4: 3.0065e-04 / Group5: 3.0065e-04 / 
  60/ 170  <train> Loss:0.4119

  0%|          | 0/11 [00:00<?, ?it/s]

<val> Loss:0.5961  Acc:0.8983  fbScore:0.8799
Checkpoints have been updated to the epoch 3 weights.
------------------------------------------------------------------------------------------------------------------------------------------------------
【Epoch   4/  5】   LR -> Group0: 4.4444e-06 / Group1: 8.8889e-06 / Group2: 1.3333e-05 / Group3: 2.2222e-04 / Group4: 2.2222e-04 / Group5: 2.2222e-04 / 


  0%|          | 0/170 [00:00<?, ?it/s]

  10/ 170  <train> Loss:0.4007  Acc:0.8961  fbScore:0.8067   LR -> Group0: 4.3137e-06 / Group1: 8.6275e-06 / Group2: 1.2941e-05 / Group3: 2.1569e-04 / Group4: 2.1569e-04 / Group5: 2.1569e-04 / 
  20/ 170  <train> Loss:0.3550  Acc:0.9062  fbScore:0.8537   LR -> Group0: 4.1830e-06 / Group1: 8.3660e-06 / Group2: 1.2549e-05 / Group3: 2.0915e-04 / Group4: 2.0915e-04 / Group5: 2.0915e-04 / 
  30/ 170  <train> Loss:0.3233  Acc:0.9156  fbScore:0.8504   LR -> Group0: 4.0523e-06 / Group1: 8.1046e-06 / Group2: 1.2157e-05 / Group3: 2.0261e-04 / Group4: 2.0261e-04 / Group5: 2.0261e-04 / 
  40/ 170  <train> Loss:0.3718  Acc:0.9236  fbScore:0.8442   LR -> Group0: 3.9216e-06 / Group1: 7.8431e-06 / Group2: 1.1765e-05 / Group3: 1.9608e-04 / Group4: 1.9608e-04 / Group5: 1.9608e-04 / 
  50/ 170  <train> Loss:0.3625  Acc:0.9225  fbScore:0.8264   LR -> Group0: 3.7908e-06 / Group1: 7.5817e-06 / Group2: 1.1373e-05 / Group3: 1.8954e-04 / Group4: 1.8954e-04 / Group5: 1.8954e-04 / 
  60/ 170  <train> Loss:0.3574

  0%|          | 0/11 [00:00<?, ?it/s]

<val> Loss:0.5291  Acc:0.8773  fbScore:0.8791
------------------------------------------------------------------------------------------------------------------------------------------------------
【Epoch   5/  5】   LR -> Group0: 2.2222e-06 / Group1: 4.4444e-06 / Group2: 6.6667e-06 / Group3: 1.1111e-04 / Group4: 1.1111e-04 / Group5: 1.1111e-04 / 


  0%|          | 0/170 [00:00<?, ?it/s]

  10/ 170  <train> Loss:0.3874  Acc:0.8805  fbScore:0.7676   LR -> Group0: 2.0915e-06 / Group1: 4.1830e-06 / Group2: 6.2745e-06 / Group3: 1.0458e-04 / Group4: 1.0458e-04 / Group5: 1.0458e-04 / 
  20/ 170  <train> Loss:0.3627  Acc:0.8988  fbScore:0.8446   LR -> Group0: 1.9608e-06 / Group1: 3.9216e-06 / Group2: 5.8824e-06 / Group3: 9.8039e-05 / Group4: 9.8039e-05 / Group5: 9.8039e-05 / 
  30/ 170  <train> Loss:0.3443  Acc:0.9117  fbScore:0.8772   LR -> Group0: 1.8301e-06 / Group1: 3.6601e-06 / Group2: 5.4902e-06 / Group3: 9.1503e-05 / Group4: 9.1503e-05 / Group5: 9.1503e-05 / 
  40/ 170  <train> Loss:0.3430  Acc:0.9156  fbScore:0.8696   LR -> Group0: 1.6993e-06 / Group1: 3.3987e-06 / Group2: 5.0980e-06 / Group3: 8.4967e-05 / Group4: 8.4967e-05 / Group5: 8.4967e-05 / 
  50/ 170  <train> Loss:0.3327  Acc:0.9195  fbScore:0.8668   LR -> Group0: 1.5686e-06 / Group1: 3.1373e-06 / Group2: 4.7059e-06 / Group3: 7.8431e-05 / Group4: 7.8431e-05 / Group5: 7.8431e-05 / 
  60/ 170  <train> Loss:0.3246

  0%|          | 0/11 [00:00<?, ?it/s]

<val> Loss:0.6244  Acc:0.9418  fbScore:0.9015
Checkpoints have been updated to the epoch 5 weights.
------------------------------------------------------------------------------------------------------------------------------------------------------
Evaluate Test Dataset


  0%|          | 0/54 [00:00<?, ?it/s]

Loss:0.3083  Acc:0.9451  fbScore:0.9392
fb_score : 0.9460852689671903

[32mCross-validation loop : 5/5[0m
Train  ->  label_1:504 / all:21716   (2.321%)
Valid  ->  label_1:126 / all:5429   (2.321%)
Choosed BertLstmExModel


Some weights of the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


InitLR:2e-05 / num_warmup_steps:85 / num_training_steps:850
Use 4 GPUs
Using device : cuda
【Epoch   1/  5】   LR -> Group0: 0.0000e+00 / Group1: 0.0000e+00 / Group2: 0.0000e+00 / Group3: 0.0000e+00 / Group4: 0.0000e+00 / Group5: 0.0000e+00 / 


  0%|          | 0/170 [00:00<?, ?it/s]

  10/ 170  <train> Loss:3.1621  Acc:0.0242  fbScore:0.4808   LR -> Group0: 1.1765e-06 / Group1: 2.3529e-06 / Group2: 3.5294e-06 / Group3: 5.8824e-05 / Group4: 5.8824e-05 / Group5: 5.8824e-05 / 
  20/ 170  <train> Loss:3.3191  Acc:0.0258  fbScore:0.5105   LR -> Group0: 2.3529e-06 / Group1: 4.7059e-06 / Group2: 7.0588e-06 / Group3: 1.1765e-04 / Group4: 1.1765e-04 / Group5: 1.1765e-04 / 
  30/ 170  <train> Loss:3.4995  Acc:0.0276  fbScore:0.5353   LR -> Group0: 3.5294e-06 / Group1: 7.0588e-06 / Group2: 1.0588e-05 / Group3: 1.7647e-04 / Group4: 1.7647e-04 / Group5: 1.7647e-04 / 
  40/ 170  <train> Loss:3.4671  Acc:0.0273  fbScore:0.5358   LR -> Group0: 4.7059e-06 / Group1: 9.4118e-06 / Group2: 1.4118e-05 / Group3: 2.3529e-04 / Group4: 2.3529e-04 / Group5: 2.3529e-04 / 
  50/ 170  <train> Loss:3.2801  Acc:0.0256  fbScore:0.5146   LR -> Group0: 5.8824e-06 / Group1: 1.1765e-05 / Group2: 1.7647e-05 / Group3: 2.9412e-04 / Group4: 2.9412e-04 / Group5: 2.9412e-04 / 
  60/ 170  <train> Loss:3.1432

  0%|          | 0/11 [00:00<?, ?it/s]

<val> Loss:1.1873  Acc:0.7484  fbScore:0.7903
Checkpoints have been updated to the epoch 1 weights.
------------------------------------------------------------------------------------------------------------------------------------------------------
【Epoch   2/  5】   LR -> Group0: 8.8889e-06 / Group1: 1.7778e-05 / Group2: 2.6667e-05 / Group3: 4.4444e-04 / Group4: 4.4444e-04 / Group5: 4.4444e-04 / 


  0%|          | 0/170 [00:00<?, ?it/s]

  10/ 170  <train> Loss:1.0751  Acc:0.7000  fbScore:0.7189   LR -> Group0: 8.7582e-06 / Group1: 1.7516e-05 / Group2: 2.6275e-05 / Group3: 4.3791e-04 / Group4: 4.3791e-04 / Group5: 4.3791e-04 / 
  20/ 170  <train> Loss:1.1111  Acc:0.7973  fbScore:0.7598   LR -> Group0: 8.6275e-06 / Group1: 1.7255e-05 / Group2: 2.5882e-05 / Group3: 4.3137e-04 / Group4: 4.3137e-04 / Group5: 4.3137e-04 / 
  30/ 170  <train> Loss:1.0848  Acc:0.8172  fbScore:0.7707   LR -> Group0: 8.4967e-06 / Group1: 1.6993e-05 / Group2: 2.5490e-05 / Group3: 4.2484e-04 / Group4: 4.2484e-04 / Group5: 4.2484e-04 / 
  40/ 170  <train> Loss:1.0975  Acc:0.8053  fbScore:0.7722   LR -> Group0: 8.3660e-06 / Group1: 1.6732e-05 / Group2: 2.5098e-05 / Group3: 4.1830e-04 / Group4: 4.1830e-04 / Group5: 4.1830e-04 / 
  50/ 170  <train> Loss:1.0423  Acc:0.8137  fbScore:0.7694   LR -> Group0: 8.2353e-06 / Group1: 1.6471e-05 / Group2: 2.4706e-05 / Group3: 4.1176e-04 / Group4: 4.1176e-04 / Group5: 4.1176e-04 / 
  60/ 170  <train> Loss:0.9708

  0%|          | 0/11 [00:00<?, ?it/s]

<val> Loss:0.6936  Acc:0.8425  fbScore:0.8495
Checkpoints have been updated to the epoch 2 weights.
------------------------------------------------------------------------------------------------------------------------------------------------------
【Epoch   3/  5】   LR -> Group0: 6.6667e-06 / Group1: 1.3333e-05 / Group2: 2.0000e-05 / Group3: 3.3333e-04 / Group4: 3.3333e-04 / Group5: 3.3333e-04 / 


  0%|          | 0/170 [00:00<?, ?it/s]

  10/ 170  <train> Loss:0.6319  Acc:0.7937  fbScore:0.6611   LR -> Group0: 6.5359e-06 / Group1: 1.3072e-05 / Group2: 1.9608e-05 / Group3: 3.2680e-04 / Group4: 3.2680e-04 / Group5: 3.2680e-04 / 
  20/ 170  <train> Loss:0.5781  Acc:0.8340  fbScore:0.7804   LR -> Group0: 6.4052e-06 / Group1: 1.2810e-05 / Group2: 1.9216e-05 / Group3: 3.2026e-04 / Group4: 3.2026e-04 / Group5: 3.2026e-04 / 
  30/ 170  <train> Loss:0.5764  Acc:0.8576  fbScore:0.8125   LR -> Group0: 6.2745e-06 / Group1: 1.2549e-05 / Group2: 1.8824e-05 / Group3: 3.1373e-04 / Group4: 3.1373e-04 / Group5: 3.1373e-04 / 
  40/ 170  <train> Loss:0.5633  Acc:0.8570  fbScore:0.8091   LR -> Group0: 6.1438e-06 / Group1: 1.2288e-05 / Group2: 1.8431e-05 / Group3: 3.0719e-04 / Group4: 3.0719e-04 / Group5: 3.0719e-04 / 
  50/ 170  <train> Loss:0.5292  Acc:0.8711  fbScore:0.8379   LR -> Group0: 6.0131e-06 / Group1: 1.2026e-05 / Group2: 1.8039e-05 / Group3: 3.0065e-04 / Group4: 3.0065e-04 / Group5: 3.0065e-04 / 
  60/ 170  <train> Loss:0.5047

  0%|          | 0/11 [00:00<?, ?it/s]

<val> Loss:0.5228  Acc:0.8840  fbScore:0.8919
Checkpoints have been updated to the epoch 3 weights.
------------------------------------------------------------------------------------------------------------------------------------------------------
【Epoch   4/  5】   LR -> Group0: 4.4444e-06 / Group1: 8.8889e-06 / Group2: 1.3333e-05 / Group3: 2.2222e-04 / Group4: 2.2222e-04 / Group5: 2.2222e-04 / 


  0%|          | 0/170 [00:00<?, ?it/s]

  10/ 170  <train> Loss:0.3580  Acc:0.9125  fbScore:0.9336   LR -> Group0: 4.3137e-06 / Group1: 8.6275e-06 / Group2: 1.2941e-05 / Group3: 2.1569e-04 / Group4: 2.1569e-04 / Group5: 2.1569e-04 / 
  20/ 170  <train> Loss:0.2692  Acc:0.9344  fbScore:0.8484   LR -> Group0: 4.1830e-06 / Group1: 8.3660e-06 / Group2: 1.2549e-05 / Group3: 2.0915e-04 / Group4: 2.0915e-04 / Group5: 2.0915e-04 / 
  30/ 170  <train> Loss:0.2604  Acc:0.9435  fbScore:0.8525   LR -> Group0: 4.0523e-06 / Group1: 8.1046e-06 / Group2: 1.2157e-05 / Group3: 2.0261e-04 / Group4: 2.0261e-04 / Group5: 2.0261e-04 / 
  40/ 170  <train> Loss:0.2477  Acc:0.9469  fbScore:0.8549   LR -> Group0: 3.9216e-06 / Group1: 7.8431e-06 / Group2: 1.1765e-05 / Group3: 1.9608e-04 / Group4: 1.9608e-04 / Group5: 1.9608e-04 / 
  50/ 170  <train> Loss:0.2546  Acc:0.9475  fbScore:0.8765   LR -> Group0: 3.7908e-06 / Group1: 7.5817e-06 / Group2: 1.1373e-05 / Group3: 1.8954e-04 / Group4: 1.8954e-04 / Group5: 1.8954e-04 / 
  60/ 170  <train> Loss:0.2672

  0%|          | 0/11 [00:00<?, ?it/s]

<val> Loss:0.6820  Acc:0.9396  fbScore:0.8993
Checkpoints have been updated to the epoch 4 weights.
------------------------------------------------------------------------------------------------------------------------------------------------------
【Epoch   5/  5】   LR -> Group0: 2.2222e-06 / Group1: 4.4444e-06 / Group2: 6.6667e-06 / Group3: 1.1111e-04 / Group4: 1.1111e-04 / Group5: 1.1111e-04 / 


  0%|          | 0/170 [00:00<?, ?it/s]

  10/ 170  <train> Loss:0.2413  Acc:0.9531  fbScore:0.9544   LR -> Group0: 2.0915e-06 / Group1: 4.1830e-06 / Group2: 6.2745e-06 / Group3: 1.0458e-04 / Group4: 1.0458e-04 / Group5: 1.0458e-04 / 
  20/ 170  <train> Loss:0.2680  Acc:0.9480  fbScore:0.9483   LR -> Group0: 1.9608e-06 / Group1: 3.9216e-06 / Group2: 5.8824e-06 / Group3: 9.8039e-05 / Group4: 9.8039e-05 / Group5: 9.8039e-05 / 
  30/ 170  <train> Loss:0.2879  Acc:0.9365  fbScore:0.9068   LR -> Group0: 1.8301e-06 / Group1: 3.6601e-06 / Group2: 5.4902e-06 / Group3: 9.1503e-05 / Group4: 9.1503e-05 / Group5: 9.1503e-05 / 
  40/ 170  <train> Loss:0.3399  Acc:0.9373  fbScore:0.9026   LR -> Group0: 1.6993e-06 / Group1: 3.3987e-06 / Group2: 5.0980e-06 / Group3: 8.4967e-05 / Group4: 8.4967e-05 / Group5: 8.4967e-05 / 
  50/ 170  <train> Loss:0.3219  Acc:0.9366  fbScore:0.9088   LR -> Group0: 1.5686e-06 / Group1: 3.1373e-06 / Group2: 4.7059e-06 / Group3: 7.8431e-05 / Group4: 7.8431e-05 / Group5: 7.8431e-05 / 
  60/ 170  <train> Loss:0.3051

  0%|          | 0/11 [00:00<?, ?it/s]

<val> Loss:0.9162  Acc:0.9497  fbScore:0.8863
------------------------------------------------------------------------------------------------------------------------------------------------------
Evaluate Test Dataset


  0%|          | 0/54 [00:00<?, ?it/s]

Loss:0.3243  Acc:0.9419  fbScore:0.9354
fb_score : 0.94359594737638



In [17]:
logs.keys()

dict_keys(['fit_history', 'test_preds_labels', 'test_fb_score'])

In [18]:
logs['test_fb_score'][1]

0.9067357512953368

In [19]:
for i in range(hps.cv_n):
    fb_score = logs['test_fb_score'][i]
    print(fb_score)

0.9028831562974203
0.9067357512953368
0.884621200665356
0.892047172664046
0.8957654723127035


In [20]:
test_pred_df = pd.DataFrame(columns=[f"cv{i}" for i in range(hps.cv_n)] + ['cv_ensemble', 'label'])
test_pred_df['label'] = logs['test_preds_labels'][0]['labels']
for i in range(hps.cv_n):
    test_pred_df[f"cv{i}"] = logs['test_preds_labels'][i]['preds']




test_pred_df['cv_ensemble'] = test_pred_df.loc[:, 'cv0':'cv4'].mean(axis=1).map(lambda x: 1 if x >= 0.5 else 0)


display(test_pred_df)
display(test_pred_df.describe())

Unnamed: 0,cv0,cv1,cv2,cv3,cv4,cv_ensemble,label
0,0.0,0.0,0.0,0.0,0.0,0,0.0
1,0.0,0.0,0.0,0.0,0.0,0,0.0
2,0.0,0.0,0.0,0.0,0.0,0,0.0
3,0.0,0.0,0.0,0.0,0.0,0,0.0
4,0.0,0.0,0.0,0.0,0.0,0,0.0
...,...,...,...,...,...,...,...
5424,0.0,0.0,0.0,0.0,0.0,0,0.0
5425,0.0,0.0,0.0,0.0,1.0,0,0.0
5426,0.0,0.0,0.0,0.0,0.0,0,0.0
5427,0.0,0.0,0.0,0.0,0.0,0,0.0


Unnamed: 0,cv0,cv1,cv2,cv3,cv4,cv_ensemble,label
count,5429.0,5429.0,5429.0,5429.0,5429.0,5429.0,5429.0
mean,0.076626,0.071468,0.080862,0.081046,0.106834,0.076441,0.023209
std,0.266021,0.257629,0.272648,0.272931,0.30893,0.265727,0.15058
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [88]:
cv_ensemble_fb_score = fbeta_score(y_true=test_pred_df['label'], y_pred=test_pred_df['cv_ensemble'], beta=7.0)
print(f"CV_Ensemble_Fb_score : {cv_ensemble_fb_score}")

NameError: name 'test_pred_df' is not defined

# Submit

In [18]:
class Predicting:
    def __init__(self, weights_dir:str):
        self.weights_dir = weights_dir
        self.logs = {'test_preds_labels':[]}
        self.pred_df = pd.DataFrame(columns=[f"cv{i}" for i in range(hps.cv_n)] + ['cv_ensemble', 'label'])

    def predict(self, df, pred_phase='submit'):

        df.reset_index(drop=False, inplace=True)

        weights_path_list = os.listdir(self.weights_dir)
        for weights_path in weights_path_list:
            print('\033[32m' + weights_path + '\033[0m')

            # load model
            if hps.model_type == 'lstm':
                print(f"Choosed BertLstmModel")
                model = BertLstmModel(hidden_size=bert_config.hidden_size)
            elif hps.model_type == 'lstm_ex':
                print(f"Choosed BertLstmExModel")
                model = BertLstmExModel(hidden_size=bert_config.hidden_size, config=bert_config, use_hidden_n=4)
            
            # To GPU
            print(f"Model to GPU ... ", end='')
            model = model.to(device)
            device_num = torch.cuda.device_count()
            if device_num > 1:
                print(f"Use {device_num} GPUs ", end='')
                model = nn.DataParallel(model)
            print(f"Done!")

            # load
            print(f"Loading model ... ", end='')
            model.load_state_dict(torch.load(os.path.join(self.weights_dir, weights_path)))
            print(f"Done!")

            # Datasets / Dataloaders

            phase_param = {
                "argument":{'test': False, 'submit': False},
                "batch_size":{'test':hps.batch_size*4, 'submit': hps.batch_size*4},
                "shuffle":{'test': False, 'submit': False},
                "upsample_pos_n":{'val': 1, 'test': 1, 'submit': 1},
            }
            datasets = {phase:TextClassificationDataset(df=df, tokenizer=base_tokenizer, use_col=hps.use_col,\
                                                        token_max_length=hps.token_max_length, argument=phase_param['argument'][phase],\
                                                        upsample_pos_n=phase_param['upsample_pos_n'][phase]) for phase in ['test', 'submit']}
            dataloaders = {phase: DataLoader(datasets[phase], batch_size=phase_param['batch_size'][phase], \
                                            shuffle=phase_param['shuffle'][phase]) for phase in ['test', 'submit']}


            # inference
            preds_labels_dict = inference(model, dataloader=dataloaders[pred_phase], device=device, evaluate=False)
            self.logs['test_preds_labels'].append(preds_labels_dict)

            del model, datasets, dataloaders
            torch.cuda.empty_cache()
            print()

    def get_logs(self):
        return self.logs

    def ensemble(self):
        
        self.pred_df['label'] = self.logs['test_preds_labels'][0]['labels']
        for i in range(hps.cv_n):
            self.pred_df[f"cv{i}"] = self.logs['test_preds_labels'][i]['preds']
        self.pred_df['cv_ensemble'] = self.pred_df.loc[:, 'cv0':'cv4'].mean(axis=1).map(lambda x: 1 if x >= 0.5 else 0)
        return self.pred_df

    def get_fb_score(self):
        cv_ensemble_fb_score = fbeta_score(y_true=self.pred_df['label'], y_pred=self.pred_df['cv_ensemble'], beta=7.0)
        return cv_ensemble_fb_score


In [19]:
predict = Predicting(weights_dir='cross_validation_weights')

In [20]:
predict.predict(df=submit_df.copy(), pred_phase='submit')

[32mbert_text_classification_cv0.pth[0m
Choosed BertLstmExModel


Some weights of the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model to GPU ... Use 4 GPUs Done!
Loading model ... Done!


  0%|          | 0/80 [00:00<?, ?it/s]


[32mbert_text_classification_cv1.pth[0m
Choosed BertLstmExModel


Some weights of the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model to GPU ... Use 4 GPUs Done!
Loading model ... Done!


  0%|          | 0/80 [00:00<?, ?it/s]


[32mbert_text_classification_cv2.pth[0m
Choosed BertLstmExModel


Some weights of the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model to GPU ... Use 4 GPUs Done!
Loading model ... Done!


  0%|          | 0/80 [00:00<?, ?it/s]


[32mbert_text_classification_cv3.pth[0m
Choosed BertLstmExModel


Some weights of the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model to GPU ... Use 4 GPUs Done!
Loading model ... Done!


  0%|          | 0/80 [00:00<?, ?it/s]


[32mbert_text_classification_cv4.pth[0m
Choosed BertLstmExModel


Some weights of the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model to GPU ... Use 4 GPUs Done!
Loading model ... Done!


  0%|          | 0/80 [00:00<?, ?it/s]




In [44]:
pred_df = predict.ensemble()

In [49]:
pred_df

Unnamed: 0,cv0,cv1,cv2,cv3,cv4,cv_ensemble,label
27145,0.0,0.0,0.0,0.0,0.0,0,-1.0
27146,0.0,0.0,0.0,0.0,0.0,0,-1.0
27147,0.0,0.0,0.0,0.0,0.0,0,-1.0
27148,0.0,0.0,0.0,0.0,0.0,0,-1.0
27149,0.0,0.0,0.0,0.0,0.0,0,-1.0
...,...,...,...,...,...,...,...
67974,0.0,0.0,0.0,0.0,0.0,0,-1.0
67975,0.0,0.0,0.0,0.0,0.0,0,-1.0
67976,0.0,0.0,0.0,0.0,0.0,0,-1.0
67977,0.0,0.0,0.0,0.0,0.0,0,-1.0


In [54]:
pred_df.describe()

Unnamed: 0,cv0,cv1,cv2,cv3,cv4,cv_ensemble,label
count,40834.0,40834.0,40834.0,40834.0,40834.0,40834.0,40834.0
mean,0.08275,0.063379,0.074766,0.078415,0.080595,0.072978,-1.0
std,0.275507,0.243646,0.263017,0.268827,0.272215,0.260104,0.0
min,0.0,0.0,0.0,0.0,0.0,0.0,-1.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,-1.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,-1.0
75%,0.0,0.0,0.0,0.0,0.0,0.0,-1.0
max,1.0,1.0,1.0,1.0,1.0,1.0,-1.0


In [48]:
pred_df.index += len(orig_df)

In [50]:
sample_submit_df['judgement'] = pred_df['cv_ensemble']

In [51]:
sample_submit_df

Unnamed: 0,judgement
27145,0
27146,0
27147,0
27148,0
27149,0
...,...
67974,0
67975,0
67976,0
67977,0


In [52]:
cv_ensemble_fb_score = 0

In [53]:
jst = dt.timezone(dt.timedelta(hours=+9), 'JST')
dt_now = dt.datetime.now(jst)
dt_now_str = dt_now.strftime('%Y%m%d_%H%M')
submit_str = f"{dt_now_str}_{cv_ensemble_fb_score:.4f}".replace('.', '-')
submit_str = f"{submit_str}.csv"
print(submit_str)

os.makedirs('./submit', exist_ok=True)
sample_submit_df.to_csv(os.path.join('submit', submit_str), header=False)

20211002_0045_0-0000.csv
