In [1]:
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torch import optim
from torch.optim import lr_scheduler

import transformers
from transformers.optimization import Adafactor, AdafactorSchedule

from sklearn.metrics import fbeta_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

import pandas as pd
import numpy as np
import os
import random
import time
from tqdm.notebook import tqdm
import datetime as dt
import copy
import matplotlib.pyplot as plt


In [2]:
os.environ["CUDA_VISIBLE_DEVICES"]='0,1,2,3'

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using {device}")

Using cuda


In [3]:
model_name_dict = {
    "PubMedBERT": "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext",
    "biomed_roberta_base": "allenai/biomed_roberta_base",
    "Bio_ClinicalBERT":"emilyalsentzer/Bio_ClinicalBERT",
}

class Hparams:
    def __init__(self):
        self.random_seed = 0 # BAD:2021
        self.data_dir = './data'
        self.output_dir = './outputs'
        self.batch_size = 128
        self.token_max_length = 512
        self.model_name = model_name_dict['PubMedBERT']
        self.num_epochs = 5
        self.class_1_weight = 150
        self.initial_lr = 2e-5  # 2e-5
        self.model_type = 'lstm_ex'  # cnn, lstm, lstm_ex
        self.upsample_pos_n = 1
        self.use_col = 'title_abstract'  # title, abstract, title_abstract
        self.train_argument = True
        self.cv_n = 5

hps = Hparams()


def seed_torch(seed:int):
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True

seed_torch(hps.random_seed)

## DataFrame

In [4]:
orig_df = pd.read_csv(os.path.join(hps.data_dir, 'train.csv'), index_col=0)
submit_df = pd.read_csv(os.path.join(hps.data_dir, 'test.csv'), index_col=0)
sample_submit_df = pd.read_csv(os.path.join(hps.data_dir, 'sample_submit.csv'), index_col=0, header=None, names=['judgement'])

# 修正
orig_df.loc[2488, 'judgement'] = 0
orig_df.loc[7708, 'judgement'] = 0

# 補完
orig_df['abstract'].fillna('', inplace=True)
orig_df['title_abstract'] = orig_df.title + orig_df.abstract

submit_df['abstract'].fillna('', inplace=True)
submit_df['title_abstract'] = submit_df.title + submit_df.abstract
submit_df['judgement'] = -1
submit_df.reset_index(inplace=True, drop=False)

## Cross Validations SetUp

In [5]:
train_df, test_df = train_test_split(orig_df, test_size=0.2, random_state=hps.random_seed, shuffle=True, stratify=orig_df.judgement)

In [6]:
def get_cv_number(df, cv_n):

    df['cv_id'] = 0

    neg_idx = df.loc[df.judgement==0].index.tolist()
    pos_idx = df.loc[df.judgement==1].index.tolist()

    neg_idx = [list(a) for a in list(np.array_split(random.sample(neg_idx, len(neg_idx)), cv_n))]
    pos_idx = [list(a) for a in list(np.array_split(random.sample(pos_idx, len(pos_idx)), cv_n))]

    for i in range(cv_n):
        n_id = neg_idx[i]
        p_id = pos_idx[i]
        df.loc[n_id, 'cv_id'] = i
        df.loc[p_id, 'cv_id'] = i

    df = df.sort_index()

    for i in range(cv_n):
        tmp_df = df.loc[df.cv_id==i]
        print('cv_id:', i, '->  pos:', len(tmp_df.loc[tmp_df.judgement==1]), ' / neg:', len(tmp_df.loc[tmp_df.judgement==0]), ' / all:', len(tmp_df))
        
    return df


train_df = get_cv_number(train_df, cv_n=hps.cv_n)

cv_id: 0 ->  pos: 101  / neg: 4243  / all: 4344
cv_id: 1 ->  pos: 101  / neg: 4243  / all: 4344
cv_id: 2 ->  pos: 101  / neg: 4242  / all: 4343
cv_id: 3 ->  pos: 101  / neg: 4242  / all: 4343
cv_id: 4 ->  pos: 100  / neg: 4242  / all: 4342


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['cv_id'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


## Hugging Face

In [7]:
base_tokenizer = transformers.AutoTokenizer.from_pretrained(hps.model_name)

bert_config = transformers.AutoConfig.from_pretrained(hps.model_name)
bert_config.output_hidden_states = True

## DataSet / DataLoader

In [8]:
class TextClassificationDataset(Dataset):
    def __init__(self, df, tokenizer, use_col='title_abstract', token_max_length=512, argument=False, upsample_pos_n=1):

        if upsample_pos_n > 1:
            df_pos = df.loc[df.judgement==1]
            df_pos = pd.concat([df_pos for i in range(int(upsample_pos_n))], axis=0).reset_index(drop=True)
            df_neg = df.loc[df.judgement==0]
            self.df = pd.concat([df_pos, df_neg], axis=0).reset_index(drop=True)
        else:
            self.df = df
        
        self.tokenizer = tokenizer
        self.argument = argument
        self.use_col = use_col

    def text_argument(self, text, drop_min_seq=3, seq_sort=True):
        seq_list = text.split('. ')
        seq_len = len(seq_list)
        if seq_len >= drop_min_seq:
            orig_idx_list = list(range(0, seq_len))
            idx_list = random.sample(orig_idx_list, random.randint(round(seq_len * 0.7), seq_len))
            if seq_sort:
                idx_list = sorted(idx_list)
            insert_idx_list = random.sample(orig_idx_list, random.randint(0, seq_len//3))
            for x in insert_idx_list:
                idx = random.randint(0, len(idx_list))
                idx_list.insert(idx, x)
            seq_list = [seq_list[i] for i in idx_list]
        text = '. '.join(seq_list)
        return text

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        
        text = self.df.loc[idx, self.use_col]

        if self.argument:
            text = self.text_argument(text, drop_min_seq=3, seq_sort=True)

        token = self.tokenizer.encode_plus(
            text,
            padding = 'max_length', max_length = hps.token_max_length, truncation = True,
            return_attention_mask=True, return_tensors='pt'
        )

        sample = dict(
            input_ids=token['input_ids'][0],
            attention_mask=token['attention_mask'][0]
        )
        
        label = torch.tensor(self.df.loc[idx, 'judgement'], dtype=torch.float32)
        return sample, label
        

## Model

In [9]:
class BertLstmModel(nn.Module):
    def __init__(self, hidden_size):
        super().__init__()
        self.bert = transformers.AutoModel.from_pretrained(hps.model_name, config=bert_config)
        self.lstm = nn.LSTM(hidden_size, hidden_size, batch_first=True, bidirectional=True)
        self.leakyrelu = nn.LeakyReLU()
        self.dropout = nn.Dropout(p=0.5)
        self.regressor = nn.Linear(hidden_size*2, 1)
    
    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        self.lstm.flatten_parameters()
        out, _ = self.lstm(outputs['last_hidden_state'], None)
        out = self.leakyrelu(out)
        sequence_output = out[:, -1, :]
        output = self.dropout(sequence_output)
        logits = torch.flatten(self.regressor(output))
        return logits

In [10]:
class BertLstmExModel(nn.Module):
    def __init__(self, hidden_size, config, use_hidden_n=10):
        super().__init__()
        
        self.bert = transformers.AutoModel.from_pretrained(hps.model_name, config=bert_config)
        self.hidden_size = hidden_size
        self.use_hidden_n = use_hidden_n
        self.lstm = nn.LSTM(self.hidden_size, self.hidden_size, batch_first=True, bidirectional=True)
        self.leakyrelu = nn.LeakyReLU()
        self.dropout = nn.Dropout(p=0.3)
        self.conv1d = nn.Conv1d(in_channels=self.use_hidden_n, out_channels=1, kernel_size=3, padding='same')
        self.regressor = nn.Linear(self.hidden_size*2, 1)
        
    
    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        hidden_states_list = [outputs['hidden_states'][-1*i] for i in range(1, self.use_hidden_n+1)]
        self.lstm.flatten_parameters()
        out_list = [
            self.dropout(
                self.leakyrelu(
                    self.lstm(hidden_state, None)[0]
                )[:, -1, :]
            ).view(-1, 1, self.hidden_size*2)  # (batch, use_hidden_n, hidden_size*2)
        for hidden_state in hidden_states_list]

        out = torch.cat(out_list, dim=1)

        out = self.dropout(self.leakyrelu(self.conv1d(out)))
        out = out.view(out.size(0), -1)

        logits = torch.flatten(self.regressor(out))
        return logits

## Checkpoint

In [11]:
class ModelCheckpoint:
    def __init__(self, save_dir:str, save_name:str, cv_id:int):
        os.makedirs(save_dir, exist_ok=True)
        self.cv_id = cv_id
        self.save_dir = save_dir
        self.save_name = save_name
        self.best_loss = self.best_acc = self.best_fbeta_score = 0.0

    def get_checkpoint_name(self):
        checkpoint_name = f"{self.save_name.replace('/', '_')}_cv{self.cv_id}.pth"
        checkpoint_name = os.path.join(self.save_dir, checkpoint_name)
        return checkpoint_name

    def save_checkpoint(self, model):
        torch.save(model.state_dict(), self.get_checkpoint_name())

    def load_checkpoint(self, model=None, manual_name=None):
        if manual_name is None:
            checkpoint_name = self.get_checkpoint_name()
        else:
            checkpoint_name = manual_name
        print(checkpoint_name)
        model.load_state_dict(torch.load(checkpoint_name))
        return model

## Fit

In [12]:
def fit(dataloaders, model, optimizer, num_epochs, device, batch_size, lr_scheduler, cv_id):

    seed_torch(hps.random_seed)

    history = {
        'train':{'loss':[], 'acc':[], 'fbscore':[]},
        'val':{'loss':[], 'acc':[], 'fbscore':[]},
        'lr':[],
    }

    checkpoint = ModelCheckpoint(save_dir='cross_validation_weights', save_name='bert_text_classification', cv_id=cv_id)
    best_model_wts = copy.deepcopy(model.state_dict())

    print(f"Using device : {device}")
    for epoch in range(num_epochs):
        print(f"【Epoch {epoch+1: 3}/{num_epochs: 3}】   LR -> ", end='')
        for i, params in enumerate(optimizer.param_groups):
            print(f"Group{i}: {params['lr']:.4e}", end=' / ')
        print('')

        for phase in ['train', 'val']:
            running_loss = 0.0
            running_corrects = 0
            running_fbeta_score = 0.0
            if phase == 'train':
                model.train()
            else:
                model.eval()
            for i, (inputs, labels) in enumerate(tqdm(dataloaders[phase])):
                input_ids = inputs['input_ids']
                attention_mask = inputs['attention_mask']
                input_ids = input_ids.to(device)
                attention_mask = attention_mask.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()
                
                with torch.set_grad_enabled(phase == 'train'):
                    logits_outputs = model(input_ids=input_ids, attention_mask=attention_mask)
                    pos_weight = torch.tensor([hps.class_1_weight for i in range(input_ids.size(0))]).to(device)
                    criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
                    loss = criterion(logits_outputs, labels)

                    outputs = torch.sigmoid(logits_outputs)
                    preds = torch.where(outputs >= 0.5, 1, 0)
                    
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                        lr_scheduler.step()

                running_loss += loss.item() * input_ids.size(0)
                running_corrects += torch.sum(preds == labels)
                running_fbeta_score += fbeta_score(labels.to('cpu').detach().numpy(), preds.to('cpu').detach().numpy(), beta=7.0, zero_division=0) * input_ids.size(0)    

                if phase == 'train':
                    if i % 10 == 9:
                        total_num = float((i * batch_size) + input_ids.size(0))
                        print(f"{i+1: 4}/{len(dataloaders[phase]): 4}  <{phase}> Loss:{(running_loss/total_num):.4f}  Acc:{(running_corrects/total_num):.4f}  fbScore:{(running_fbeta_score/total_num):.4f}   LR -> ", end='')
                        for i, params in enumerate(optimizer.param_groups):
                            print(f"Group{i}: {params['lr']:.4e}", end=' / ')
                            if isinstance(optimizer.param_groups[0]['lr'], float):
                                history['lr'].append(optimizer.param_groups[0]['lr'])
                            else:
                                history['lr'].append(optimizer.param_groups[0]['lr'].item())
                        print('')

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects / len(dataloaders[phase].dataset)
            epoch_fbscore = running_fbeta_score / len(dataloaders[phase].dataset)
            
            print(f"<{phase}> Loss:{epoch_loss:.4f}  Acc:{epoch_acc:.4f}  fbScore:{epoch_fbscore:.4f}")

            history[phase]['loss'].append(epoch_loss)
            history[phase]['acc'].append(epoch_acc.item())
            history[phase]['fbscore'].append(epoch_fbscore)


            if phase == 'val' and epoch_fbscore > checkpoint.best_fbeta_score:
                print(f"Checkpoints have been updated to the epoch {epoch+1} weights.")
                checkpoint.best_loss = epoch_loss
                checkpoint.best_acc = epoch_acc
                checkpoint.best_fbeta_score = epoch_fbscore
                checkpoint.best_epoch = epoch+1
                best_model_wts = copy.deepcopy(model.state_dict())

        print('-' * 150)

    model.load_state_dict(best_model_wts)
    checkpoint.save_checkpoint(model)
    torch.cuda.empty_cache()

    return model, history

## Inference

In [13]:
def inference(model, dataloader, device, evaluate=True):
    
    running_loss = 0.0
    running_corrects = 0
    running_fbeta_score = 0.0

    preds_labels_dict = dict(preds = np.empty(0), labels = np.empty(0))

    model.eval()

    for i, (inputs, labels) in enumerate(tqdm(dataloader)):
        input_ids = inputs['input_ids']
        attention_mask = inputs['attention_mask']
        input_ids = input_ids.to(device)
        attention_mask = attention_mask.to(device)
        labels = labels.to(device)

        with torch.no_grad():
            logits_outputs = model(input_ids=input_ids, attention_mask=attention_mask)

            if evaluate:
                pos_weight = torch.tensor([hps.class_1_weight for i in range(input_ids.size(0))]).to(device)
                criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
                loss = criterion(logits_outputs, labels)

            outputs = torch.sigmoid(logits_outputs)
            preds = torch.where(outputs >= 0.5, 1, 0)
            
            if evaluate:
                running_loss += loss.item() * input_ids.size(0)
                running_corrects += torch.sum(preds == labels)
                running_fbeta_score += fbeta_score(labels.to('cpu').detach().numpy(), preds.to('cpu').detach().numpy(), beta=7.0, zero_division=0) * input_ids.size(0)

            preds_labels_dict['preds']  = np.hstack([preds_labels_dict['preds'], preds.to('cpu').detach().numpy().copy()])
            preds_labels_dict['labels']  = np.hstack([preds_labels_dict['labels'], labels.to('cpu').detach().numpy().copy()])
    
    if evaluate:
        loss = running_loss / len(dataloader.dataset)
        acc = running_corrects / len(dataloader.dataset)
        fbscore = running_fbeta_score / len(dataloader.dataset)
        print(f"Loss:{loss:.4f}  Acc:{acc:.4f}  fbScore:{fbscore:.4f}")
    return preds_labels_dict

## CrossValidation Loop

In [14]:
def model_setup(model, dataloaders):

    optimizer = optim.AdamW(
        params=[
            {'params': model.bert.embeddings.parameters(), 'lr': 1e-5},
            {'params': model.bert.encoder.parameters(), 'lr': 2e-5},
            {'params': model.bert.pooler.parameters(), 'lr': 3e-5},
            {'params': model.lstm.parameters(), 'lr': 5e-4},
            {'params': model.conv1d.parameters(), 'lr': 5e-4},
            {'params': model.regressor.parameters(), 'lr': 5e-4}
        ]
    )
    num_warmup_steps = round(hps.num_epochs * len(dataloaders['train']) * 0.1)
    num_training_steps = round(hps.num_epochs * len(dataloaders['train']))
    print(f"InitLR:{hps.initial_lr} / num_warmup_steps:{num_warmup_steps} / num_training_steps:{num_training_steps}")
    lr_scheduler = transformers.get_linear_schedule_with_warmup(optimizer=optimizer, num_warmup_steps=num_warmup_steps, 
                                                                num_training_steps=num_training_steps, last_epoch=-1)

    return (optimizer, lr_scheduler)

In [15]:
def cross_validation(cv_n, orig_df, test_df):

    logs = {
        'fit_history':[],
        'test_preds_labels':[],
        'test_fb_score':[],
    }

    for i in range(cv_n):
        print('\033[32m' + f"Cross-validation loop : {i+1}/{cv_n}" + '\033[0m')

        # DataFrame
        train_df = orig_df.loc[orig_df.cv_id != i].copy().reset_index(drop=True)
        valid_df = orig_df.loc[orig_df.cv_id == i].copy().reset_index(drop=True)
        test_df = test_df.reset_index(drop=True)
        print(f"Train  ->  label_1:{train_df.judgement.sum()} / all:{train_df.judgement.count()}   ({train_df.judgement.sum() / train_df.judgement.count() * 100:.3f}%)")
        print(f"Valid  ->  label_1:{valid_df.judgement.sum()} / all:{valid_df.judgement.count()}   ({valid_df.judgement.sum() / valid_df.judgement.count() * 100:.3f}%)")

        # Dataset / Dataloader
        phase_param = {
            "df":{'train': train_df, 'val': valid_df, 'test': test_df, 'submit': submit_df},
            "argument":{'train': hps.train_argument, 'val': False, 'test': False, 'submit': False},
            "batch_size":{'train':hps.batch_size, 'val':hps.batch_size*4, 'test':hps.batch_size*4, 'submit': hps.batch_size*4},
            "shuffle":{'train': True, 'val': False, 'test': False, 'submit': False},
            "upsample_pos_n":{'train': hps.upsample_pos_n, 'val': 1, 'test': 1, 'submit': 1},
        }
        datasets = {phase:TextClassificationDataset(df=phase_param['df'][phase], tokenizer=base_tokenizer, use_col=hps.use_col,\
                                                    token_max_length=hps.token_max_length, argument=phase_param['argument'][phase],\
                                                    upsample_pos_n=phase_param['upsample_pos_n'][phase]) for phase in ['train', 'val', 'test', 'submit']}
        dataloaders = {phase: DataLoader(datasets[phase], batch_size=phase_param['batch_size'][phase], \
                                        shuffle=phase_param['shuffle'][phase]) for phase in ['train', 'val', 'test', 'submit']}
        
        # Model / Optimizer
        if hps.model_type == 'lstm':
            print(f"Choosed BertLstmModel")
            model = BertLstmModel(hidden_size=bert_config.hidden_size)
        elif hps.model_type == 'lstm_ex':
            print(f"Choosed BertLstmExModel")
            model = BertLstmExModel(hidden_size=bert_config.hidden_size, config=bert_config, use_hidden_n=4)

        optimizer, lr_scheduler = model_setup(model, dataloaders)
        model = model.to(device)
        device_num = torch.cuda.device_count()
        if device_num > 1:
            print(f"Use {device_num} GPUs")
            model = nn.DataParallel(model)

        # Training / Validation
        model, fit_history = fit(dataloaders=dataloaders, model=model, optimizer=optimizer, num_epochs=hps.num_epochs, 
                             device=device, batch_size=hps.batch_size, lr_scheduler=lr_scheduler, cv_id=i)

        # Evaluate
        print(f"Evaluate Test Dataset")
        test_preds_labels_dict = inference(model, dataloader=dataloaders['test'], device=device)
        test_fb_score = fbeta_score(y_true=test_preds_labels_dict['labels'], y_pred=test_preds_labels_dict['preds'], beta=7.0)
        print(f"fb_score : {test_fb_score}")   

        logs['fit_history'].append(fit_history)
        logs['test_preds_labels'].append(test_preds_labels_dict)
        logs['test_fb_score'].append(test_fb_score)

        del model, datasets, dataloaders
        torch.cuda.empty_cache()
        print()

    return logs


In [16]:
logs = cross_validation(cv_n=hps.cv_n, orig_df=train_df, test_df=test_df)

[32mCross-validation loop : 1/5[0m
Train  ->  label_1:403 / all:17372   (2.320%)
Valid  ->  label_1:101 / all:4344   (2.325%)
Choosed BertLstmExModel


Some weights of the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


InitLR:2e-05 / num_warmup_steps:68 / num_training_steps:680
Use 4 GPUs
Using device : cuda
【Epoch   1/  5】   LR -> Group0: 0.0000e+00 / Group1: 0.0000e+00 / Group2: 0.0000e+00 / Group3: 0.0000e+00 / Group4: 0.0000e+00 / Group5: 0.0000e+00 / 


  0%|          | 0/136 [00:00<?, ?it/s]

  10/ 136  <train> Loss:2.7276  Acc:0.0563  fbScore:0.4855   LR -> Group0: 1.4706e-06 / Group1: 2.9412e-06 / Group2: 4.4118e-06 / Group3: 7.3529e-05 / Group4: 7.3529e-05 / Group5: 7.3529e-05 / 
  20/ 136  <train> Loss:2.4087  Acc:0.0363  fbScore:0.4355   LR -> Group0: 2.9412e-06 / Group1: 5.8824e-06 / Group2: 8.8235e-06 / Group3: 1.4706e-04 / Group4: 1.4706e-04 / Group5: 1.4706e-04 / 
  30/ 136  <train> Loss:2.4440  Acc:0.0333  fbScore:0.4762   LR -> Group0: 4.4118e-06 / Group1: 8.8235e-06 / Group2: 1.3235e-05 / Group3: 2.2059e-04 / Group4: 2.2059e-04 / Group5: 2.2059e-04 / 
  40/ 136  <train> Loss:2.4408  Acc:0.0311  fbScore:0.4866   LR -> Group0: 5.8824e-06 / Group1: 1.1765e-05 / Group2: 1.7647e-05 / Group3: 2.9412e-04 / Group4: 2.9412e-04 / Group5: 2.9412e-04 / 
  50/ 136  <train> Loss:2.3803  Acc:0.0287  fbScore:0.4839   LR -> Group0: 7.3529e-06 / Group1: 1.4706e-05 / Group2: 2.2059e-05 / Group3: 3.6765e-04 / Group4: 3.6765e-04 / Group5: 3.6765e-04 / 
  60/ 136  <train> Loss:2.3383

  0%|          | 0/9 [00:00<?, ?it/s]

<val> Loss:0.7218  Acc:0.8557  fbScore:0.8562
Checkpoints have been updated to the epoch 1 weights.
------------------------------------------------------------------------------------------------------------------------------------------------------
【Epoch   2/  5】   LR -> Group0: 8.8889e-06 / Group1: 1.7778e-05 / Group2: 2.6667e-05 / Group3: 4.4444e-04 / Group4: 4.4444e-04 / Group5: 4.4444e-04 / 


  0%|          | 0/136 [00:00<?, ?it/s]

  10/ 136  <train> Loss:0.7792  Acc:0.7492  fbScore:0.7861   LR -> Group0: 8.7255e-06 / Group1: 1.7451e-05 / Group2: 2.6176e-05 / Group3: 4.3627e-04 / Group4: 4.3627e-04 / Group5: 4.3627e-04 / 
  20/ 136  <train> Loss:0.9310  Acc:0.7781  fbScore:0.7051   LR -> Group0: 8.5621e-06 / Group1: 1.7124e-05 / Group2: 2.5686e-05 / Group3: 4.2810e-04 / Group4: 4.2810e-04 / Group5: 4.2810e-04 / 
  30/ 136  <train> Loss:0.8893  Acc:0.7576  fbScore:0.7276   LR -> Group0: 8.3987e-06 / Group1: 1.6797e-05 / Group2: 2.5196e-05 / Group3: 4.1993e-04 / Group4: 4.1993e-04 / Group5: 4.1993e-04 / 
  40/ 136  <train> Loss:0.8160  Acc:0.7906  fbScore:0.7682   LR -> Group0: 8.2353e-06 / Group1: 1.6471e-05 / Group2: 2.4706e-05 / Group3: 4.1176e-04 / Group4: 4.1176e-04 / Group5: 4.1176e-04 / 
  50/ 136  <train> Loss:0.7657  Acc:0.8078  fbScore:0.7532   LR -> Group0: 8.0719e-06 / Group1: 1.6144e-05 / Group2: 2.4216e-05 / Group3: 4.0359e-04 / Group4: 4.0359e-04 / Group5: 4.0359e-04 / 
  60/ 136  <train> Loss:0.7161

  0%|          | 0/9 [00:00<?, ?it/s]

<val> Loss:0.5727  Acc:0.8725  fbScore:0.8783
Checkpoints have been updated to the epoch 2 weights.
------------------------------------------------------------------------------------------------------------------------------------------------------
【Epoch   3/  5】   LR -> Group0: 6.6667e-06 / Group1: 1.3333e-05 / Group2: 2.0000e-05 / Group3: 3.3333e-04 / Group4: 3.3333e-04 / Group5: 3.3333e-04 / 


  0%|          | 0/136 [00:00<?, ?it/s]

  10/ 136  <train> Loss:0.3775  Acc:0.8961  fbScore:0.8870   LR -> Group0: 6.5033e-06 / Group1: 1.3007e-05 / Group2: 1.9510e-05 / Group3: 3.2516e-04 / Group4: 3.2516e-04 / Group5: 3.2516e-04 / 
  20/ 136  <train> Loss:0.4886  Acc:0.9094  fbScore:0.8784   LR -> Group0: 6.3399e-06 / Group1: 1.2680e-05 / Group2: 1.9020e-05 / Group3: 3.1699e-04 / Group4: 3.1699e-04 / Group5: 3.1699e-04 / 
  30/ 136  <train> Loss:0.5267  Acc:0.8771  fbScore:0.8585   LR -> Group0: 6.1765e-06 / Group1: 1.2353e-05 / Group2: 1.8529e-05 / Group3: 3.0882e-04 / Group4: 3.0882e-04 / Group5: 3.0882e-04 / 
  40/ 136  <train> Loss:0.5161  Acc:0.8682  fbScore:0.8625   LR -> Group0: 6.0131e-06 / Group1: 1.2026e-05 / Group2: 1.8039e-05 / Group3: 3.0065e-04 / Group4: 3.0065e-04 / Group5: 3.0065e-04 / 
  50/ 136  <train> Loss:0.5246  Acc:0.8733  fbScore:0.8650   LR -> Group0: 5.8497e-06 / Group1: 1.1699e-05 / Group2: 1.7549e-05 / Group3: 2.9248e-04 / Group4: 2.9248e-04 / Group5: 2.9248e-04 / 
  60/ 136  <train> Loss:0.5062

  0%|          | 0/9 [00:00<?, ?it/s]

<val> Loss:0.6640  Acc:0.8994  fbScore:0.8888
Checkpoints have been updated to the epoch 3 weights.
------------------------------------------------------------------------------------------------------------------------------------------------------
【Epoch   4/  5】   LR -> Group0: 4.4444e-06 / Group1: 8.8889e-06 / Group2: 1.3333e-05 / Group3: 2.2222e-04 / Group4: 2.2222e-04 / Group5: 2.2222e-04 / 


  0%|          | 0/136 [00:00<?, ?it/s]

  10/ 136  <train> Loss:0.2858  Acc:0.9094  fbScore:0.8691   LR -> Group0: 4.2810e-06 / Group1: 8.5621e-06 / Group2: 1.2843e-05 / Group3: 2.1405e-04 / Group4: 2.1405e-04 / Group5: 2.1405e-04 / 
  20/ 136  <train> Loss:0.2882  Acc:0.9309  fbScore:0.9122   LR -> Group0: 4.1176e-06 / Group1: 8.2353e-06 / Group2: 1.2353e-05 / Group3: 2.0588e-04 / Group4: 2.0588e-04 / Group5: 2.0588e-04 / 
  30/ 136  <train> Loss:0.2987  Acc:0.9354  fbScore:0.9257   LR -> Group0: 3.9542e-06 / Group1: 7.9085e-06 / Group2: 1.1863e-05 / Group3: 1.9771e-04 / Group4: 1.9771e-04 / Group5: 1.9771e-04 / 
  40/ 136  <train> Loss:0.2743  Acc:0.9365  fbScore:0.9275   LR -> Group0: 3.7908e-06 / Group1: 7.5817e-06 / Group2: 1.1373e-05 / Group3: 1.8954e-04 / Group4: 1.8954e-04 / Group5: 1.8954e-04 / 
  50/ 136  <train> Loss:0.2568  Acc:0.9397  fbScore:0.8553   LR -> Group0: 3.6275e-06 / Group1: 7.2549e-06 / Group2: 1.0882e-05 / Group3: 1.8137e-04 / Group4: 1.8137e-04 / Group5: 1.8137e-04 / 
  60/ 136  <train> Loss:0.2489

  0%|          | 0/9 [00:00<?, ?it/s]

<val> Loss:0.6793  Acc:0.8877  fbScore:0.8795
------------------------------------------------------------------------------------------------------------------------------------------------------
【Epoch   5/  5】   LR -> Group0: 2.2222e-06 / Group1: 4.4444e-06 / Group2: 6.6667e-06 / Group3: 1.1111e-04 / Group4: 1.1111e-04 / Group5: 1.1111e-04 / 


  0%|          | 0/136 [00:00<?, ?it/s]

  10/ 136  <train> Loss:0.3026  Acc:0.9094  fbScore:0.8182   LR -> Group0: 2.0588e-06 / Group1: 4.1176e-06 / Group2: 6.1765e-06 / Group3: 1.0294e-04 / Group4: 1.0294e-04 / Group5: 1.0294e-04 / 
  20/ 136  <train> Loss:0.2977  Acc:0.9176  fbScore:0.8688   LR -> Group0: 1.8954e-06 / Group1: 3.7908e-06 / Group2: 5.6863e-06 / Group3: 9.4771e-05 / Group4: 9.4771e-05 / Group5: 9.4771e-05 / 
  30/ 136  <train> Loss:0.3122  Acc:0.9216  fbScore:0.8872   LR -> Group0: 1.7320e-06 / Group1: 3.4641e-06 / Group2: 5.1961e-06 / Group3: 8.6601e-05 / Group4: 8.6601e-05 / Group5: 8.6601e-05 / 
  40/ 136  <train> Loss:0.3109  Acc:0.9215  fbScore:0.8966   LR -> Group0: 1.5686e-06 / Group1: 3.1373e-06 / Group2: 4.7059e-06 / Group3: 7.8431e-05 / Group4: 7.8431e-05 / Group5: 7.8431e-05 / 
  50/ 136  <train> Loss:0.2966  Acc:0.9248  fbScore:0.9057   LR -> Group0: 1.4052e-06 / Group1: 2.8105e-06 / Group2: 4.2157e-06 / Group3: 7.0261e-05 / Group4: 7.0261e-05 / Group5: 7.0261e-05 / 
  60/ 136  <train> Loss:0.2807

  0%|          | 0/9 [00:00<?, ?it/s]

<val> Loss:0.7810  Acc:0.9466  fbScore:0.9166
Checkpoints have been updated to the epoch 5 weights.
------------------------------------------------------------------------------------------------------------------------------------------------------
Evaluate Test Dataset


  0%|          | 0/11 [00:00<?, ?it/s]

Loss:0.7331  Acc:0.9440  fbScore:0.9039
fb_score : 0.9028831562974203

[32mCross-validation loop : 2/5[0m
Train  ->  label_1:403 / all:17372   (2.320%)
Valid  ->  label_1:101 / all:4344   (2.325%)
Choosed BertLstmExModel


Some weights of the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


InitLR:2e-05 / num_warmup_steps:68 / num_training_steps:680
Use 4 GPUs
Using device : cuda
【Epoch   1/  5】   LR -> Group0: 0.0000e+00 / Group1: 0.0000e+00 / Group2: 0.0000e+00 / Group3: 0.0000e+00 / Group4: 0.0000e+00 / Group5: 0.0000e+00 / 


  0%|          | 0/136 [00:00<?, ?it/s]

  10/ 136  <train> Loss:3.2418  Acc:0.0250  fbScore:0.4800   LR -> Group0: 1.4706e-06 / Group1: 2.9412e-06 / Group2: 4.4118e-06 / Group3: 7.3529e-05 / Group4: 7.3529e-05 / Group5: 7.3529e-05 / 
  20/ 136  <train> Loss:3.2798  Acc:0.0254  fbScore:0.5135   LR -> Group0: 2.9412e-06 / Group1: 5.8824e-06 / Group2: 8.8235e-06 / Group3: 1.4706e-04 / Group4: 1.4706e-04 / Group5: 1.4706e-04 / 
  30/ 136  <train> Loss:3.1058  Acc:0.0237  fbScore:0.4959   LR -> Group0: 4.4118e-06 / Group1: 8.8235e-06 / Group2: 1.3235e-05 / Group3: 2.2059e-04 / Group4: 2.2059e-04 / Group5: 2.2059e-04 / 
  40/ 136  <train> Loss:3.1136  Acc:0.0238  fbScore:0.4986   LR -> Group0: 5.8824e-06 / Group1: 1.1765e-05 / Group2: 1.7647e-05 / Group3: 2.9412e-04 / Group4: 2.9412e-04 / Group5: 2.9412e-04 / 
  50/ 136  <train> Loss:3.0183  Acc:0.0230  fbScore:0.4925   LR -> Group0: 7.3529e-06 / Group1: 1.4706e-05 / Group2: 2.2059e-05 / Group3: 3.6765e-04 / Group4: 3.6765e-04 / Group5: 3.6765e-04 / 
  60/ 136  <train> Loss:2.9867

  0%|          | 0/9 [00:00<?, ?it/s]

<val> Loss:1.3290  Acc:0.7884  fbScore:0.8326
Checkpoints have been updated to the epoch 1 weights.
------------------------------------------------------------------------------------------------------------------------------------------------------
【Epoch   2/  5】   LR -> Group0: 8.8889e-06 / Group1: 1.7778e-05 / Group2: 2.6667e-05 / Group3: 4.4444e-04 / Group4: 4.4444e-04 / Group5: 4.4444e-04 / 


  0%|          | 0/136 [00:00<?, ?it/s]

  10/ 136  <train> Loss:1.2511  Acc:0.6477  fbScore:0.7373   LR -> Group0: 8.7255e-06 / Group1: 1.7451e-05 / Group2: 2.6176e-05 / Group3: 4.3627e-04 / Group4: 4.3627e-04 / Group5: 4.3627e-04 / 
  20/ 136  <train> Loss:1.1637  Acc:0.7492  fbScore:0.7477   LR -> Group0: 8.5621e-06 / Group1: 1.7124e-05 / Group2: 2.5686e-05 / Group3: 4.2810e-04 / Group4: 4.2810e-04 / Group5: 4.2810e-04 / 
  30/ 136  <train> Loss:1.2462  Acc:0.7281  fbScore:0.7194   LR -> Group0: 8.3987e-06 / Group1: 1.6797e-05 / Group2: 2.5196e-05 / Group3: 4.1993e-04 / Group4: 4.1993e-04 / Group5: 4.1993e-04 / 
  40/ 136  <train> Loss:1.1731  Acc:0.7143  fbScore:0.7052   LR -> Group0: 8.2353e-06 / Group1: 1.6471e-05 / Group2: 2.4706e-05 / Group3: 4.1176e-04 / Group4: 4.1176e-04 / Group5: 4.1176e-04 / 
  50/ 136  <train> Loss:1.1888  Acc:0.7483  fbScore:0.7362   LR -> Group0: 8.0719e-06 / Group1: 1.6144e-05 / Group2: 2.4216e-05 / Group3: 4.0359e-04 / Group4: 4.0359e-04 / Group5: 4.0359e-04 / 
  60/ 136  <train> Loss:1.1670

  0%|          | 0/9 [00:00<?, ?it/s]

<val> Loss:0.6394  Acc:0.8543  fbScore:0.8685
Checkpoints have been updated to the epoch 2 weights.
------------------------------------------------------------------------------------------------------------------------------------------------------
【Epoch   3/  5】   LR -> Group0: 6.6667e-06 / Group1: 1.3333e-05 / Group2: 2.0000e-05 / Group3: 3.3333e-04 / Group4: 3.3333e-04 / Group5: 3.3333e-04 / 


  0%|          | 0/136 [00:00<?, ?it/s]

  10/ 136  <train> Loss:0.5307  Acc:0.8797  fbScore:0.8248   LR -> Group0: 6.5033e-06 / Group1: 1.3007e-05 / Group2: 1.9510e-05 / Group3: 3.2516e-04 / Group4: 3.2516e-04 / Group5: 3.2516e-04 / 
  20/ 136  <train> Loss:0.4862  Acc:0.8941  fbScore:0.8613   LR -> Group0: 6.3399e-06 / Group1: 1.2680e-05 / Group2: 1.9020e-05 / Group3: 3.1699e-04 / Group4: 3.1699e-04 / Group5: 3.1699e-04 / 
  30/ 136  <train> Loss:0.5260  Acc:0.8875  fbScore:0.8704   LR -> Group0: 6.1765e-06 / Group1: 1.2353e-05 / Group2: 1.8529e-05 / Group3: 3.0882e-04 / Group4: 3.0882e-04 / Group5: 3.0882e-04 / 
  40/ 136  <train> Loss:0.4954  Acc:0.8867  fbScore:0.8676   LR -> Group0: 6.0131e-06 / Group1: 1.2026e-05 / Group2: 1.8039e-05 / Group3: 3.0065e-04 / Group4: 3.0065e-04 / Group5: 3.0065e-04 / 
  50/ 136  <train> Loss:0.5444  Acc:0.8975  fbScore:0.8774   LR -> Group0: 5.8497e-06 / Group1: 1.1699e-05 / Group2: 1.7549e-05 / Group3: 2.9248e-04 / Group4: 2.9248e-04 / Group5: 2.9248e-04 / 
  60/ 136  <train> Loss:0.5728

  0%|          | 0/9 [00:00<?, ?it/s]

<val> Loss:0.8938  Acc:0.9475  fbScore:0.8904
Checkpoints have been updated to the epoch 3 weights.
------------------------------------------------------------------------------------------------------------------------------------------------------
【Epoch   4/  5】   LR -> Group0: 4.4444e-06 / Group1: 8.8889e-06 / Group2: 1.3333e-05 / Group3: 2.2222e-04 / Group4: 2.2222e-04 / Group5: 2.2222e-04 / 


  0%|          | 0/136 [00:00<?, ?it/s]

  10/ 136  <train> Loss:0.8074  Acc:0.9453  fbScore:0.8177   LR -> Group0: 4.2810e-06 / Group1: 8.5621e-06 / Group2: 1.2843e-05 / Group3: 2.1405e-04 / Group4: 2.1405e-04 / Group5: 2.1405e-04 / 
  20/ 136  <train> Loss:0.5718  Acc:0.9344  fbScore:0.8606   LR -> Group0: 4.1176e-06 / Group1: 8.2353e-06 / Group2: 1.2353e-05 / Group3: 2.0588e-04 / Group4: 2.0588e-04 / Group5: 2.0588e-04 / 
  30/ 136  <train> Loss:0.5057  Acc:0.9250  fbScore:0.8781   LR -> Group0: 3.9542e-06 / Group1: 7.9085e-06 / Group2: 1.1863e-05 / Group3: 1.9771e-04 / Group4: 1.9771e-04 / Group5: 1.9771e-04 / 
  40/ 136  <train> Loss:0.4985  Acc:0.9287  fbScore:0.8932   LR -> Group0: 3.7908e-06 / Group1: 7.5817e-06 / Group2: 1.1373e-05 / Group3: 1.8954e-04 / Group4: 1.8954e-04 / Group5: 1.8954e-04 / 
  50/ 136  <train> Loss:0.4698  Acc:0.9272  fbScore:0.8785   LR -> Group0: 3.6275e-06 / Group1: 7.2549e-06 / Group2: 1.0882e-05 / Group3: 1.8137e-04 / Group4: 1.8137e-04 / Group5: 1.8137e-04 / 
  60/ 136  <train> Loss:0.4544

  0%|          | 0/9 [00:00<?, ?it/s]

<val> Loss:0.6628  Acc:0.9438  fbScore:0.8910
Checkpoints have been updated to the epoch 4 weights.
------------------------------------------------------------------------------------------------------------------------------------------------------
【Epoch   5/  5】   LR -> Group0: 2.2222e-06 / Group1: 4.4444e-06 / Group2: 6.6667e-06 / Group3: 1.1111e-04 / Group4: 1.1111e-04 / Group5: 1.1111e-04 / 


  0%|          | 0/136 [00:00<?, ?it/s]

  10/ 136  <train> Loss:0.2665  Acc:0.9398  fbScore:0.8556   LR -> Group0: 2.0588e-06 / Group1: 4.1176e-06 / Group2: 6.1765e-06 / Group3: 1.0294e-04 / Group4: 1.0294e-04 / Group5: 1.0294e-04 / 
  20/ 136  <train> Loss:0.2295  Acc:0.9465  fbScore:0.8576   LR -> Group0: 1.8954e-06 / Group1: 3.7908e-06 / Group2: 5.6863e-06 / Group3: 9.4771e-05 / Group4: 9.4771e-05 / Group5: 9.4771e-05 / 
  30/ 136  <train> Loss:0.2249  Acc:0.9490  fbScore:0.8876   LR -> Group0: 1.7320e-06 / Group1: 3.4641e-06 / Group2: 5.1961e-06 / Group3: 8.6601e-05 / Group4: 8.6601e-05 / Group5: 8.6601e-05 / 
  40/ 136  <train> Loss:0.2055  Acc:0.9529  fbScore:0.9061   LR -> Group0: 1.5686e-06 / Group1: 3.1373e-06 / Group2: 4.7059e-06 / Group3: 7.8431e-05 / Group4: 7.8431e-05 / Group5: 7.8431e-05 / 
  50/ 136  <train> Loss:0.2225  Acc:0.9539  fbScore:0.8932   LR -> Group0: 1.4052e-06 / Group1: 2.8105e-06 / Group2: 4.2157e-06 / Group3: 7.0261e-05 / Group4: 7.0261e-05 / Group5: 7.0261e-05 / 
  60/ 136  <train> Loss:0.2440

  0%|          | 0/9 [00:00<?, ?it/s]

<val> Loss:0.9869  Acc:0.9523  fbScore:0.8784
------------------------------------------------------------------------------------------------------------------------------------------------------
Evaluate Test Dataset


  0%|          | 0/11 [00:00<?, ?it/s]

Loss:0.9032  Acc:0.9492  fbScore:0.9020
fb_score : 0.9067357512953368

[32mCross-validation loop : 3/5[0m
Train  ->  label_1:403 / all:17373   (2.320%)
Valid  ->  label_1:101 / all:4343   (2.326%)
Choosed BertLstmExModel


Some weights of the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


InitLR:2e-05 / num_warmup_steps:68 / num_training_steps:680
Use 4 GPUs
Using device : cuda
【Epoch   1/  5】   LR -> Group0: 0.0000e+00 / Group1: 0.0000e+00 / Group2: 0.0000e+00 / Group3: 0.0000e+00 / Group4: 0.0000e+00 / Group5: 0.0000e+00 / 


  0%|          | 0/136 [00:00<?, ?it/s]

  10/ 136  <train> Loss:3.3205  Acc:0.0258  fbScore:0.5331   LR -> Group0: 1.4706e-06 / Group1: 2.9412e-06 / Group2: 4.4118e-06 / Group3: 7.3529e-05 / Group4: 7.3529e-05 / Group5: 7.3529e-05 / 
  20/ 136  <train> Loss:3.2395  Acc:0.0250  fbScore:0.5154   LR -> Group0: 2.9412e-06 / Group1: 5.8824e-06 / Group2: 8.8235e-06 / Group3: 1.4706e-04 / Group4: 1.4706e-04 / Group5: 1.4706e-04 / 
  30/ 136  <train> Loss:3.2069  Acc:0.0247  fbScore:0.5155   LR -> Group0: 4.4118e-06 / Group1: 8.8235e-06 / Group2: 1.3235e-05 / Group3: 2.2059e-04 / Group4: 2.2059e-04 / Group5: 2.2059e-04 / 
  40/ 136  <train> Loss:3.2158  Acc:0.0250  fbScore:0.5179   LR -> Group0: 5.8824e-06 / Group1: 1.1765e-05 / Group2: 1.7647e-05 / Group3: 2.9412e-04 / Group4: 2.9412e-04 / Group5: 2.9412e-04 / 
  50/ 136  <train> Loss:3.1489  Acc:0.0248  fbScore:0.5172   LR -> Group0: 7.3529e-06 / Group1: 1.4706e-05 / Group2: 2.2059e-05 / Group3: 3.6765e-04 / Group4: 3.6765e-04 / Group5: 3.6765e-04 / 
  60/ 136  <train> Loss:3.1408

  0%|          | 0/9 [00:00<?, ?it/s]

<val> Loss:1.2794  Acc:0.7138  fbScore:0.7818
Checkpoints have been updated to the epoch 1 weights.
------------------------------------------------------------------------------------------------------------------------------------------------------
【Epoch   2/  5】   LR -> Group0: 8.8889e-06 / Group1: 1.7778e-05 / Group2: 2.6667e-05 / Group3: 4.4444e-04 / Group4: 4.4444e-04 / Group5: 4.4444e-04 / 


  0%|          | 0/136 [00:00<?, ?it/s]

  10/ 136  <train> Loss:1.3371  Acc:0.8164  fbScore:0.7251   LR -> Group0: 8.7255e-06 / Group1: 1.7451e-05 / Group2: 2.6176e-05 / Group3: 4.3627e-04 / Group4: 4.3627e-04 / Group5: 4.3627e-04 / 
  20/ 136  <train> Loss:1.0341  Acc:0.8359  fbScore:0.8000   LR -> Group0: 8.5621e-06 / Group1: 1.7124e-05 / Group2: 2.5686e-05 / Group3: 4.2810e-04 / Group4: 4.2810e-04 / Group5: 4.2810e-04 / 
  30/ 136  <train> Loss:0.9558  Acc:0.8505  fbScore:0.8326   LR -> Group0: 8.3987e-06 / Group1: 1.6797e-05 / Group2: 2.5196e-05 / Group3: 4.1993e-04 / Group4: 4.1993e-04 / Group5: 4.1993e-04 / 
  40/ 136  <train> Loss:0.9309  Acc:0.8541  fbScore:0.8344   LR -> Group0: 8.2353e-06 / Group1: 1.6471e-05 / Group2: 2.4706e-05 / Group3: 4.1176e-04 / Group4: 4.1176e-04 / Group5: 4.1176e-04 / 
  50/ 136  <train> Loss:0.9818  Acc:0.8517  fbScore:0.8076   LR -> Group0: 8.0719e-06 / Group1: 1.6144e-05 / Group2: 2.4216e-05 / Group3: 4.0359e-04 / Group4: 4.0359e-04 / Group5: 4.0359e-04 / 
  60/ 136  <train> Loss:0.9836

  0%|          | 0/9 [00:00<?, ?it/s]

<val> Loss:0.7118  Acc:0.8416  fbScore:0.8554
Checkpoints have been updated to the epoch 2 weights.
------------------------------------------------------------------------------------------------------------------------------------------------------
【Epoch   3/  5】   LR -> Group0: 6.6667e-06 / Group1: 1.3333e-05 / Group2: 2.0000e-05 / Group3: 3.3333e-04 / Group4: 3.3333e-04 / Group5: 3.3333e-04 / 


  0%|          | 0/136 [00:00<?, ?it/s]

  10/ 136  <train> Loss:0.5409  Acc:0.8734  fbScore:0.8762   LR -> Group0: 6.5033e-06 / Group1: 1.3007e-05 / Group2: 1.9510e-05 / Group3: 3.2516e-04 / Group4: 3.2516e-04 / Group5: 3.2516e-04 / 
  20/ 136  <train> Loss:0.4521  Acc:0.9043  fbScore:0.9050   LR -> Group0: 6.3399e-06 / Group1: 1.2680e-05 / Group2: 1.9020e-05 / Group3: 3.1699e-04 / Group4: 3.1699e-04 / Group5: 3.1699e-04 / 
  30/ 136  <train> Loss:0.5653  Acc:0.9164  fbScore:0.9178   LR -> Group0: 6.1765e-06 / Group1: 1.2353e-05 / Group2: 1.8529e-05 / Group3: 3.0882e-04 / Group4: 3.0882e-04 / Group5: 3.0882e-04 / 
  40/ 136  <train> Loss:0.5821  Acc:0.8977  fbScore:0.9061   LR -> Group0: 6.0131e-06 / Group1: 1.2026e-05 / Group2: 1.8039e-05 / Group3: 3.0065e-04 / Group4: 3.0065e-04 / Group5: 3.0065e-04 / 
  50/ 136  <train> Loss:0.5643  Acc:0.8931  fbScore:0.8786   LR -> Group0: 5.8497e-06 / Group1: 1.1699e-05 / Group2: 1.7549e-05 / Group3: 2.9248e-04 / Group4: 2.9248e-04 / Group5: 2.9248e-04 / 
  60/ 136  <train> Loss:0.5952

  0%|          | 0/9 [00:00<?, ?it/s]

<val> Loss:0.6003  Acc:0.8837  fbScore:0.8777
Checkpoints have been updated to the epoch 3 weights.
------------------------------------------------------------------------------------------------------------------------------------------------------
【Epoch   4/  5】   LR -> Group0: 4.4444e-06 / Group1: 8.8889e-06 / Group2: 1.3333e-05 / Group3: 2.2222e-04 / Group4: 2.2222e-04 / Group5: 2.2222e-04 / 


  0%|          | 0/136 [00:00<?, ?it/s]

  10/ 136  <train> Loss:0.4055  Acc:0.8953  fbScore:0.9014   LR -> Group0: 4.2810e-06 / Group1: 8.5621e-06 / Group2: 1.2843e-05 / Group3: 2.1405e-04 / Group4: 2.1405e-04 / Group5: 2.1405e-04 / 
  20/ 136  <train> Loss:0.6148  Acc:0.8977  fbScore:0.8587   LR -> Group0: 4.1176e-06 / Group1: 8.2353e-06 / Group2: 1.2353e-05 / Group3: 2.0588e-04 / Group4: 2.0588e-04 / Group5: 2.0588e-04 / 
  30/ 136  <train> Loss:0.5464  Acc:0.8990  fbScore:0.8166   LR -> Group0: 3.9542e-06 / Group1: 7.9085e-06 / Group2: 1.1863e-05 / Group3: 1.9771e-04 / Group4: 1.9771e-04 / Group5: 1.9771e-04 / 
  40/ 136  <train> Loss:0.4845  Acc:0.9076  fbScore:0.8427   LR -> Group0: 3.7908e-06 / Group1: 7.5817e-06 / Group2: 1.1373e-05 / Group3: 1.8954e-04 / Group4: 1.8954e-04 / Group5: 1.8954e-04 / 
  50/ 136  <train> Loss:0.4752  Acc:0.9128  fbScore:0.8383   LR -> Group0: 3.6275e-06 / Group1: 7.2549e-06 / Group2: 1.0882e-05 / Group3: 1.8137e-04 / Group4: 1.8137e-04 / Group5: 1.8137e-04 / 
  60/ 136  <train> Loss:0.4553

  0%|          | 0/9 [00:00<?, ?it/s]

<val> Loss:0.5573  Acc:0.9215  fbScore:0.8911
Checkpoints have been updated to the epoch 4 weights.
------------------------------------------------------------------------------------------------------------------------------------------------------
【Epoch   5/  5】   LR -> Group0: 2.2222e-06 / Group1: 4.4444e-06 / Group2: 6.6667e-06 / Group3: 1.1111e-04 / Group4: 1.1111e-04 / Group5: 1.1111e-04 / 


  0%|          | 0/136 [00:00<?, ?it/s]

  10/ 136  <train> Loss:0.2657  Acc:0.9242  fbScore:0.7452   LR -> Group0: 2.0588e-06 / Group1: 4.1176e-06 / Group2: 6.1765e-06 / Group3: 1.0294e-04 / Group4: 1.0294e-04 / Group5: 1.0294e-04 / 
  20/ 136  <train> Loss:0.3769  Acc:0.9352  fbScore:0.8320   LR -> Group0: 1.8954e-06 / Group1: 3.7908e-06 / Group2: 5.6863e-06 / Group3: 9.4771e-05 / Group4: 9.4771e-05 / Group5: 9.4771e-05 / 
  30/ 136  <train> Loss:0.3441  Acc:0.9346  fbScore:0.8639   LR -> Group0: 1.7320e-06 / Group1: 3.4641e-06 / Group2: 5.1961e-06 / Group3: 8.6601e-05 / Group4: 8.6601e-05 / Group5: 8.6601e-05 / 
  40/ 136  <train> Loss:0.3292  Acc:0.9352  fbScore:0.8801   LR -> Group0: 1.5686e-06 / Group1: 3.1373e-06 / Group2: 4.7059e-06 / Group3: 7.8431e-05 / Group4: 7.8431e-05 / Group5: 7.8431e-05 / 
  50/ 136  <train> Loss:0.3081  Acc:0.9367  fbScore:0.8940   LR -> Group0: 1.4052e-06 / Group1: 2.8105e-06 / Group2: 4.2157e-06 / Group3: 7.0261e-05 / Group4: 7.0261e-05 / Group5: 7.0261e-05 / 
  60/ 136  <train> Loss:0.3091

  0%|          | 0/9 [00:00<?, ?it/s]

<val> Loss:0.6454  Acc:0.9397  fbScore:0.9049
Checkpoints have been updated to the epoch 5 weights.
------------------------------------------------------------------------------------------------------------------------------------------------------
Evaluate Test Dataset


  0%|          | 0/11 [00:00<?, ?it/s]

Loss:1.0350  Acc:0.9390  fbScore:0.8780
fb_score : 0.884621200665356

[32mCross-validation loop : 4/5[0m
Train  ->  label_1:403 / all:17373   (2.320%)
Valid  ->  label_1:101 / all:4343   (2.326%)
Choosed BertLstmExModel


Some weights of the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


InitLR:2e-05 / num_warmup_steps:68 / num_training_steps:680
Use 4 GPUs
Using device : cuda
【Epoch   1/  5】   LR -> Group0: 0.0000e+00 / Group1: 0.0000e+00 / Group2: 0.0000e+00 / Group3: 0.0000e+00 / Group4: 0.0000e+00 / Group5: 0.0000e+00 / 


  0%|          | 0/136 [00:00<?, ?it/s]

  10/ 136  <train> Loss:2.4497  Acc:0.0172  fbScore:0.4168   LR -> Group0: 1.4706e-06 / Group1: 2.9412e-06 / Group2: 4.4118e-06 / Group3: 7.3529e-05 / Group4: 7.3529e-05 / Group5: 7.3529e-05 / 
  20/ 136  <train> Loss:2.6064  Acc:0.0188  fbScore:0.4431   LR -> Group0: 2.9412e-06 / Group1: 5.8824e-06 / Group2: 8.8235e-06 / Group3: 1.4706e-04 / Group4: 1.4706e-04 / Group5: 1.4706e-04 / 
  30/ 136  <train> Loss:2.7091  Acc:0.0198  fbScore:0.4653   LR -> Group0: 4.4118e-06 / Group1: 8.8235e-06 / Group2: 1.3235e-05 / Group3: 2.2059e-04 / Group4: 2.2059e-04 / Group5: 2.2059e-04 / 
  40/ 136  <train> Loss:2.9103  Acc:0.0219  fbScore:0.4868   LR -> Group0: 5.8824e-06 / Group1: 1.1765e-05 / Group2: 1.7647e-05 / Group3: 2.9412e-04 / Group4: 2.9412e-04 / Group5: 2.9412e-04 / 
  50/ 136  <train> Loss:2.9151  Acc:0.0222  fbScore:0.4872   LR -> Group0: 7.3529e-06 / Group1: 1.4706e-05 / Group2: 2.2059e-05 / Group3: 3.6765e-04 / Group4: 3.6765e-04 / Group5: 3.6765e-04 / 
  60/ 136  <train> Loss:2.9910

  0%|          | 0/9 [00:00<?, ?it/s]

<val> Loss:1.7615  Acc:0.0262  fbScore:0.5386
Checkpoints have been updated to the epoch 1 weights.
------------------------------------------------------------------------------------------------------------------------------------------------------
【Epoch   2/  5】   LR -> Group0: 8.8889e-06 / Group1: 1.7778e-05 / Group2: 2.6667e-05 / Group3: 4.4444e-04 / Group4: 4.4444e-04 / Group5: 4.4444e-04 / 


  0%|          | 0/136 [00:00<?, ?it/s]

  10/ 136  <train> Loss:1.4709  Acc:0.3461  fbScore:0.6034   LR -> Group0: 8.7255e-06 / Group1: 1.7451e-05 / Group2: 2.6176e-05 / Group3: 4.3627e-04 / Group4: 4.3627e-04 / Group5: 4.3627e-04 / 
  20/ 136  <train> Loss:1.3680  Acc:0.5652  fbScore:0.6192   LR -> Group0: 8.5621e-06 / Group1: 1.7124e-05 / Group2: 2.5686e-05 / Group3: 4.2810e-04 / Group4: 4.2810e-04 / Group5: 4.2810e-04 / 
  30/ 136  <train> Loss:1.2727  Acc:0.6445  fbScore:0.6558   LR -> Group0: 8.3987e-06 / Group1: 1.6797e-05 / Group2: 2.5196e-05 / Group3: 4.1993e-04 / Group4: 4.1993e-04 / Group5: 4.1993e-04 / 
  40/ 136  <train> Loss:1.3671  Acc:0.6203  fbScore:0.6587   LR -> Group0: 8.2353e-06 / Group1: 1.6471e-05 / Group2: 2.4706e-05 / Group3: 4.1176e-04 / Group4: 4.1176e-04 / Group5: 4.1176e-04 / 
  50/ 136  <train> Loss:1.3485  Acc:0.6570  fbScore:0.6584   LR -> Group0: 8.0719e-06 / Group1: 1.6144e-05 / Group2: 2.4216e-05 / Group3: 4.0359e-04 / Group4: 4.0359e-04 / Group5: 4.0359e-04 / 
  60/ 136  <train> Loss:1.3545

  0%|          | 0/9 [00:00<?, ?it/s]

<val> Loss:0.8172  Acc:0.8671  fbScore:0.8556
Checkpoints have been updated to the epoch 2 weights.
------------------------------------------------------------------------------------------------------------------------------------------------------
【Epoch   3/  5】   LR -> Group0: 6.6667e-06 / Group1: 1.3333e-05 / Group2: 2.0000e-05 / Group3: 3.3333e-04 / Group4: 3.3333e-04 / Group5: 3.3333e-04 / 


  0%|          | 0/136 [00:00<?, ?it/s]

  10/ 136  <train> Loss:0.6567  Acc:0.9258  fbScore:0.7812   LR -> Group0: 6.5033e-06 / Group1: 1.3007e-05 / Group2: 1.9510e-05 / Group3: 3.2516e-04 / Group4: 3.2516e-04 / Group5: 3.2516e-04 / 
  20/ 136  <train> Loss:0.5339  Acc:0.9137  fbScore:0.8385   LR -> Group0: 6.3399e-06 / Group1: 1.2680e-05 / Group2: 1.9020e-05 / Group3: 3.1699e-04 / Group4: 3.1699e-04 / Group5: 3.1699e-04 / 
  30/ 136  <train> Loss:0.6529  Acc:0.8971  fbScore:0.8445   LR -> Group0: 6.1765e-06 / Group1: 1.2353e-05 / Group2: 1.8529e-05 / Group3: 3.0882e-04 / Group4: 3.0882e-04 / Group5: 3.0882e-04 / 
  40/ 136  <train> Loss:0.6282  Acc:0.8885  fbScore:0.8329   LR -> Group0: 6.0131e-06 / Group1: 1.2026e-05 / Group2: 1.8039e-05 / Group3: 3.0065e-04 / Group4: 3.0065e-04 / Group5: 3.0065e-04 / 
  50/ 136  <train> Loss:0.5876  Acc:0.8914  fbScore:0.8495   LR -> Group0: 5.8497e-06 / Group1: 1.1699e-05 / Group2: 1.7549e-05 / Group3: 2.9248e-04 / Group4: 2.9248e-04 / Group5: 2.9248e-04 / 
  60/ 136  <train> Loss:0.6085

  0%|          | 0/9 [00:00<?, ?it/s]

<val> Loss:0.6658  Acc:0.9127  fbScore:0.8893
Checkpoints have been updated to the epoch 3 weights.
------------------------------------------------------------------------------------------------------------------------------------------------------
【Epoch   4/  5】   LR -> Group0: 4.4444e-06 / Group1: 8.8889e-06 / Group2: 1.3333e-05 / Group3: 2.2222e-04 / Group4: 2.2222e-04 / Group5: 2.2222e-04 / 


  0%|          | 0/136 [00:00<?, ?it/s]

  10/ 136  <train> Loss:0.3119  Acc:0.9102  fbScore:0.7378   LR -> Group0: 4.2810e-06 / Group1: 8.5621e-06 / Group2: 1.2843e-05 / Group3: 2.1405e-04 / Group4: 2.1405e-04 / Group5: 2.1405e-04 / 
  20/ 136  <train> Loss:0.3088  Acc:0.9184  fbScore:0.8235   LR -> Group0: 4.1176e-06 / Group1: 8.2353e-06 / Group2: 1.2353e-05 / Group3: 2.0588e-04 / Group4: 2.0588e-04 / Group5: 2.0588e-04 / 
  30/ 136  <train> Loss:0.3389  Acc:0.9227  fbScore:0.8535   LR -> Group0: 3.9542e-06 / Group1: 7.9085e-06 / Group2: 1.1863e-05 / Group3: 1.9771e-04 / Group4: 1.9771e-04 / Group5: 1.9771e-04 / 
  40/ 136  <train> Loss:0.4701  Acc:0.9143  fbScore:0.8549   LR -> Group0: 3.7908e-06 / Group1: 7.5817e-06 / Group2: 1.1373e-05 / Group3: 1.8954e-04 / Group4: 1.8954e-04 / Group5: 1.8954e-04 / 
  50/ 136  <train> Loss:0.5067  Acc:0.8905  fbScore:0.8581   LR -> Group0: 3.6275e-06 / Group1: 7.2549e-06 / Group2: 1.0882e-05 / Group3: 1.8137e-04 / Group4: 1.8137e-04 / Group5: 1.8137e-04 / 
  60/ 136  <train> Loss:0.4941

  0%|          | 0/9 [00:00<?, ?it/s]

<val> Loss:0.5842  Acc:0.9388  fbScore:0.9090
Checkpoints have been updated to the epoch 4 weights.
------------------------------------------------------------------------------------------------------------------------------------------------------
【Epoch   5/  5】   LR -> Group0: 2.2222e-06 / Group1: 4.4444e-06 / Group2: 6.6667e-06 / Group3: 1.1111e-04 / Group4: 1.1111e-04 / Group5: 1.1111e-04 / 


  0%|          | 0/136 [00:00<?, ?it/s]

  10/ 136  <train> Loss:0.2587  Acc:0.9313  fbScore:0.7411   LR -> Group0: 2.0588e-06 / Group1: 4.1176e-06 / Group2: 6.1765e-06 / Group3: 1.0294e-04 / Group4: 1.0294e-04 / Group5: 1.0294e-04 / 
  20/ 136  <train> Loss:0.2611  Acc:0.9336  fbScore:0.7912   LR -> Group0: 1.8954e-06 / Group1: 3.7908e-06 / Group2: 5.6863e-06 / Group3: 9.4771e-05 / Group4: 9.4771e-05 / Group5: 9.4771e-05 / 
  30/ 136  <train> Loss:0.3527  Acc:0.9375  fbScore:0.8221   LR -> Group0: 1.7320e-06 / Group1: 3.4641e-06 / Group2: 5.1961e-06 / Group3: 8.6601e-05 / Group4: 8.6601e-05 / Group5: 8.6601e-05 / 
  40/ 136  <train> Loss:0.3153  Acc:0.9410  fbScore:0.8559   LR -> Group0: 1.5686e-06 / Group1: 3.1373e-06 / Group2: 4.7059e-06 / Group3: 7.8431e-05 / Group4: 7.8431e-05 / Group5: 7.8431e-05 / 
  50/ 136  <train> Loss:0.3128  Acc:0.9386  fbScore:0.8717   LR -> Group0: 1.4052e-06 / Group1: 2.8105e-06 / Group2: 4.2157e-06 / Group3: 7.0261e-05 / Group4: 7.0261e-05 / Group5: 7.0261e-05 / 
  60/ 136  <train> Loss:0.3062

  0%|          | 0/9 [00:00<?, ?it/s]

<val> Loss:0.6661  Acc:0.9445  fbScore:0.9133
Checkpoints have been updated to the epoch 5 weights.
------------------------------------------------------------------------------------------------------------------------------------------------------
Evaluate Test Dataset


  0%|          | 0/11 [00:00<?, ?it/s]

Loss:0.8791  Acc:0.9392  fbScore:0.8874
fb_score : 0.892047172664046

[32mCross-validation loop : 5/5[0m
Train  ->  label_1:404 / all:17374   (2.325%)
Valid  ->  label_1:100 / all:4342   (2.303%)
Choosed BertLstmExModel


Some weights of the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


InitLR:2e-05 / num_warmup_steps:68 / num_training_steps:680
Use 4 GPUs
Using device : cuda
【Epoch   1/  5】   LR -> Group0: 0.0000e+00 / Group1: 0.0000e+00 / Group2: 0.0000e+00 / Group3: 0.0000e+00 / Group4: 0.0000e+00 / Group5: 0.0000e+00 / 


  0%|          | 0/136 [00:00<?, ?it/s]

  10/ 136  <train> Loss:2.9240  Acc:0.0219  fbScore:0.4991   LR -> Group0: 1.4706e-06 / Group1: 2.9412e-06 / Group2: 4.4118e-06 / Group3: 7.3529e-05 / Group4: 7.3529e-05 / Group5: 7.3529e-05 / 
  20/ 136  <train> Loss:2.7256  Acc:0.0199  fbScore:0.4466   LR -> Group0: 2.9412e-06 / Group1: 5.8824e-06 / Group2: 8.8235e-06 / Group3: 1.4706e-04 / Group4: 1.4706e-04 / Group5: 1.4706e-04 / 
  30/ 136  <train> Loss:2.7627  Acc:0.0203  fbScore:0.4588   LR -> Group0: 4.4118e-06 / Group1: 8.8235e-06 / Group2: 1.3235e-05 / Group3: 2.2059e-04 / Group4: 2.2059e-04 / Group5: 2.2059e-04 / 
  40/ 136  <train> Loss:2.7400  Acc:0.0201  fbScore:0.4496   LR -> Group0: 5.8824e-06 / Group1: 1.1765e-05 / Group2: 1.7647e-05 / Group3: 2.9412e-04 / Group4: 2.9412e-04 / Group5: 2.9412e-04 / 
  50/ 136  <train> Loss:2.8606  Acc:0.0216  fbScore:0.4598   LR -> Group0: 7.3529e-06 / Group1: 1.4706e-05 / Group2: 2.2059e-05 / Group3: 3.6765e-04 / Group4: 3.6765e-04 / Group5: 3.6765e-04 / 
  60/ 136  <train> Loss:2.8644

  0%|          | 0/9 [00:00<?, ?it/s]

<val> Loss:1.1149  Acc:0.7662  fbScore:0.8057
Checkpoints have been updated to the epoch 1 weights.
------------------------------------------------------------------------------------------------------------------------------------------------------
【Epoch   2/  5】   LR -> Group0: 8.8889e-06 / Group1: 1.7778e-05 / Group2: 2.6667e-05 / Group3: 4.4444e-04 / Group4: 4.4444e-04 / Group5: 4.4444e-04 / 


  0%|          | 0/136 [00:00<?, ?it/s]

  10/ 136  <train> Loss:1.2412  Acc:0.6820  fbScore:0.6692   LR -> Group0: 8.7255e-06 / Group1: 1.7451e-05 / Group2: 2.6176e-05 / Group3: 4.3627e-04 / Group4: 4.3627e-04 / Group5: 4.3627e-04 / 
  20/ 136  <train> Loss:1.2298  Acc:0.7855  fbScore:0.7139   LR -> Group0: 8.5621e-06 / Group1: 1.7124e-05 / Group2: 2.5686e-05 / Group3: 4.2810e-04 / Group4: 4.2810e-04 / Group5: 4.2810e-04 / 
  30/ 136  <train> Loss:1.0990  Acc:0.8141  fbScore:0.7147   LR -> Group0: 8.3987e-06 / Group1: 1.6797e-05 / Group2: 2.5196e-05 / Group3: 4.1993e-04 / Group4: 4.1993e-04 / Group5: 4.1993e-04 / 
  40/ 136  <train> Loss:0.9541  Acc:0.8369  fbScore:0.7173   LR -> Group0: 8.2353e-06 / Group1: 1.6471e-05 / Group2: 2.4706e-05 / Group3: 4.1176e-04 / Group4: 4.1176e-04 / Group5: 4.1176e-04 / 
  50/ 136  <train> Loss:0.9313  Acc:0.8545  fbScore:0.7454   LR -> Group0: 8.0719e-06 / Group1: 1.6144e-05 / Group2: 2.4216e-05 / Group3: 4.0359e-04 / Group4: 4.0359e-04 / Group5: 4.0359e-04 / 
  60/ 136  <train> Loss:0.9037

  0%|          | 0/9 [00:00<?, ?it/s]

<val> Loss:0.5469  Acc:0.8779  fbScore:0.8775
Checkpoints have been updated to the epoch 2 weights.
------------------------------------------------------------------------------------------------------------------------------------------------------
【Epoch   3/  5】   LR -> Group0: 6.6667e-06 / Group1: 1.3333e-05 / Group2: 2.0000e-05 / Group3: 3.3333e-04 / Group4: 3.3333e-04 / Group5: 3.3333e-04 / 


  0%|          | 0/136 [00:00<?, ?it/s]

  10/ 136  <train> Loss:0.3660  Acc:0.9203  fbScore:0.9359   LR -> Group0: 6.5033e-06 / Group1: 1.3007e-05 / Group2: 1.9510e-05 / Group3: 3.2516e-04 / Group4: 3.2516e-04 / Group5: 3.2516e-04 / 
  20/ 136  <train> Loss:0.5520  Acc:0.9273  fbScore:0.9180   LR -> Group0: 6.3399e-06 / Group1: 1.2680e-05 / Group2: 1.9020e-05 / Group3: 3.1699e-04 / Group4: 3.1699e-04 / Group5: 3.1699e-04 / 
  30/ 136  <train> Loss:0.4623  Acc:0.9344  fbScore:0.8989   LR -> Group0: 6.1765e-06 / Group1: 1.2353e-05 / Group2: 1.8529e-05 / Group3: 3.0882e-04 / Group4: 3.0882e-04 / Group5: 3.0882e-04 / 
  40/ 136  <train> Loss:0.4398  Acc:0.9270  fbScore:0.8966   LR -> Group0: 6.0131e-06 / Group1: 1.2026e-05 / Group2: 1.8039e-05 / Group3: 3.0065e-04 / Group4: 3.0065e-04 / Group5: 3.0065e-04 / 
  50/ 136  <train> Loss:0.4953  Acc:0.9320  fbScore:0.9047   LR -> Group0: 5.8497e-06 / Group1: 1.1699e-05 / Group2: 1.7549e-05 / Group3: 2.9248e-04 / Group4: 2.9248e-04 / Group5: 2.9248e-04 / 
  60/ 136  <train> Loss:0.5463

  0%|          | 0/9 [00:00<?, ?it/s]

<val> Loss:0.6122  Acc:0.9127  fbScore:0.8943
Checkpoints have been updated to the epoch 3 weights.
------------------------------------------------------------------------------------------------------------------------------------------------------
【Epoch   4/  5】   LR -> Group0: 4.4444e-06 / Group1: 8.8889e-06 / Group2: 1.3333e-05 / Group3: 2.2222e-04 / Group4: 2.2222e-04 / Group5: 2.2222e-04 / 


  0%|          | 0/136 [00:00<?, ?it/s]

  10/ 136  <train> Loss:0.3581  Acc:0.9070  fbScore:0.9064   LR -> Group0: 4.2810e-06 / Group1: 8.5621e-06 / Group2: 1.2843e-05 / Group3: 2.1405e-04 / Group4: 2.1405e-04 / Group5: 2.1405e-04 / 
  20/ 136  <train> Loss:0.3479  Acc:0.9176  fbScore:0.9285   LR -> Group0: 4.1176e-06 / Group1: 8.2353e-06 / Group2: 1.2353e-05 / Group3: 2.0588e-04 / Group4: 2.0588e-04 / Group5: 2.0588e-04 / 
  30/ 136  <train> Loss:0.4823  Acc:0.9234  fbScore:0.9222   LR -> Group0: 3.9542e-06 / Group1: 7.9085e-06 / Group2: 1.1863e-05 / Group3: 1.9771e-04 / Group4: 1.9771e-04 / Group5: 1.9771e-04 / 
  40/ 136  <train> Loss:0.4838  Acc:0.9213  fbScore:0.9201   LR -> Group0: 3.7908e-06 / Group1: 7.5817e-06 / Group2: 1.1373e-05 / Group3: 1.8954e-04 / Group4: 1.8954e-04 / Group5: 1.8954e-04 / 
  50/ 136  <train> Loss:0.4666  Acc:0.9120  fbScore:0.8918   LR -> Group0: 3.6275e-06 / Group1: 7.2549e-06 / Group2: 1.0882e-05 / Group3: 1.8137e-04 / Group4: 1.8137e-04 / Group5: 1.8137e-04 / 
  60/ 136  <train> Loss:0.4561

  0%|          | 0/9 [00:00<?, ?it/s]

<val> Loss:0.6362  Acc:0.9265  fbScore:0.8871
------------------------------------------------------------------------------------------------------------------------------------------------------
【Epoch   5/  5】   LR -> Group0: 2.2222e-06 / Group1: 4.4444e-06 / Group2: 6.6667e-06 / Group3: 1.1111e-04 / Group4: 1.1111e-04 / Group5: 1.1111e-04 / 


  0%|          | 0/136 [00:00<?, ?it/s]

  10/ 136  <train> Loss:0.4972  Acc:0.9320  fbScore:0.8705   LR -> Group0: 2.0588e-06 / Group1: 4.1176e-06 / Group2: 6.1765e-06 / Group3: 1.0294e-04 / Group4: 1.0294e-04 / Group5: 1.0294e-04 / 
  20/ 136  <train> Loss:0.7013  Acc:0.9316  fbScore:0.8879   LR -> Group0: 1.8954e-06 / Group1: 3.7908e-06 / Group2: 5.6863e-06 / Group3: 9.4771e-05 / Group4: 9.4771e-05 / Group5: 9.4771e-05 / 
  30/ 136  <train> Loss:0.5874  Acc:0.9224  fbScore:0.8606   LR -> Group0: 1.7320e-06 / Group1: 3.4641e-06 / Group2: 5.1961e-06 / Group3: 8.6601e-05 / Group4: 8.6601e-05 / Group5: 8.6601e-05 / 
  40/ 136  <train> Loss:0.5116  Acc:0.9227  fbScore:0.8560   LR -> Group0: 1.5686e-06 / Group1: 3.1373e-06 / Group2: 4.7059e-06 / Group3: 7.8431e-05 / Group4: 7.8431e-05 / Group5: 7.8431e-05 / 
  50/ 136  <train> Loss:0.4760  Acc:0.9237  fbScore:0.8320   LR -> Group0: 1.4052e-06 / Group1: 2.8105e-06 / Group2: 4.2157e-06 / Group3: 7.0261e-05 / Group4: 7.0261e-05 / Group5: 7.0261e-05 / 
  60/ 136  <train> Loss:0.4429

  0%|          | 0/9 [00:00<?, ?it/s]

<val> Loss:0.8244  Acc:0.9438  fbScore:0.8782
------------------------------------------------------------------------------------------------------------------------------------------------------
Evaluate Test Dataset


  0%|          | 0/11 [00:00<?, ?it/s]

Loss:0.6357  Acc:0.9145  fbScore:0.8889
fb_score : 0.8957654723127035



In [17]:
logs.keys()

dict_keys(['fit_history', 'test_preds_labels', 'test_fb_score'])

In [18]:
logs['test_fb_score'][1]

0.9067357512953368

In [19]:
for i in range(hps.cv_n):
    fb_score = logs['test_fb_score'][i]
    print(fb_score)

0.9028831562974203
0.9067357512953368
0.884621200665356
0.892047172664046
0.8957654723127035


In [20]:
test_pred_df = pd.DataFrame(columns=[f"cv{i}" for i in range(hps.cv_n)] + ['cv_ensemble', 'label'])
test_pred_df['label'] = logs['test_preds_labels'][0]['labels']
for i in range(hps.cv_n):
    test_pred_df[f"cv{i}"] = logs['test_preds_labels'][i]['preds']




test_pred_df['cv_ensemble'] = test_pred_df.loc[:, 'cv0':'cv4'].mean(axis=1).map(lambda x: 1 if x >= 0.5 else 0)


display(test_pred_df)
display(test_pred_df.describe())

Unnamed: 0,cv0,cv1,cv2,cv3,cv4,cv_ensemble,label
0,0.0,0.0,0.0,0.0,0.0,0,0.0
1,0.0,0.0,0.0,0.0,0.0,0,0.0
2,0.0,0.0,0.0,0.0,0.0,0,0.0
3,0.0,0.0,0.0,0.0,0.0,0,0.0
4,0.0,0.0,0.0,0.0,0.0,0,0.0
...,...,...,...,...,...,...,...
5424,0.0,0.0,0.0,0.0,0.0,0,0.0
5425,0.0,0.0,0.0,0.0,1.0,0,0.0
5426,0.0,0.0,0.0,0.0,0.0,0,0.0
5427,0.0,0.0,0.0,0.0,0.0,0,0.0


Unnamed: 0,cv0,cv1,cv2,cv3,cv4,cv_ensemble,label
count,5429.0,5429.0,5429.0,5429.0,5429.0,5429.0,5429.0
mean,0.076626,0.071468,0.080862,0.081046,0.106834,0.076441,0.023209
std,0.266021,0.257629,0.272648,0.272931,0.30893,0.265727,0.15058
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [21]:
cv_ensemble_fb_score = fbeta_score(y_true=test_pred_df['label'], y_pred=test_pred_df['cv_ensemble'], beta=7.0)
print(f"CV_Ensemble_Fb_score : {cv_ensemble_fb_score}")

CV_Ensemble_Fb_score : 0.9030201851570798
