In [1]:
# Mount Google Drive at /content/drive; the project directory used by the
# later cells (inputs and experiment outputs) lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install transformers
!pip install datasets
!pip install sentencepiece

Collecting transformers
  Downloading transformers-4.32.1-py3-none-any.whl (7.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.5/7.5 MB[0m [31m57.5 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.15.1 (from transformers)
  Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.8/268.8 kB[0m [31m33.9 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m98.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors>=0.3.1 (from transformers)
  Downloading safetensors-0.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m74.8 MB/s[0m eta [36m0:00:0

In [3]:
# Record which GPU (and driver / CUDA version) this run was executed on.
!nvidia-smi

Sat Sep  2 07:39:58 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A100-SXM...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   32C    P0    44W / 400W |      0MiB / 40960MiB |      0%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [4]:
import os
import gc
import math
import time
import random
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.simplefilter('ignore')
from tqdm import tqdm
import re
import html

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import Adam, SGD, AdamW, RAdam
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader, Dataset

from sklearn.model_selection import StratifiedKFold,StratifiedGroupKFold,GroupKFold
from sklearn.metrics import log_loss,f1_score, recall_score, accuracy_score, precision_score

from transformers import AutoModel, AutoConfig, AutoTokenizer, AdamW, DataCollatorWithPadding
from transformers import get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [5]:
import os

# Project layout on the mounted Drive: <DIR>/input is read by the data-loading
# cell, <DIR>/output receives artifacts (one sub-folder per experiment).
DIR = "/content/drive/MyDrive/Competitions/Signate/MUFG2023"
INPUT_DIR = os.path.join(DIR,"input")
OUTPUT_DIR = os.path.join(DIR,"output")

# exist_ok=True keeps the cell re-runnable without a separate existence check
# (and without the check-then-create race of the previous form).
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Kept as a plain string WITH a trailing slash: later cells build file paths
# by simple string concatenation (OUTPUT_EXP_DIR + 'name').
OUTPUT_EXP_DIR = DIR + '/output/EXP050/'
os.makedirs(OUTPUT_EXP_DIR, exist_ok=True)

In [6]:


# ====================================================
# CFG
# ====================================================
class CFG:
    """Experiment configuration, used as a plain namespace of class attributes.

    The class itself (not an instance) is passed around as ``cfg`` in the
    cells visible here; the tokenizer cell later attaches ``CFG.tokenizer``.
    """
    debug=False  # True: fewer epochs/folds (see below) and a 500-row subsample in the fold cell
    apex=True  # enables torch.cuda.amp autocast + GradScaler in train_fn
    print_freq=100  # log every N batches in train_fn / valid_fn
    num_workers=4  # DataLoader worker processes
    # model="microsoft/deberta-v3-base"
    # model='microsoft/deberta-base'
    # model='roberta-base'
    # model='roberta-large'
    # model='roberta-large-mnli'
    # model='xlnet-large-cased'
    # model='albert-xxlarge-v2'
    # model="microsoft/deberta-large"
    model="microsoft/deberta-v3-large"  # checkpoint name used for both tokenizer and backbone
    # model='microsoft/deberta-v2-xlarge'
    # model='funnel-transformer/large'
    # model='funnel-transformer/medium'
    # model='albert-base-v2'
    # model='albert-large-v2'
    # model='google/electra-large-discriminator'
    # model='google/electra-base-discriminator'
    # model="facebook/bart-large-mnli"
    # model="facebook/bart-large"
    # model="facebook/bart-base"
    scheduler='cosine' # ['linear', 'cosine']
    batch_scheduler=True  # step the LR scheduler after every optimizer step
    num_cycles=0.5  # forwarded to the cosine schedule
    num_warmup_steps=0  # forwarded to the linear / cosine schedule
    epochs=4
    encoder_lr=2e-5  # learning rate for the transformer backbone parameters
    decoder_lr=2e-5  # learning rate for the parameters outside the backbone (head)
    min_lr=1e-6  # NOTE(review): not referenced by the cells visible here
    eps=1e-6  # AdamW epsilon
    betas=(0.9, 0.999)  # AdamW betas
    batch_size=64  # training batch size; validation uses twice this
    fc_dropout=0.2  # NOTE(review): not referenced by the cells visible here
    target="is_fraud?"  # label column in the train table
    target_size=1  # output width of the classification head
    max_len=97  # tokenizer max_length (inputs are padded / truncated to it)
    weight_decay=0.01  # applied to backbone weights not matched by the no-decay list
    gradient_accumulation_steps=1
    max_grad_norm=1000  # threshold passed to clip_grad_norm_
    seed=42
    n_fold=5  # number of stratified folds
    trn_fold=[0, 1, 2, 3, 4]  # folds that are actually trained
    train=True  # gate for the training cell
    nth_awp_start_epoch=1  # NOTE(review): not referenced by the cells visible here
    gradient_checkpointing = False  # True: call gradient_checkpointing_enable() on the backbone
    freezing = False  # True: freeze embeddings and the first 2 encoder layers

# Debug mode: shorten the run to 2 epochs on the first two folds.
if CFG.debug:
    CFG.epochs = 2
    CFG.trn_fold = [0, 1]

In [7]:
def get_score(labels, outputs):
    """Print F1, recall and precision at a fixed 0.5 cut-off and return the F1.

    Args:
        labels: ground-truth binary labels.
        outputs: predicted scores; anything supporting ``> float`` followed by
            ``.astype(int)`` (e.g. a NumPy array).

    Returns:
        The F1 score of ``outputs > 0.5`` against ``labels``.
    """
    thresh = 0.5
    hard_preds = (outputs > thresh).astype(int)

    f_score = f1_score(labels, hard_preds)
    r_score = recall_score(labels, hard_preds)
    p_score = precision_score(labels, hard_preds)

    print(f"f1 score : {f_score}")
    print(f"recall score : {r_score}")
    print(f"precision score : {p_score}")
    return f_score

def get_f1_score(labels, outputs):
    """Return the best F1 obtainable by sweeping the decision threshold.

    Thresholds 0.10, 0.11, ... (step 0.01, below 0.80) are tried; the first
    threshold reaching the highest F1 wins. When no threshold yields a
    positive F1 the score at 0.5 is returned.

    Args:
        labels: ground-truth binary labels.
        outputs: predicted scores supporting ``> float`` and ``.astype(int)``.

    Returns:
        The F1 score at the selected threshold (the threshold itself is not returned).
    """
    top_f1 = 0
    top_cut = 0.5
    candidate_cuts = [np.round(raw, 2) for raw in np.arange(0.1, 0.80, 0.01)]
    for cut in candidate_cuts:
        current = f1_score(labels, (outputs > cut).astype(int))
        if current > top_f1:
            top_f1, top_cut = current, cut
    return f1_score(labels, (outputs > top_cut).astype(int))


def get_logger(filename=OUTPUT_EXP_DIR+'train'):
    """Create the notebook logger writing bare messages to the console and a file.

    The logger is looked up by ``__name__``, so calling this again returns the
    same logger object. Handlers left over from a previous call are removed
    first, which keeps every message from being emitted once per re-run of
    the cell.

    Args:
        filename: log file path without extension; ``.log`` is appended.

    Returns:
        The configured ``logging.Logger`` at INFO level.
    """
    from logging import getLogger, INFO, FileHandler, Formatter, StreamHandler
    logger = getLogger(__name__)
    logger.setLevel(INFO)

    # Drop (and close) handlers from earlier executions of this cell.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()

    # Both handlers below already print the message; without this the root
    # logger prints it a second time (the "INFO:__main__:..." lines in the
    # captured output of the training cell).
    logger.propagate = False

    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    handler2 = FileHandler(filename=f"{filename}.log")
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger

LOGGER = get_logger()

def seed_everything(seed=CFG.seed):
    """Seed the Python, NumPy and PyTorch (CPU and CUDA) random generators.

    Also exports ``PYTHONHASHSEED`` and asks cuDNN for deterministic kernels.

    Args:
        seed: integer seed applied to every generator.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seed_generator in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seed_generator(seed)
    torch.backends.cudnn.deterministic = True

seed_everything(seed=CFG.seed)

In [8]:
def freeze(module):
    """
    Turns off gradient tracking for every parameter of ``module`` (in place).
    """
    for weight in module.parameters():
        weight.requires_grad_(False)

def get_freezed_parameters(module):
    """
    Lists the names (as given by ``named_parameters``) of all parameters of
    ``module`` that currently have ``requires_grad`` switched off.
    """
    return [
        param_name
        for param_name, weight in module.named_parameters()
        if not weight.requires_grad
    ]

def set_embedding_parameters_bits(embeddings_path, optim_bits=32):
    """
    Registers an optimizer-precision override for the ``weight`` of each
    word / position / token_type embedding found on ``embeddings_path``.

    https://github.com/huggingface/transformers/issues/14819#issuecomment-1003427930

    NOTE(review): ``bnb`` is not imported in any cell visible here; calling
    this on an object that has one of the embedding attributes will raise
    NameError unless the name is brought into scope elsewhere.
    """
    attr_names = [f"{kind}_embeddings" for kind in ("word", "position", "token_type")]
    for attr_name in attr_names:
        if not hasattr(embeddings_path, attr_name):
            continue
        manager = bnb.optim.GlobalOptimManager.get_instance()
        manager.register_module_override(
            getattr(embeddings_path, attr_name), 'weight', {'optim_bits': optim_bits}
        )

In [9]:
import pandas as pd
import numpy as np


# Competition tables: transactions (train / test), per-card and per-user
# attributes, and the header-less sample submission.
train = pd.read_csv(os.path.join(INPUT_DIR,"train.csv"))
test = pd.read_csv(os.path.join(INPUT_DIR,"test.csv"))
card = pd.read_csv(os.path.join(INPUT_DIR, "card.csv"))
user = pd.read_csv(os.path.join(INPUT_DIR, "user.csv"))
sub = pd.read_csv(os.path.join(INPUT_DIR, "sample_submit.csv"), header=None)

# Sanity check: shape and first three rows of each table, in load order.
for _frame in (train, test, card, user, sub):
    print(_frame.shape)
    display(_frame.head(3))

(471283, 12)


Unnamed: 0,index,user_id,card_id,amount,errors?,is_fraud?,merchant_id,merchant_city,merchant_state,zip,mcc,use_chip
0,0,1721,0,$2.623,OK,0,209237,Joliet,IL,60436.0,5541,Swipe Transaction
1,1,1629,3,$6.4,OK,0,2568,Edgerton,WI,53534.0,5814,Swipe Transaction
2,2,655,3,$123.5,OK,0,345310,Ridgefield,WA,98642.0,7538,Swipe Transaction


(457958, 11)


Unnamed: 0,index,user_id,card_id,amount,errors?,merchant_id,merchant_city,merchant_state,zip,mcc,use_chip
0,471283,541,3,$113.278,OK,324189,Orlando,FL,32821.0,4814,Swipe Transaction
1,471284,655,1,$293.944,OK,81219,Ridgefield,WA,98642.0,7538,Chip Transaction
2,471285,492,0,$47.4,OK,274755,Arlington Heights,IL,60004.0,5719,Swipe Transaction


(416, 10)


Unnamed: 0,user_id,card_id,card_brand,card_type,expires,has_chip,cards_issued,credit_limit,acct_open_date,year_pin_last_changed
0,39,0,Visa,Debit,09/2021,YES,1,$17117,05/2007,2010
1,39,1,Amex,Credit,11/2024,YES,2,$5400,10/2015,2015
2,41,0,Discover,Credit,03/2022,YES,2,$14800,12/2010,2011


(97, 17)


Unnamed: 0,user_id,current_age,retirement_age,birth_year,birth_month,gender,address,city,state,zipcode,latitude,longitude,per_capita_income_zipcode,yearly_income_person,total_debt,fico_score,num_credit_cards
0,39,57,64,1962,12,Female,442 Burns Boulevard,Mansfield,MA,2048,42.02,-71.21,$37407,$76274,$102611,698,2
1,41,39,66,1980,10,Female,3863 River Avenue,Lincoln,CA,95648,38.93,-121.25,$21829,$44506,$57994,849,3
2,47,40,67,1979,5,Female,8799 Elm Avenue,Mckinney,TX,75069,33.2,-96.65,$24684,$50329,$76759,625,4


(457958, 2)


Unnamed: 0,0,1
0,471283,0
1,471284,1
2,471285,0


In [10]:
# Lookup from the zero-padded month number (the "MM" part of the card table's
# "MM/YYYY" dates) to the English month name used in the generated text.
month_dict = {
   "01": "January",
   "02": "February",
   "03": "March",
   "04": "April",
   "05": "May",
   "06": "June",
   "07": "July",
   "08": "August",
   "09": "September",
   "10": "October",
   "11": "November",
   "12": "December"
}

def get_expires_values(df):
  """Split the ``expires`` column ("MM/YYYY") into month and year string columns.

  Adds ``expires_month`` and ``expires_years`` to ``df`` in place and returns
  the same frame.

  Args:
    df: frame with a string column ``expires`` holding one '/' per value.

  Returns:
    ``df`` itself, with the two new columns.
  """
  # expand=True splits straight into columns instead of building one
  # pd.Series per row via .apply(pd.Series).
  parts = df["expires"].str.split('/', expand=True)
  parts.columns = ["month","years"]
  df["expires_month"] = parts["month"].astype(str)
  df["expires_years"] = parts["years"].astype(str)
  return df

def get_acct_open_date_values(df):
  """Split the ``acct_open_date`` column ("MM/YYYY") into month and year string columns.

  Adds ``acct_open_date_month`` and ``acct_open_date_years`` to ``df`` in
  place and returns the same frame.

  Args:
    df: frame with a string column ``acct_open_date`` holding one '/' per value.

  Returns:
    ``df`` itself, with the two new columns.
  """
  # expand=True splits straight into columns instead of building one
  # pd.Series per row via .apply(pd.Series).
  parts = df["acct_open_date"].str.split('/', expand=True)
  parts.columns = ["month","years"]
  df["acct_open_date_month"] = parts["month"].astype(str)
  df["acct_open_date_years"] = parts["years"].astype(str)
  return df


# Split both card date columns into month / year strings ...
card = get_expires_values(card)
card = get_acct_open_date_values(card)
# ... then replace the two-digit month with its English name. Series.map
# yields NaN for any value that is not a key of month_dict.
card["expires_month"] = card["expires_month"].map(month_dict)
card["acct_open_date_month"] = card["acct_open_date_month"].map(month_dict)

In [11]:
# Attach card attributes (keyed by user_id + card_id) and user attributes
# (keyed by user_id) to every transaction; left joins keep all train rows.
train = train.merge(card, how="left", on=["user_id", "card_id"]).merge(user, how="left", on="user_id")

In [12]:
# Missing values become the literal token 'unknown' so that the string
# concatenation below never meets a NaN.
train.fillna('unknown', inplace = True)

# One "[SEP]"-delimited string per transaction: merchant fields first, then
# card fields, then user fields. Every separator is joined with an explicit
# '+' (no reliance on adjacent string literals being merged).
train["texts"] = (
    "merchant" + "[SEP]"
    + train["amount"] + "[SEP]"
    + train["errors?"] + "[SEP]"
    + train["merchant_city"] + "[SEP]"
    + train["merchant_state"] + "[SEP]"
    + train["use_chip"] + "[SEP]"
    + "card" + "[SEP]"
    + train["card_brand"] + "[SEP]"
    + train["card_type"] + "[SEP]"
    + train["expires_month"] + " " + train["expires_years"] + "[SEP]"
    + train["has_chip"] + "[SEP]"
    + train["acct_open_date_month"] + " " + train["acct_open_date_years"] + "[SEP]"
    + train["year_pin_last_changed"].astype(str) + "[SEP]"
    + "user" + "[SEP]"
    + train["current_age"].astype(str) + " year old " + train["gender"] + "[SEP]"
    + "retired at age " + train["retirement_age"].astype(str) + "[SEP]"
    + train["address"] + "[SEP]"
    + train["city"] + "[SEP]"
    + train["state"] + "[SEP]"
    + train["per_capita_income_zipcode"] + "[SEP]"
    + train["yearly_income_person"] + "[SEP]"
    + train["total_debt"]
)

In [13]:
# Assign every row to one of CFG.n_fold validation folds, stratified on the
# target column so each fold keeps the same class balance.
skf = StratifiedKFold(n_splits=CFG.n_fold,shuffle=True,random_state=CFG.seed)
for fold, ( _, val_) in enumerate(skf.split(train, train[CFG.target])):
    # NOTE(review): split() yields positional indices while .loc is label
    # based; this relies on train carrying a default 0..n-1 index — confirm
    # if rows are ever filtered or re-indexed before this cell.
    train.loc[val_ , "kfold"] = int(fold)

# The column is filled fold by fold above; cast once every row has a value.
train["kfold"] = train["kfold"].astype(int)

# Debug mode: show fold sizes, subsample 500 rows, show fold sizes again.
if CFG.debug:
    display(train.groupby('kfold').size())
    train = train.sample(n=500, random_state=0).reset_index(drop=True)
    display(train.groupby('kfold').size())

In [14]:
# ====================================================
# tokenizer
# ====================================================
# Load the tokenizer that matches CFG.model, save a copy next to the
# experiment outputs, and expose it on CFG (prepare_input reads cfg.tokenizer).
tokenizer = AutoTokenizer.from_pretrained(CFG.model)
tokenizer.save_pretrained(OUTPUT_EXP_DIR+'tokenizer/')
CFG.tokenizer = tokenizer

Downloading (…)okenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/580 [00:00<?, ?B/s]

Downloading spm.model:   0%|          | 0.00/2.46M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [15]:
# ====================================================
# Define max_len
# ====================================================
#lengths = []
#tk0 = tqdm(train['texts'].fillna("").values, total=len(train))
#for text in tk0:
#    length = len(tokenizer(text, add_special_tokens=False)['input_ids'])
#    lengths.append(length)
#CFG.max_len = max(lengths) + 23 # cls
#LOGGER.info(f"max_len: {CFG.max_len}")

In [16]:
# ====================================================
# Dataset
# ====================================================
def prepare_input(cfg, text):
    """Tokenize one text and convert every returned field to a long tensor.

    The text is encoded with special tokens and padded / truncated to
    ``cfg.max_len``.

    Args:
        cfg: object exposing ``tokenizer`` (callable) and ``max_len``.
        text: the string to encode.

    Returns:
        The mapping returned by the tokenizer, with each value replaced by a
        ``torch.long`` tensor.
    """
    encoded = cfg.tokenizer(
        text,
        add_special_tokens=True,
        max_length=cfg.max_len,
        padding="max_length",
        return_offsets_mapping=False,
        truncation=True,
    )
    for field in list(encoded.keys()):
        encoded[field] = torch.tensor(encoded[field], dtype=torch.long)
    return encoded


class TrainDataset(Dataset):
    """Training dataset yielding ``(tokenized text, label)`` pairs.

    Args:
        cfg: configuration object; must expose ``target`` (label column name)
            plus whatever ``prepare_input`` reads from it.
        df: frame with a ``texts`` column and the label column.
    """
    def __init__(self, cfg, df):
        self.cfg = cfg
        self.inputs = df['texts'].values
        # Read the label column from the cfg that was passed in, not from the
        # module-level CFG, so the dataset honours its own argument.
        self.labels = df[cfg.target].values

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, item):
        """Return the encoded text at ``item`` and its label as a float16 scalar tensor."""
        inputs = prepare_input(self.cfg, self.inputs[item])
        label = torch.tensor(self.labels[item], dtype=torch.half)
        return inputs, label

def collate(inputs):
    """Trim a padded batch to the length of its longest real sequence.

    Every tensor in ``inputs`` is cut along dimension 1 to the largest
    per-row sum of ``attention_mask``. The mapping is modified in place and
    returned.
    """
    longest = int(inputs["attention_mask"].sum(axis=1).max())
    for field in list(inputs.keys()):
        inputs[field] = inputs[field][:, :longest]
    return inputs

class ValidDataset(Dataset):
    """Validation dataset yielding ``(tokenized text, label)`` pairs.

    Same contract as the training dataset, except that labels are returned
    as float32 tensors.

    Args:
        cfg: configuration object; must expose ``target`` (label column name)
            plus whatever ``prepare_input`` reads from it.
        df: frame with a ``texts`` column and the label column.
    """
    def __init__(self, cfg, df):
        self.cfg = cfg
        self.inputs = df['texts'].values
        # Read the label column from the cfg that was passed in, not from the
        # module-level CFG, so the dataset honours its own argument.
        self.labels = df[cfg.target].values

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, item):
        """Return the encoded text at ``item`` and its label as a float32 scalar tensor."""
        inputs = prepare_input(self.cfg, self.inputs[item])
        label = torch.tensor(self.labels[item], dtype=torch.float)
        return inputs, label

# The second, byte-identical definition of `collate` that used to follow this
# class was removed: it only shadowed the one defined earlier in this cell.

#collate_fn = DataCollatorWithPadding(tokenizer=tokenizer)

In [17]:
# ====================================================
# Model
# ====================================================
class MeanPooling(nn.Module):
    """Attention-mask-weighted mean over the sequence dimension (dim 1)."""
    def __init__(self):
        super().__init__()

    def forward(self, last_hidden_state, attention_mask):
        """Average ``last_hidden_state`` over positions where ``attention_mask`` is non-zero.

        The mask is broadcast to the shape of ``last_hidden_state``; the
        divisor is clamped to at least 1e-9 so fully masked rows give zeros
        rather than a division by zero.
        """
        weights = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
        weighted_total = (last_hidden_state * weights).sum(1)
        weight_total = weights.sum(1).clamp(min=1e-9)
        return weighted_total / weight_total

class MaxPooling(nn.Module):
    """Element-wise maximum over the sequence dimension, ignoring masked positions."""
    def __init__(self):
        super().__init__()

    def forward(self, last_hidden_state, attention_mask):
        """Return the per-feature maximum over dim 1.

        Positions whose mask is 0 are replaced by -1e4 in a copy of the
        hidden states before the maximum is taken; the input tensor itself
        is left untouched.
        """
        keep = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
        masked_states = last_hidden_state.masked_fill(keep == 0, -1e4)
        return masked_states.max(dim=1).values


class CustomModel(nn.Module):
    """Transformer backbone with masked mean pooling, LayerNorm and a linear head.

    ``forward`` returns raw logits (no sigmoid is applied).

    Args:
        cfg: configuration object; ``model``, ``gradient_checkpointing``,
            ``freezing`` and ``target_size`` are read from it.
        config_path: optional path to a config object saved with
            ``torch.save``. When None the config is fetched for ``cfg.model``
            with all dropout probabilities set to 0.
        pretrained: True loads the pretrained backbone weights; False builds
            the architecture from the config with freshly initialised weights.
    """
    def __init__(self, cfg, config_path=None, pretrained=False):
        super().__init__()
        self.cfg = cfg
        if config_path is None:
            self.config = AutoConfig.from_pretrained(cfg.model, output_hidden_states=True)
            # Disable every dropout in the backbone.
            self.config.hidden_dropout = 0.
            self.config.hidden_dropout_prob = 0.
            self.config.attention_dropout = 0.
            self.config.attention_probs_dropout_prob = 0.
            LOGGER.info(self.config)
        else:
            # NOTE: torch.load unpickles arbitrary objects — only pass config
            # files produced by this notebook.
            self.config = torch.load(config_path)
        if pretrained:
            self.model = AutoModel.from_pretrained(cfg.model, config=self.config)
        else:
            # AutoModel cannot be instantiated directly (its constructor
            # raises); from_config builds the architecture without weights.
            self.model = AutoModel.from_config(self.config)
        if self.cfg.gradient_checkpointing:
            self.model.gradient_checkpointing_enable()

        # Freezing
        if cfg.freezing:
            # freezing embeddings and first 2 layers of encoder
            freeze((self.model).embeddings)
            freeze((self.model).encoder.layer[:2])
            # Lazy iterator over the parameters that are still trainable.
            cfg.after_freezed_parameters = filter(lambda parameter: parameter.requires_grad, (self.model).parameters())

        self.pool = MeanPooling()
        self.fc = nn.Linear(self.config.hidden_size, cfg.target_size)
        self._init_weights(self.fc)
        self.layer_norm1 = nn.LayerNorm(self.config.hidden_size)
        self.sig = nn.Sigmoid()  # kept as an attribute; forward() does not apply it

    def _init_weights(self, module):
        """Initialise ``module`` the way the backbone config prescribes.

        Linear / Embedding weights are drawn from N(0, initializer_range);
        biases and padding rows are zeroed; LayerNorm is reset to identity.
        """
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)

    def feature(self, inputs):
        """Run the backbone and mean-pool its first output over the attention mask."""
        outputs = self.model(**inputs)
        last_hidden_states = outputs[0]
        feature = self.pool(last_hidden_states, inputs['attention_mask'])
        return feature

    def forward(self, inputs):
        """Return logits computed from the pooled, layer-normalised features."""
        feature = self.feature(inputs)
        feature = self.layer_norm1(feature)
        output = self.fc(feature)
        #output = self.sig(output)
        return output

In [18]:
# ====================================================
# Helper functions
# ====================================================
class AverageMeter(object):
    """Keeps the most recent value and the weighted running mean of a series."""
    def __init__(self):
        self.reset()

    def reset(self):
        """Forget everything recorded so far."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the running mean."""
        self.count += n
        self.sum += val * n
        self.val = val
        self.avg = self.sum / self.count


def asMinutes(s):
    """Format a duration in seconds as ``'<minutes>m <seconds>s'`` (fractions truncated)."""
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    return '%dm %ds' % (whole_minutes, leftover_seconds)


def timeSince(since, percent):
    """Describe elapsed and estimated remaining time of a running task.

    Args:
        since: start timestamp as returned by ``time.time()``.
        percent: completed fraction of the task; must be non-zero.

    Returns:
        ``'<elapsed> (remain <remaining>)'`` with both parts formatted by ``asMinutes``.
    """
    elapsed = time.time() - since
    projected_total = elapsed / (percent)
    remaining = projected_total - elapsed
    return '%s (remain %s)' % (asMinutes(elapsed), asMinutes(remaining))


def train_fn(fold, train_loader, model, criterion, optimizer, epoch, scheduler, device):
    """Train ``model`` for one pass over ``train_loader`` with mixed precision.

    Args:
        fold: fold index (unused here; kept for the call signature).
        train_loader: yields ``(inputs, labels)`` batches, ``inputs`` being a
            mapping of tensors that includes ``attention_mask``.
        model: module called as ``model(inputs)`` and returning logits.
        criterion: loss taking ``(logits, labels)``, both shaped ``(-1, 1)``.
        optimizer: optimizer stepped through the gradient scaler.
        epoch: zero-based epoch index, used for logging only.
        scheduler: LR scheduler; stepped per optimizer step when
            ``CFG.batch_scheduler`` is set.
        device: device the batch is moved to.

    Returns:
        The sample-weighted mean training loss of the pass.
    """
    model.train()
    # NOTE(review): a new GradScaler is created on every call (i.e. every
    # epoch), so the loss scale restarts each time; the captured log shows
    # `Grad: nan` on the first step of each epoch.
    scaler = torch.cuda.amp.GradScaler(enabled=CFG.apex)
    losses = AverageMeter()
    start = time.time()
    # Reported in the log until the first optimizer step computes a real norm.
    grad_norm = float('nan')
    for step, (inputs, labels) in enumerate(train_loader):
        inputs = collate(inputs)
        for k, v in inputs.items():
            inputs[k] = v.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        with torch.cuda.amp.autocast(enabled=CFG.apex):
            y_preds = model(inputs)
        # Evaluate the loss in float32: under autocast the logits come back in
        # half precision, which is too coarse for a numerically stable loss.
        loss = criterion(y_preds.view(-1, 1).float(), labels.view(-1, 1).float())
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        losses.update(loss.item(), batch_size)
        scaler.scale(loss).backward()
        if (step + 1) % CFG.gradient_accumulation_steps == 0:
            # unscale_ may be called only once per optimizer step, so it (and
            # the clipping that needs unscaled grads) belongs to the step
            # branch rather than to every micro-batch.
            scaler.unscale_(optimizer)
            grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), CFG.max_grad_norm)
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()
            if CFG.batch_scheduler:
                scheduler.step()
        if step % CFG.print_freq == 0 or step == (len(train_loader)-1):
            print('Epoch: [{0}][{1}/{2}] '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  'Grad: {grad_norm:.4f}  '
                  'LR: {lr:.8f}  '
                  .format(epoch+1, step, len(train_loader),
                          remain=timeSince(start, float(step+1)/len(train_loader)),
                          loss=losses,
                          grad_norm=grad_norm,
                          lr=scheduler.get_last_lr()[0]))

    return losses.avg


def valid_fn(valid_loader, model, criterion, device):
    """Evaluate ``model`` on ``valid_loader`` without gradient tracking.

    Args:
        valid_loader: yields ``(inputs, labels)`` batches, ``inputs`` being a
            mapping of tensors that includes ``attention_mask``.
        model: module called as ``model(inputs)`` and returning logits.
        criterion: loss taking ``(logits, labels)``, both shaped ``(-1, 1)``.
        device: device the batch is moved to.

    Returns:
        ``(mean_loss, predictions)`` where ``mean_loss`` is the
        sample-weighted mean validation loss and ``predictions`` is a flat
        NumPy array of sigmoid probabilities in loader order.
    """
    losses = AverageMeter()
    model.eval()
    preds = []
    start = time.time()
    for step, (inputs, labels) in enumerate(valid_loader):
        inputs = collate(inputs)
        for k, v in inputs.items():
            inputs[k] = v.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        with torch.no_grad():
            y_preds = model(inputs)
            loss = criterion(y_preds.view(-1, 1), labels.view(-1, 1))
        # No division by gradient_accumulation_steps here: that factor only
        # belongs to the training backward pass and would understate the
        # reported validation loss.
        losses.update(loss.item(), batch_size)
        preds.append(y_preds.sigmoid().to('cpu').numpy())
        if step % CFG.print_freq == 0 or step == (len(valid_loader)-1):
            print('EVAL: [{0}/{1}] '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  .format(step, len(valid_loader),
                          loss=losses,
                          remain=timeSince(start, float(step+1)/len(valid_loader))))
    # Stack the per-batch arrays, then flatten to one probability per entry.
    predictions = np.concatenate(preds).reshape(-1)
    return losses.avg, predictions


def inference_fn(test_loader, model, device):
    """Predict sigmoid probabilities for every batch of ``test_loader``.

    Args:
        test_loader: yields mappings of tensors that include ``attention_mask``.
        model: module called as ``model(inputs)`` and returning logits; it is
            switched to eval mode and moved to ``device``.
        device: device used for inference.

    Returns:
        The per-batch probability arrays concatenated along the first axis.
    """
    model.eval()
    model.to(device)
    batch_probs = []
    for batch in tqdm(test_loader, total=len(test_loader)):
        batch = collate(batch)
        for field, tensor in batch.items():
            batch[field] = tensor.to(device)
        with torch.no_grad():
            logits = model(batch)
        batch_probs.append(logits.sigmoid().to('cpu').numpy())
    return np.concatenate(batch_probs)

In [19]:
# ====================================================
# train loop
# ====================================================
def train_loop(folds, fold):
    """Train and validate one cross-validation fold.

    Rows with ``kfold != fold`` are used for training, rows with
    ``kfold == fold`` for validation. After every epoch the model is scored;
    whenever the threshold-searched F1 improves, the weights and validation
    predictions are saved to ``<OUTPUT_EXP_DIR><model>_fold<fold>_best.pth``.

    Args:
        folds: frame with ``texts``, the target column and ``kfold``.
        fold: index of the validation fold.

    Returns:
        The validation rows of this fold with an added ``pred`` column holding
        the predictions of the best epoch.
    """

    LOGGER.info(f"========== fold: {fold} training ==========")

    # ====================================================
    # loader
    # ====================================================
    train_folds = folds[folds['kfold'] != fold].reset_index(drop=True)
    valid_folds = folds[folds['kfold'] == fold].reset_index(drop=True)
    valid_labels = valid_folds[CFG.target].values

    train_dataset = TrainDataset(CFG, train_folds)
    valid_dataset = ValidDataset(CFG, valid_folds)


    train_loader = DataLoader(train_dataset,
                              batch_size=CFG.batch_size,
                              shuffle=True,
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=True)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=CFG.batch_size*2,
                              shuffle=False,
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=False)

    # ====================================================
    # model & optimizer
    # ====================================================
    model = CustomModel(CFG, config_path=None, pretrained=True)
    torch.save(model.config, OUTPUT_EXP_DIR+'config.pth')
    model.to(device)

    def get_optimizer_params(model, encoder_lr, decoder_lr, weight_decay=0.0):
        """Build three parameter groups: decayed backbone weights, non-decayed
        backbone biases / LayerNorm parameters, and everything outside the backbone."""
        no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
        optimizer_parameters = [
            {'params': [p for n, p in model.model.named_parameters() if not any(nd in n for nd in no_decay)],
             'lr': encoder_lr, 'weight_decay': weight_decay},
            {'params': [p for n, p in model.model.named_parameters() if any(nd in n for nd in no_decay)],
             'lr': encoder_lr, 'weight_decay': 0.0},
            {'params': [p for n, p in model.named_parameters() if "model" not in n],
             'lr': decoder_lr, 'weight_decay': 0.0}
        ]
        return optimizer_parameters

    optimizer_parameters = get_optimizer_params(model,
                                                encoder_lr=CFG.encoder_lr,
                                                decoder_lr=CFG.decoder_lr,
                                                weight_decay=CFG.weight_decay)
    optimizer = AdamW(optimizer_parameters, lr=CFG.encoder_lr, eps=CFG.eps, betas=CFG.betas)

    # ====================================================
    # scheduler
    # ====================================================
    def get_scheduler(cfg, optimizer, num_train_steps):
        """Return the linear or cosine warmup schedule selected by ``cfg.scheduler``."""
        if cfg.scheduler == 'linear':
            scheduler = get_linear_schedule_with_warmup(
                optimizer, num_warmup_steps=cfg.num_warmup_steps, num_training_steps=num_train_steps
            )
        elif cfg.scheduler == 'cosine':
            scheduler = get_cosine_schedule_with_warmup(
                optimizer, num_warmup_steps=cfg.num_warmup_steps, num_training_steps=num_train_steps, num_cycles=cfg.num_cycles
            )
        return scheduler

    # Count the optimizer steps that will really happen: the loader drops the
    # last partial batch and one step is taken per accumulation window.
    steps_per_epoch = len(train_loader) // CFG.gradient_accumulation_steps
    num_train_steps = steps_per_epoch * CFG.epochs
    scheduler = get_scheduler(CFG, optimizer, num_train_steps)

    # ====================================================
    # loop
    # ====================================================
    criterion = nn.BCEWithLogitsLoss()

    best_score = -1.
    best_predictions = None
    checkpoint_path = OUTPUT_EXP_DIR+f"{CFG.model.replace('/', '-')}_fold{fold}_best.pth"

    for epoch in range(CFG.epochs):

        start_time = time.time()

        # train
        avg_loss = train_fn(fold, train_loader, model, criterion, optimizer, epoch, scheduler, device)

        # eval
        avg_val_loss, predictions = valid_fn(valid_loader, model, criterion, device)

        # scoring: `score` is F1 at the fixed 0.5 cut-off (logged below);
        # `searched_f1` is the best F1 over the threshold sweep and drives
        # model selection.
        score = get_score(valid_labels, predictions)
        searched_f1 = get_f1_score(valid_labels, predictions)

        elapsed = time.time() - start_time

        LOGGER.info(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s')
        LOGGER.info(f'Epoch {epoch+1} - Score: {score:.4f}')


        if best_score < searched_f1:
            best_score = searched_f1
            best_predictions = predictions
            LOGGER.info(f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model')
            torch.save({'model': model.state_dict(),
                        'predictions': predictions},
                        checkpoint_path)

    if best_predictions is None:
        # No epoch ran in this call; fall back to a checkpoint left on disk.
        best_predictions = torch.load(checkpoint_path,
                                      map_location=torch.device('cpu'))['predictions']
    # The best predictions are kept in memory, so the full checkpoint (model
    # weights included) does not have to be read back just to recover them.
    valid_folds['pred'] = best_predictions

    # Drop the references first so the cache flush can actually return memory.
    del model, optimizer, scheduler
    gc.collect()
    torch.cuda.empty_cache()

    return valid_folds

In [20]:
if __name__ == '__main__':

    def get_result(oof_df):
        """Log F1 at the 0.5 cut-off and the threshold-searched F1 of an out-of-fold frame."""
        labels = oof_df[CFG.target].values
        preds = oof_df['pred'].values
        score = get_score(labels, preds)
        searched_f1 = get_f1_score(labels, preds)
        LOGGER.info(f'Score: {score:<.4f}')
        LOGGER.info(f'F1 BEST Score: {searched_f1:<.4f}')

    if CFG.train:
        # Collect the per-fold frames and concatenate once at the end instead
        # of growing a DataFrame inside the loop.
        oof_frames = []
        for fold in range(CFG.n_fold):
            if fold in CFG.trn_fold:
                _oof_df = train_loop(train, fold)
                oof_frames.append(_oof_df)
                LOGGER.info(f"========== fold: {fold} result ==========")
                get_result(_oof_df)
            #break
        oof_df = pd.concat(oof_frames) if oof_frames else pd.DataFrame()
        oof_df = oof_df.reset_index(drop=True)
        LOGGER.info(f"========== CV ==========")
        get_result(oof_df)
        oof_df.to_pickle(OUTPUT_EXP_DIR+'oof_df.pkl')

DebertaV2Config {
  "_name_or_path": "microsoft/deberta-v3-large",
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "output_hidden_states": true,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 1024,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input": false,
  "position_buckets": 256,
  "relative_attention": true,
  "share_att_key": true,
  "transformers_version": "4.32.1",
  "type_vocab_size": 0,
  "vocab_size": 128100
}

INFO:__main__:DebertaV2Config {
  "_name_or_path": "microsoft/deberta-v3-large",
  "attention_dropout": 0.0,
 

Downloading pytorch_model.bin:   0%|          | 0.00/874M [00:00<?, ?B/s]

Epoch: [1][0/5891] Elapsed 0m 4s (remain 428m 13s) Loss: 0.7217(0.7217) Grad: nan  LR: 0.00002000  
Epoch: [1][100/5891] Elapsed 0m 24s (remain 23m 22s) Loss: 0.1820(0.2691) Grad: 0.5388  LR: 0.00002000  
Epoch: [1][200/5891] Elapsed 0m 44s (remain 21m 11s) Loss: 0.1932(0.2445) Grad: 2.1325  LR: 0.00002000  
Epoch: [1][300/5891] Elapsed 1m 5s (remain 20m 8s) Loss: 0.1771(0.2328) Grad: 1.4769  LR: 0.00001999  
Epoch: [1][400/5891] Elapsed 1m 25s (remain 19m 27s) Loss: 0.1471(0.2242) Grad: 0.7167  LR: 0.00001999  
Epoch: [1][500/5891] Elapsed 1m 45s (remain 18m 54s) Loss: 0.2715(0.2219) Grad: 0.9667  LR: 0.00001998  
Epoch: [1][600/5891] Elapsed 2m 5s (remain 18m 26s) Loss: 0.1816(0.2202) Grad: 1.7610  LR: 0.00001997  
Epoch: [1][700/5891] Elapsed 2m 25s (remain 17m 59s) Loss: 0.2520(0.2185) Grad: 0.8808  LR: 0.00001996  
Epoch: [1][800/5891] Elapsed 2m 45s (remain 17m 34s) Loss: 0.2629(0.2178) Grad: 0.8698  LR: 0.00001994  
Epoch: [1][900/5891] Elapsed 3m 6s (remain 17m 12s) Loss: 0.235

Epoch 1 - avg_train_loss: 0.2424  avg_val_loss: 0.2522  time: 1510s
INFO:__main__:Epoch 1 - avg_train_loss: 0.2424  avg_val_loss: 0.2522  time: 1510s
Epoch 1 - Score: 0.0000
INFO:__main__:Epoch 1 - Score: 0.0000
Epoch 1 - Save Best Score: 0.0000 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.0000 Model


Epoch: [2][0/5891] Elapsed 0m 0s (remain 48m 42s) Loss: 0.5317(0.5317) Grad: nan  LR: 0.00001707  
Epoch: [2][100/5891] Elapsed 0m 21s (remain 20m 7s) Loss: 0.2345(0.2606) Grad: 0.2304  LR: 0.00001698  
Epoch: [2][200/5891] Elapsed 0m 41s (remain 19m 41s) Loss: 0.1544(0.2559) Grad: 1.2956  LR: 0.00001688  
Epoch: [2][300/5891] Elapsed 1m 2s (remain 19m 17s) Loss: 0.2747(0.2515) Grad: 0.3128  LR: 0.00001678  
Epoch: [2][400/5891] Elapsed 1m 22s (remain 18m 49s) Loss: 0.1934(0.2495) Grad: 0.7254  LR: 0.00001668  
Epoch: [2][500/5891] Elapsed 1m 42s (remain 18m 25s) Loss: 0.3582(0.2503) Grad: 1.3860  LR: 0.00001658  
Epoch: [2][600/5891] Elapsed 2m 2s (remain 18m 1s) Loss: 0.1519(0.2483) Grad: 1.1697  LR: 0.00001648  
Epoch: [2][700/5891] Elapsed 2m 23s (remain 17m 38s) Loss: 0.2338(0.2468) Grad: 0.0509  LR: 0.00001638  
Epoch: [2][800/5891] Elapsed 2m 43s (remain 17m 17s) Loss: 0.3120(0.2482) Grad: 0.3181  LR: 0.00001628  
Epoch: [2][900/5891] Elapsed 3m 3s (remain 16m 55s) Loss: 0.2341(

Epoch 2 - avg_train_loss: 0.2519  avg_val_loss: 0.2522  time: 1508s
INFO:__main__:Epoch 2 - avg_train_loss: 0.2519  avg_val_loss: 0.2522  time: 1508s
Epoch 2 - Score: 0.0000
INFO:__main__:Epoch 2 - Score: 0.0000


Epoch: [3][0/5891] Elapsed 0m 0s (remain 48m 54s) Loss: 0.1580(0.1580) Grad: nan  LR: 0.00001000  
Epoch: [3][100/5891] Elapsed 0m 20s (remain 19m 50s) Loss: 0.3162(0.2490) Grad: 0.8906  LR: 0.00000987  
Epoch: [3][200/5891] Elapsed 0m 40s (remain 19m 18s) Loss: 0.2754(0.2498) Grad: 0.3512  LR: 0.00000973  
Epoch: [3][300/5891] Elapsed 1m 1s (remain 18m 55s) Loss: 0.1957(0.2516) Grad: 0.8626  LR: 0.00000960  
Epoch: [3][400/5891] Elapsed 1m 21s (remain 18m 33s) Loss: 0.3174(0.2508) Grad: 0.9542  LR: 0.00000947  
Epoch: [3][500/5891] Elapsed 1m 41s (remain 18m 12s) Loss: 0.2338(0.2512) Grad: 0.1359  LR: 0.00000933  
Epoch: [3][600/5891] Elapsed 2m 1s (remain 17m 52s) Loss: 0.1920(0.2507) Grad: 0.5464  LR: 0.00000920  
Epoch: [3][700/5891] Elapsed 2m 22s (remain 17m 31s) Loss: 0.2345(0.2508) Grad: 0.2302  LR: 0.00000907  
Epoch: [3][800/5891] Elapsed 2m 42s (remain 17m 12s) Loss: 0.1912(0.2495) Grad: 0.4687  LR: 0.00000893  
Epoch: [3][900/5891] Elapsed 3m 2s (remain 16m 52s) Loss: 0.397

Epoch 3 - avg_train_loss: 0.2518  avg_val_loss: 0.2517  time: 1510s
INFO:__main__:Epoch 3 - avg_train_loss: 0.2518  avg_val_loss: 0.2517  time: 1510s
Epoch 3 - Score: 0.0000
INFO:__main__:Epoch 3 - Score: 0.0000


Epoch: [4][0/5891] Elapsed 0m 0s (remain 48m 20s) Loss: 0.2339(0.2339) Grad: nan  LR: 0.00000293  
Epoch: [4][100/5891] Elapsed 0m 20s (remain 19m 50s) Loss: 0.2339(0.2475) Grad: 0.2219  LR: 0.00000283  
Epoch: [4][200/5891] Elapsed 0m 41s (remain 19m 22s) Loss: 0.2336(0.2428) Grad: 0.0804  LR: 0.00000274  
Epoch: [4][300/5891] Elapsed 1m 1s (remain 19m 4s) Loss: 0.3564(0.2464) Grad: 1.3219  LR: 0.00000265  
Epoch: [4][400/5891] Elapsed 1m 21s (remain 18m 41s) Loss: 0.4006(0.2443) Grad: 1.9226  LR: 0.00000256  
Epoch: [4][500/5891] Elapsed 1m 42s (remain 18m 19s) Loss: 0.2335(0.2450) Grad: 0.1129  LR: 0.00000247  
Epoch: [4][600/5891] Elapsed 2m 2s (remain 17m 57s) Loss: 0.1525(0.2455) Grad: 1.1846  LR: 0.00000239  
Epoch: [4][700/5891] Elapsed 2m 22s (remain 17m 36s) Loss: 0.2749(0.2467) Grad: 0.3198  LR: 0.00000230  
Epoch: [4][800/5891] Elapsed 2m 42s (remain 17m 15s) Loss: 0.3574(0.2460) Grad: 1.3534  LR: 0.00000222  
Epoch: [4][900/5891] Elapsed 3m 3s (remain 16m 54s) Loss: 0.3552

Epoch 4 - avg_train_loss: 0.2518  avg_val_loss: 0.2517  time: 1511s
INFO:__main__:Epoch 4 - avg_train_loss: 0.2518  avg_val_loss: 0.2517  time: 1511s
Epoch 4 - Score: 0.0000
INFO:__main__:Epoch 4 - Score: 0.0000


f1 score : 0.0
recall score : 0.0
precision score : 0.0


Score: 0.0000
INFO:__main__:Score: 0.0000
F1 BEST Score: 0.0000
INFO:__main__:F1 BEST Score: 0.0000
DebertaV2Config {
  "_name_or_path": "microsoft/deberta-v3-large",
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "output_hidden_states": true,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 1024,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input": false,
  "position_buckets": 256,
  "relative_attention": true,
  "share_att_key": true,
  "transformers_version": "4.32.1",
  "type_vocab_size": 0,
  "vocab_size": 128100
}

INFO:__mai

Epoch: [1][0/5891] Elapsed 0m 0s (remain 55m 57s) Loss: 1.1582(1.1582) Grad: nan  LR: 0.00002000  
Epoch: [1][100/5891] Elapsed 0m 20s (remain 19m 48s) Loss: 0.2798(0.3020) Grad: 0.4722  LR: 0.00002000  
Epoch: [1][200/5891] Elapsed 0m 41s (remain 19m 21s) Loss: 0.2769(0.2628) Grad: 1.6551  LR: 0.00002000  
Epoch: [1][300/5891] Elapsed 1m 1s (remain 18m 58s) Loss: 0.2391(0.2443) Grad: 0.6301  LR: 0.00001999  
Epoch: [1][400/5891] Elapsed 1m 21s (remain 18m 36s) Loss: 0.1571(0.2380) Grad: 0.7681  LR: 0.00001999  
Epoch: [1][500/5891] Elapsed 1m 41s (remain 18m 14s) Loss: 0.1646(0.2324) Grad: 0.7958  LR: 0.00001998  
Epoch: [1][600/5891] Elapsed 2m 2s (remain 17m 54s) Loss: 0.2712(0.2282) Grad: 0.7506  LR: 0.00001997  
Epoch: [1][700/5891] Elapsed 2m 22s (remain 17m 33s) Loss: 0.1234(0.2243) Grad: 0.9083  LR: 0.00001996  
Epoch: [1][800/5891] Elapsed 2m 42s (remain 17m 12s) Loss: 0.1508(0.2216) Grad: 0.5754  LR: 0.00001994  
Epoch: [1][900/5891] Elapsed 3m 2s (remain 16m 52s) Loss: 0.117

Epoch 1 - avg_train_loss: 0.2185  avg_val_loss: 0.2520  time: 1512s
INFO:__main__:Epoch 1 - avg_train_loss: 0.2185  avg_val_loss: 0.2520  time: 1512s
Epoch 1 - Score: 0.0000
INFO:__main__:Epoch 1 - Score: 0.0000
Epoch 1 - Save Best Score: 0.0000 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.0000 Model


Epoch: [2][0/5891] Elapsed 0m 0s (remain 47m 48s) Loss: 0.1917(0.1917) Grad: nan  LR: 0.00001707  
Epoch: [2][100/5891] Elapsed 0m 21s (remain 20m 6s) Loss: 0.2744(0.2448) Grad: 0.1865  LR: 0.00001698  
Epoch: [2][200/5891] Elapsed 0m 41s (remain 19m 33s) Loss: 0.2341(0.2480) Grad: 0.2456  LR: 0.00001688  
Epoch: [2][300/5891] Elapsed 1m 2s (remain 19m 13s) Loss: 0.3816(0.2520) Grad: 0.9664  LR: 0.00001678  
Epoch: [2][400/5891] Elapsed 1m 22s (remain 18m 47s) Loss: 0.2788(0.2503) Grad: 0.7432  LR: 0.00001668  
Epoch: [2][500/5891] Elapsed 1m 42s (remain 18m 27s) Loss: 0.3589(0.2499) Grad: 1.4255  LR: 0.00001658  
Epoch: [2][600/5891] Elapsed 2m 3s (remain 18m 2s) Loss: 0.2744(0.2522) Grad: 0.2567  LR: 0.00001648  
Epoch: [2][700/5891] Elapsed 2m 23s (remain 17m 41s) Loss: 0.2341(0.2507) Grad: 0.0707  LR: 0.00001638  
Epoch: [2][800/5891] Elapsed 2m 43s (remain 17m 19s) Loss: 0.3147(0.2524) Grad: 0.7007  LR: 0.00001628  
Epoch: [2][900/5891] Elapsed 3m 3s (remain 16m 58s) Loss: 0.3162(

Epoch 2 - avg_train_loss: 0.2520  avg_val_loss: 0.2517  time: 1514s
INFO:__main__:Epoch 2 - avg_train_loss: 0.2520  avg_val_loss: 0.2517  time: 1514s
Epoch 2 - Score: 0.0000
INFO:__main__:Epoch 2 - Score: 0.0000


Epoch: [3][0/5891] Elapsed 0m 0s (remain 47m 50s) Loss: 0.3140(0.3140) Grad: nan  LR: 0.00001000  
Epoch: [3][100/5891] Elapsed 0m 20s (remain 19m 49s) Loss: 0.2739(0.2759) Grad: 0.0684  LR: 0.00000987  
Epoch: [3][200/5891] Elapsed 0m 41s (remain 19m 21s) Loss: 0.2749(0.2573) Grad: 0.3405  LR: 0.00000973  
Epoch: [3][300/5891] Elapsed 1m 1s (remain 19m 0s) Loss: 0.1938(0.2582) Grad: 0.7850  LR: 0.00000960  
Epoch: [3][400/5891] Elapsed 1m 21s (remain 18m 37s) Loss: 0.2351(0.2572) Grad: 0.3657  LR: 0.00000947  
Epoch: [3][500/5891] Elapsed 1m 41s (remain 18m 16s) Loss: 0.3152(0.2584) Grad: 0.7873  LR: 0.00000933  
Epoch: [3][600/5891] Elapsed 2m 2s (remain 17m 55s) Loss: 0.1941(0.2561) Grad: 0.7712  LR: 0.00000920  
Epoch: [3][700/5891] Elapsed 2m 22s (remain 17m 34s) Loss: 0.2744(0.2560) Grad: 0.2324  LR: 0.00000907  
Epoch: [3][800/5891] Elapsed 2m 42s (remain 17m 14s) Loss: 0.1926(0.2537) Grad: 0.6748  LR: 0.00000893  
Epoch: [3][900/5891] Elapsed 3m 3s (remain 16m 53s) Loss: 0.2747

Epoch 3 - avg_train_loss: 0.2518  avg_val_loss: 0.2517  time: 1512s
INFO:__main__:Epoch 3 - avg_train_loss: 0.2518  avg_val_loss: 0.2517  time: 1512s
Epoch 3 - Score: 0.0000
INFO:__main__:Epoch 3 - Score: 0.0000


Epoch: [4][0/5891] Elapsed 0m 0s (remain 48m 54s) Loss: 0.2344(0.2344) Grad: nan  LR: 0.00000293  
Epoch: [4][100/5891] Elapsed 0m 20s (remain 19m 51s) Loss: 0.4009(0.2423) Grad: 1.9561  LR: 0.00000283  
Epoch: [4][200/5891] Elapsed 0m 41s (remain 19m 22s) Loss: 0.2336(0.2425) Grad: 0.1120  LR: 0.00000274  
Epoch: [4][300/5891] Elapsed 1m 1s (remain 18m 59s) Loss: 0.1926(0.2460) Grad: 0.6746  LR: 0.00000265  
Epoch: [4][400/5891] Elapsed 1m 21s (remain 18m 42s) Loss: 0.3523(0.2533) Grad: 1.0627  LR: 0.00000256  
Epoch: [4][500/5891] Elapsed 1m 42s (remain 18m 20s) Loss: 0.1934(0.2524) Grad: 0.7256  LR: 0.00000247  
Epoch: [4][600/5891] Elapsed 2m 2s (remain 18m 0s) Loss: 0.4375(0.2523) Grad: 2.3076  LR: 0.00000239  
Epoch: [4][700/5891] Elapsed 2m 23s (remain 17m 38s) Loss: 0.2747(0.2515) Grad: 0.3358  LR: 0.00000230  
Epoch: [4][800/5891] Elapsed 2m 43s (remain 17m 17s) Loss: 0.2344(0.2521) Grad: 0.2970  LR: 0.00000222  
Epoch: [4][900/5891] Elapsed 3m 3s (remain 16m 56s) Loss: 0.2749

Epoch 4 - avg_train_loss: 0.2517  avg_val_loss: 0.2517  time: 1514s
INFO:__main__:Epoch 4 - avg_train_loss: 0.2517  avg_val_loss: 0.2517  time: 1514s
Epoch 4 - Score: 0.0000
INFO:__main__:Epoch 4 - Score: 0.0000


f1 score : 0.0
recall score : 0.0
precision score : 0.0


Score: 0.0000
INFO:__main__:Score: 0.0000
F1 BEST Score: 0.0000
INFO:__main__:F1 BEST Score: 0.0000
DebertaV2Config {
  "_name_or_path": "microsoft/deberta-v3-large",
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "output_hidden_states": true,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 1024,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input": false,
  "position_buckets": 256,
  "relative_attention": true,
  "share_att_key": true,
  "transformers_version": "4.32.1",
  "type_vocab_size": 0,
  "vocab_size": 128100
}

INFO:__mai

Epoch: [1][0/5891] Elapsed 0m 0s (remain 56m 44s) Loss: 1.0850(1.0850) Grad: nan  LR: 0.00002000  
Epoch: [1][100/5891] Elapsed 0m 20s (remain 19m 58s) Loss: 0.2568(0.2897) Grad: 1.5766  LR: 0.00002000  
Epoch: [1][200/5891] Elapsed 0m 41s (remain 19m 24s) Loss: 0.3198(0.2626) Grad: 1.2483  LR: 0.00002000  
Epoch: [1][300/5891] Elapsed 1m 1s (remain 19m 0s) Loss: 0.2349(0.2508) Grad: 1.9686  LR: 0.00001999  
Epoch: [1][400/5891] Elapsed 1m 21s (remain 18m 37s) Loss: 0.2017(0.2423) Grad: 0.6241  LR: 0.00001999  
Epoch: [1][500/5891] Elapsed 1m 41s (remain 18m 15s) Loss: 0.1088(0.2359) Grad: 1.0369  LR: 0.00001998  
Epoch: [1][600/5891] Elapsed 2m 2s (remain 17m 54s) Loss: 0.2566(0.2319) Grad: 0.4009  LR: 0.00001997  
Epoch: [1][700/5891] Elapsed 2m 22s (remain 17m 34s) Loss: 0.2007(0.2306) Grad: 0.5513  LR: 0.00001996  
Epoch: [1][800/5891] Elapsed 2m 42s (remain 17m 14s) Loss: 0.2039(0.2287) Grad: 1.0159  LR: 0.00001994  
Epoch: [1][900/5891] Elapsed 3m 2s (remain 16m 52s) Loss: 0.2515

Epoch 1 - avg_train_loss: 0.1900  avg_val_loss: 0.1713  time: 1511s
INFO:__main__:Epoch 1 - avg_train_loss: 0.1900  avg_val_loss: 0.1713  time: 1511s
Epoch 1 - Score: 0.4382
INFO:__main__:Epoch 1 - Score: 0.4382
Epoch 1 - Save Best Score: 0.5060 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.5060 Model


Epoch: [2][0/5891] Elapsed 0m 0s (remain 48m 54s) Loss: 0.1998(0.1998) Grad: nan  LR: 0.00001707  
Epoch: [2][100/5891] Elapsed 0m 20s (remain 19m 58s) Loss: 0.3296(0.1568) Grad: 1.6869  LR: 0.00001698  
Epoch: [2][200/5891] Elapsed 0m 41s (remain 19m 26s) Loss: 0.2311(0.1659) Grad: 0.6598  LR: 0.00001688  
Epoch: [2][300/5891] Elapsed 1m 1s (remain 19m 3s) Loss: 0.2690(0.1705) Grad: 1.7917  LR: 0.00001678  
Epoch: [2][400/5891] Elapsed 1m 21s (remain 18m 39s) Loss: 0.1547(0.1689) Grad: 1.2350  LR: 0.00001668  
Epoch: [2][500/5891] Elapsed 1m 42s (remain 18m 19s) Loss: 0.1971(0.1677) Grad: 1.6453  LR: 0.00001658  
Epoch: [2][600/5891] Elapsed 2m 2s (remain 17m 57s) Loss: 0.1209(0.1666) Grad: 0.7617  LR: 0.00001648  
Epoch: [2][700/5891] Elapsed 2m 22s (remain 17m 35s) Loss: 0.1365(0.1685) Grad: 0.4910  LR: 0.00001638  
Epoch: [2][800/5891] Elapsed 2m 42s (remain 17m 14s) Loss: 0.2059(0.1661) Grad: 1.1999  LR: 0.00001628  
Epoch: [2][900/5891] Elapsed 3m 3s (remain 16m 53s) Loss: 0.1510

Epoch 2 - avg_train_loss: 0.1636  avg_val_loss: 0.1606  time: 1508s
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tor

Epoch: [3][0/5891] Elapsed 0m 0s (remain 48m 8s) Loss: 0.1294(0.1294) Grad: nan  LR: 0.00001000  
Epoch: [3][100/5891] Elapsed 0m 20s (remain 19m 45s) Loss: 0.1487(0.1526) Grad: 0.6720  LR: 0.00000987  
Epoch: [3][200/5891] Elapsed 0m 40s (remain 19m 20s) Loss: 0.2152(0.1590) Grad: 0.8426  LR: 0.00000973  
Epoch: [3][300/5891] Elapsed 1m 1s (remain 19m 0s) Loss: 0.1222(0.1547) Grad: 0.6547  LR: 0.00000960  
Epoch: [3][400/5891] Elapsed 1m 21s (remain 18m 38s) Loss: 0.1898(0.1545) Grad: 1.1904  LR: 0.00000947  
Epoch: [3][500/5891] Elapsed 1m 41s (remain 18m 15s) Loss: 0.2712(0.1523) Grad: 1.7278  LR: 0.00000933  
Epoch: [3][600/5891] Elapsed 2m 1s (remain 17m 53s) Loss: 0.1503(0.1511) Grad: 0.7311  LR: 0.00000920  
Epoch: [3][700/5891] Elapsed 2m 22s (remain 17m 32s) Loss: 0.1544(0.1508) Grad: 0.8828  LR: 0.00000907  
Epoch: [3][800/5891] Elapsed 2m 42s (remain 17m 11s) Loss: 0.1763(0.1512) Grad: 0.5502  LR: 0.00000893  
Epoch: [3][900/5891] Elapsed 3m 2s (remain 16m 50s) Loss: 0.1562(

Epoch 3 - avg_train_loss: 0.1500  avg_val_loss: 0.1553  time: 1505s
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tor

Epoch: [4][0/5891] Elapsed 0m 0s (remain 47m 52s) Loss: 0.0522(0.0522) Grad: nan  LR: 0.00000293  
Epoch: [4][100/5891] Elapsed 0m 20s (remain 19m 55s) Loss: 0.1495(0.1395) Grad: 1.5239  LR: 0.00000283  
Epoch: [4][200/5891] Elapsed 0m 41s (remain 19m 28s) Loss: 0.1057(0.1420) Grad: 0.5149  LR: 0.00000274  
Epoch: [4][300/5891] Elapsed 1m 1s (remain 19m 2s) Loss: 0.0517(0.1421) Grad: 0.4948  LR: 0.00000265  
Epoch: [4][400/5891] Elapsed 1m 21s (remain 18m 38s) Loss: 0.2407(0.1434) Grad: 1.4356  LR: 0.00000256  
Epoch: [4][500/5891] Elapsed 1m 42s (remain 18m 18s) Loss: 0.1235(0.1433) Grad: 0.8642  LR: 0.00000247  
Epoch: [4][600/5891] Elapsed 2m 2s (remain 17m 56s) Loss: 0.0828(0.1413) Grad: 0.8576  LR: 0.00000239  
Epoch: [4][700/5891] Elapsed 2m 22s (remain 17m 34s) Loss: 0.1672(0.1412) Grad: 0.7996  LR: 0.00000230  
Epoch: [4][800/5891] Elapsed 2m 42s (remain 17m 13s) Loss: 0.2405(0.1409) Grad: 1.5752  LR: 0.00000222  
Epoch: [4][900/5891] Elapsed 3m 2s (remain 16m 51s) Loss: 0.2198

Epoch 4 - avg_train_loss: 0.1388  avg_val_loss: 0.1563  time: 1505s
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tor

f1 score : 0.5042327527488567
recall score : 0.3969664470660334
precision score : 0.6909333333333333


Score: 0.5042
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tornado/platform/asyncio.py", line 195, in start
    self

Epoch: [1][0/5891] Elapsed 0m 0s (remain 62m 39s) Loss: 0.5361(0.5361) Grad: nan  LR: 0.00002000  
Epoch: [1][100/5891] Elapsed 0m 21s (remain 20m 9s) Loss: 0.1974(0.2761) Grad: 1.1203  LR: 0.00002000  
Epoch: [1][200/5891] Elapsed 0m 41s (remain 19m 34s) Loss: 0.2345(0.2599) Grad: 0.2243  LR: 0.00002000  
Epoch: [1][300/5891] Elapsed 1m 1s (remain 19m 6s) Loss: 0.2363(0.2578) Grad: 0.5637  LR: 0.00001999  
Epoch: [1][400/5891] Elapsed 1m 21s (remain 18m 42s) Loss: 0.3184(0.2573) Grad: 1.0117  LR: 0.00001999  
Epoch: [1][500/5891] Elapsed 1m 42s (remain 18m 20s) Loss: 0.1908(0.2535) Grad: 0.4581  LR: 0.00001998  
Epoch: [1][600/5891] Elapsed 2m 2s (remain 17m 57s) Loss: 0.4404(0.2530) Grad: 2.4186  LR: 0.00001997  
Epoch: [1][700/5891] Elapsed 2m 22s (remain 17m 35s) Loss: 0.3123(0.2548) Grad: 0.4441  LR: 0.00001996  
Epoch: [1][800/5891] Elapsed 2m 42s (remain 17m 14s) Loss: 0.1920(0.2547) Grad: 0.5941  LR: 0.00001994  
Epoch: [1][900/5891] Elapsed 3m 2s (remain 16m 52s) Loss: 0.2747(

Epoch 1 - avg_train_loss: 0.2523  avg_val_loss: 0.2523  time: 1509s
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tor

Epoch: [2][0/5891] Elapsed 0m 0s (remain 54m 19s) Loss: 0.3130(0.3130) Grad: nan  LR: 0.00001707  
Epoch: [2][100/5891] Elapsed 0m 21s (remain 20m 6s) Loss: 0.2742(0.2543) Grad: 0.1990  LR: 0.00001698  
Epoch: [2][200/5891] Elapsed 0m 41s (remain 19m 38s) Loss: 0.1541(0.2525) Grad: 1.2654  LR: 0.00001688  
Epoch: [2][300/5891] Elapsed 1m 1s (remain 19m 10s) Loss: 0.2739(0.2541) Grad: 0.0683  LR: 0.00001678  
Epoch: [2][400/5891] Elapsed 1m 22s (remain 18m 45s) Loss: 0.2338(0.2512) Grad: 0.1942  LR: 0.00001668  
Epoch: [2][500/5891] Elapsed 1m 42s (remain 18m 25s) Loss: 0.1533(0.2535) Grad: 1.2501  LR: 0.00001658  
Epoch: [2][600/5891] Elapsed 2m 2s (remain 18m 1s) Loss: 0.1565(0.2515) Grad: 1.4048  LR: 0.00001648  
Epoch: [2][700/5891] Elapsed 2m 23s (remain 17m 38s) Loss: 0.3916(0.2520) Grad: 1.5860  LR: 0.00001638  
Epoch: [2][800/5891] Elapsed 2m 43s (remain 17m 16s) Loss: 0.3157(0.2511) Grad: 0.8192  LR: 0.00001628  
Epoch: [2][900/5891] Elapsed 3m 3s (remain 16m 54s) Loss: 0.2761(

Epoch 2 - avg_train_loss: 0.2518  avg_val_loss: 0.2517  time: 1508s
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tor

Epoch: [3][0/5891] Elapsed 0m 0s (remain 53m 50s) Loss: 0.1542(0.1542) Grad: nan  LR: 0.00001000  
Epoch: [3][100/5891] Elapsed 0m 20s (remain 19m 52s) Loss: 0.1517(0.2477) Grad: 1.1764  LR: 0.00000987  
Epoch: [3][200/5891] Elapsed 0m 40s (remain 19m 20s) Loss: 0.2754(0.2491) Grad: 0.3789  LR: 0.00000973  
Epoch: [3][300/5891] Elapsed 1m 1s (remain 18m 59s) Loss: 0.1923(0.2470) Grad: 0.5711  LR: 0.00000960  
Epoch: [3][400/5891] Elapsed 1m 21s (remain 18m 38s) Loss: 0.2347(0.2480) Grad: 0.2551  LR: 0.00000947  
Epoch: [3][500/5891] Elapsed 1m 41s (remain 18m 16s) Loss: 0.1927(0.2471) Grad: 0.6694  LR: 0.00000933  
Epoch: [3][600/5891] Elapsed 2m 2s (remain 17m 55s) Loss: 0.4717(0.2502) Grad: 2.6461  LR: 0.00000920  
Epoch: [3][700/5891] Elapsed 2m 22s (remain 17m 34s) Loss: 0.2344(0.2502) Grad: 0.3299  LR: 0.00000907  
Epoch: [3][800/5891] Elapsed 2m 42s (remain 17m 14s) Loss: 0.2340(0.2502) Grad: 0.2181  LR: 0.00000893  
Epoch: [3][900/5891] Elapsed 3m 3s (remain 16m 53s) Loss: 0.234

Epoch 3 - avg_train_loss: 0.2518  avg_val_loss: 0.2517  time: 1510s
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tor

Epoch: [4][0/5891] Elapsed 0m 0s (remain 54m 56s) Loss: 0.3145(0.3145) Grad: nan  LR: 0.00000293  
Epoch: [4][100/5891] Elapsed 0m 20s (remain 19m 50s) Loss: 0.2336(0.2463) Grad: 0.1710  LR: 0.00000283  
Epoch: [4][200/5891] Elapsed 0m 41s (remain 19m 20s) Loss: 0.2339(0.2488) Grad: 0.1904  LR: 0.00000274  
Epoch: [4][300/5891] Elapsed 1m 1s (remain 18m 56s) Loss: 0.1525(0.2490) Grad: 1.1840  LR: 0.00000265  
Epoch: [4][400/5891] Elapsed 1m 21s (remain 18m 34s) Loss: 0.2340(0.2491) Grad: 0.1867  LR: 0.00000256  
Epoch: [4][500/5891] Elapsed 1m 41s (remain 18m 13s) Loss: 0.2345(0.2512) Grad: 0.2290  LR: 0.00000247  
Epoch: [4][600/5891] Elapsed 2m 2s (remain 17m 55s) Loss: 0.1937(0.2509) Grad: 0.7051  LR: 0.00000239  
Epoch: [4][700/5891] Elapsed 2m 22s (remain 17m 34s) Loss: 0.2747(0.2500) Grad: 0.3369  LR: 0.00000230  
Epoch: [4][800/5891] Elapsed 2m 42s (remain 17m 13s) Loss: 0.3154(0.2503) Grad: 0.7908  LR: 0.00000222  
Epoch: [4][900/5891] Elapsed 3m 2s (remain 16m 52s) Loss: 0.233

Epoch 4 - avg_train_loss: 0.2517  avg_val_loss: 0.2517  time: 1512s
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tor

f1 score : 0.0
recall score : 0.0
precision score : 0.0


Score: 0.0000
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tornado/platform/asyncio.py", line 195, in start
    self

Epoch: [1][0/5891] Elapsed 0m 0s (remain 61m 45s) Loss: 0.4204(0.4204) Grad: nan  LR: 0.00002000  
Epoch: [1][100/5891] Elapsed 0m 20s (remain 19m 50s) Loss: 0.2866(0.2479) Grad: 2.0927  LR: 0.00002000  
Epoch: [1][200/5891] Elapsed 0m 40s (remain 19m 17s) Loss: 0.2532(0.2380) Grad: 0.9406  LR: 0.00002000  
Epoch: [1][300/5891] Elapsed 1m 1s (remain 18m 55s) Loss: 0.3464(0.2322) Grad: 2.7206  LR: 0.00001999  
Epoch: [1][400/5891] Elapsed 1m 21s (remain 18m 32s) Loss: 0.2725(0.2319) Grad: 0.9113  LR: 0.00001999  
Epoch: [1][500/5891] Elapsed 1m 41s (remain 18m 12s) Loss: 0.2571(0.2264) Grad: 1.7943  LR: 0.00001998  
Epoch: [1][600/5891] Elapsed 2m 1s (remain 17m 51s) Loss: 0.1945(0.2232) Grad: 0.7567  LR: 0.00001997  
Epoch: [1][700/5891] Elapsed 2m 21s (remain 17m 30s) Loss: 0.1715(0.2193) Grad: 0.8963  LR: 0.00001996  
Epoch: [1][800/5891] Elapsed 2m 42s (remain 17m 10s) Loss: 0.4255(0.2181) Grad: 1.4160  LR: 0.00001994  
Epoch: [1][900/5891] Elapsed 3m 2s (remain 16m 49s) Loss: 0.360

Epoch 1 - avg_train_loss: 0.1878  avg_val_loss: 0.2189  time: 1508s
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tor

Epoch: [2][0/5891] Elapsed 0m 0s (remain 53m 36s) Loss: 0.3101(0.3101) Grad: nan  LR: 0.00001707  
Epoch: [2][100/5891] Elapsed 0m 20s (remain 20m 1s) Loss: 0.1004(0.2204) Grad: 1.1619  LR: 0.00001698  
Epoch: [2][200/5891] Elapsed 0m 41s (remain 19m 26s) Loss: 0.1735(0.2095) Grad: 0.3385  LR: 0.00001688  
Epoch: [2][300/5891] Elapsed 1m 1s (remain 19m 4s) Loss: 0.1263(0.2008) Grad: 0.5453  LR: 0.00001678  
Epoch: [2][400/5891] Elapsed 1m 21s (remain 18m 38s) Loss: 0.1968(0.2011) Grad: 1.0213  LR: 0.00001668  
Epoch: [2][500/5891] Elapsed 1m 42s (remain 18m 18s) Loss: 0.2107(0.1992) Grad: 1.3513  LR: 0.00001658  
Epoch: [2][600/5891] Elapsed 2m 2s (remain 17m 55s) Loss: 0.1941(0.1961) Grad: 0.7809  LR: 0.00001648  
Epoch: [2][700/5891] Elapsed 2m 22s (remain 17m 34s) Loss: 0.1929(0.1966) Grad: 1.7306  LR: 0.00001638  
Epoch: [2][800/5891] Elapsed 2m 42s (remain 17m 13s) Loss: 0.2341(0.1951) Grad: 1.2542  LR: 0.00001628  
Epoch: [2][900/5891] Elapsed 3m 2s (remain 16m 52s) Loss: 0.2920(

Epoch 2 - avg_train_loss: 0.1790  avg_val_loss: 0.1645  time: 1507s
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tor

Epoch: [3][0/5891] Elapsed 0m 0s (remain 54m 22s) Loss: 0.1642(0.1642) Grad: nan  LR: 0.00001000  
Epoch: [3][100/5891] Elapsed 0m 20s (remain 20m 0s) Loss: 0.2051(0.1539) Grad: 0.9026  LR: 0.00000987  
Epoch: [3][200/5891] Elapsed 0m 41s (remain 19m 41s) Loss: 0.0822(0.1576) Grad: 0.5021  LR: 0.00000973  
Epoch: [3][300/5891] Elapsed 1m 2s (remain 19m 13s) Loss: 0.1803(0.1564) Grad: 0.9660  LR: 0.00000960  
Epoch: [3][400/5891] Elapsed 1m 22s (remain 18m 47s) Loss: 0.1733(0.1538) Grad: 0.9940  LR: 0.00000947  
Epoch: [3][500/5891] Elapsed 1m 42s (remain 18m 23s) Loss: 0.2234(0.1545) Grad: 1.2238  LR: 0.00000933  
Epoch: [3][600/5891] Elapsed 2m 2s (remain 18m 1s) Loss: 0.2322(0.1567) Grad: 0.6663  LR: 0.00000920  
Epoch: [3][700/5891] Elapsed 2m 23s (remain 17m 39s) Loss: 0.1823(0.1571) Grad: 0.9844  LR: 0.00000907  
Epoch: [3][800/5891] Elapsed 2m 43s (remain 17m 18s) Loss: 0.1255(0.1562) Grad: 0.6556  LR: 0.00000893  
Epoch: [3][900/5891] Elapsed 3m 3s (remain 16m 56s) Loss: 0.0892(

Epoch 3 - avg_train_loss: 0.1524  avg_val_loss: 0.1585  time: 1505s
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tor

Epoch: [4][0/5891] Elapsed 0m 0s (remain 54m 20s) Loss: 0.1525(0.1525) Grad: nan  LR: 0.00000293  
Epoch: [4][100/5891] Elapsed 0m 20s (remain 19m 55s) Loss: 0.1614(0.1456) Grad: 1.3079  LR: 0.00000283  
Epoch: [4][200/5891] Elapsed 0m 41s (remain 19m 24s) Loss: 0.1194(0.1462) Grad: 0.9683  LR: 0.00000274  
Epoch: [4][300/5891] Elapsed 1m 1s (remain 19m 0s) Loss: 0.0842(0.1466) Grad: 0.8546  LR: 0.00000265  
Epoch: [4][400/5891] Elapsed 1m 21s (remain 18m 36s) Loss: 0.2041(0.1455) Grad: 1.0497  LR: 0.00000256  
Epoch: [4][500/5891] Elapsed 1m 41s (remain 18m 14s) Loss: 0.1567(0.1460) Grad: 1.5477  LR: 0.00000247  
Epoch: [4][600/5891] Elapsed 2m 2s (remain 17m 55s) Loss: 0.1197(0.1455) Grad: 1.9238  LR: 0.00000239  
Epoch: [4][700/5891] Elapsed 2m 22s (remain 17m 33s) Loss: 0.1555(0.1433) Grad: 0.7602  LR: 0.00000230  
Epoch: [4][800/5891] Elapsed 2m 42s (remain 17m 11s) Loss: 0.1272(0.1434) Grad: 0.5577  LR: 0.00000222  
Epoch: [4][900/5891] Elapsed 3m 2s (remain 16m 50s) Loss: 0.0887

Epoch 4 - avg_train_loss: 0.1411  avg_val_loss: 0.1595  time: 1506s
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tor

f1 score : 0.49347804440904125
recall score : 0.3797119215445909
precision score : 0.704577765140745


Score: 0.4935
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tornado/platform/asyncio.py", line 195, in start
    self

f1 score : 0.2540979497719184
recall score : 0.15534307866752475
precision score : 0.6975368102380625


Score: 0.2541
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tornado/platform/asyncio.py", line 195, in start
    self

In [None]:
# Release the Colab runtime once training/evaluation has finished so the
# GPU is not left allocated while idle.
from google.colab import drive, runtime

try:
    # Push any buffered Google Drive writes (checkpoints, log files) before
    # the runtime goes away. The timeout is bounded so a stuck Drive mount
    # cannot keep the GPU allocated indefinitely.
    # NOTE(review): the "[Errno 107] Transport endpoint is not connected"
    # logging errors in the training output look like the Drive mount
    # dropping mid-run -- confirm, and consider logging to local disk and
    # copying to Drive at the end.
    drive.flush_and_unmount(timeout_ms=10 * 60 * 1000)
except Exception as exc:
    # Best effort only: a failed flush must never block the shutdown below.
    print(f"Drive flush failed; releasing the runtime anyway: {exc}")
finally:
    runtime.unassign()