In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!pip install transformers
!pip install datasets
!pip install sentencepiece



In [3]:
!nvidia-smi

Fri Aug 25 09:36:46 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   44C    P0    25W / 300W |      0MiB / 16384MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [4]:
import os
import gc
import math
import time
import random
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.simplefilter('ignore')
from tqdm import tqdm
import re
import html

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import Adam, SGD, AdamW, RAdam
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader, Dataset

from sklearn.model_selection import StratifiedKFold,StratifiedGroupKFold,GroupKFold
from sklearn.metrics import log_loss,f1_score, recall_score, accuracy_score, precision_score

from transformers import AutoModel, AutoConfig, AutoTokenizer, AdamW, DataCollatorWithPadding
from transformers import get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [5]:
import os

DIR = "/content/drive/MyDrive/Competitions/Signate/MUFG2023"
INPUT_DIR = os.path.join(DIR,"input")
OUTPUT_DIR = os.path.join(DIR,"output")

if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)


OUTPUT_EXP_DIR = DIR + '/output/EXP021/'
if not os.path.exists(OUTPUT_EXP_DIR):
    os.makedirs(OUTPUT_EXP_DIR)

In [6]:


# ====================================================
# CFG
# ====================================================
class CFG:
    debug=False
    apex=True
    print_freq=100
    num_workers=4
    model="microsoft/deberta-v3-base"
    # model='microsoft/deberta-base'
    # model='roberta-base'
    # model='roberta-large'
    # model='roberta-large-mnli'
    # model='xlnet-large-cased'
    # model='albert-xxlarge-v2'
    # model="microsoft/deberta-large"
    # model="microsoft/deberta-v3-large"
    # model='microsoft/deberta-v2-xlarge'
    # model='funnel-transformer/large'
    # model='funnel-transformer/medium'
    # model='albert-base-v2'
    # model='albert-large-v2'
    # model='google/electra-large-discriminator'
    # model='google/electra-base-discriminator'
    # model="facebook/bart-large-mnli"
    # model="facebook/bart-large"
    # model="facebook/bart-base"
    scheduler='cosine' # ['linear', 'cosine']
    batch_scheduler=True
    num_cycles=0.5
    num_warmup_steps=0
    epochs=4
    encoder_lr=2e-5
    decoder_lr=2e-5
    min_lr=1e-6
    eps=1e-6
    betas=(0.9, 0.999)
    batch_size=64
    fc_dropout=0.2
    target="is_fraud?"
    target_size=1
    max_len=256
    weight_decay=0.01
    gradient_accumulation_steps=1
    max_grad_norm=1000
    seed=42
    n_fold=5
    trn_fold=[0, 1, 2, 3, 4]
    train=True
    nth_awp_start_epoch=1
    gradient_checkpointing = False
    freezing = False

if CFG.debug:
    CFG.epochs = 2
    CFG.trn_fold = [0, 1]

In [7]:
def get_score(labels, outputs):
    thresh = 0.5
    y_pred = outputs
    y_true = labels
    f_score = f1_score(y_true, (y_pred>thresh).astype(int))
    r_score = recall_score(y_true, (y_pred>thresh).astype(int))
    p_score = precision_score(y_true, (y_pred>thresh).astype(int))
    print(f"f1 score : {f_score}")
    print(f"recall score : {r_score}")
    print(f"precision score : {p_score}")
    return f1_score(y_true, (y_pred>thresh).astype(int))

def get_f1_score(labels, outputs):
    y_pred = outputs
    y_true = labels
    best_score = 0
    best_thresh = 0.5
    for thresh in np.arange(0.1, 0.80, 0.01):
        thresh = np.round(thresh, 2)
        score = f1_score(y_true, (y_pred>thresh).astype(int))
        #print("Accuracy score at threshold {0} is {1}".format(thresh, score))
        if score > best_score:
          best_score = score
          best_thresh = thresh
    return f1_score(y_true, (y_pred>best_thresh).astype(int))


def get_logger(filename=OUTPUT_EXP_DIR+'train'):
    from logging import getLogger, INFO, FileHandler, Formatter, StreamHandler
    logger = getLogger(__name__)
    logger.setLevel(INFO)
    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    handler2 = FileHandler(filename=f"{filename}.log")
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger

LOGGER = get_logger()

def seed_everything(seed=CFG.seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

seed_everything(seed=CFG.seed)

In [8]:
def freeze(module):
    """
    Freezes module's parameters.
    """

    for parameter in module.parameters():
        parameter.requires_grad = False

def get_freezed_parameters(module):
    """
    Returns names of freezed parameters of the given module.
    """

    freezed_parameters = []
    for name, parameter in module.named_parameters():
        if not parameter.requires_grad:
            freezed_parameters.append(name)

    return freezed_parameters

def set_embedding_parameters_bits(embeddings_path, optim_bits=32):
    """
    https://github.com/huggingface/transformers/issues/14819#issuecomment-1003427930
    """

    embedding_types = ("word", "position", "token_type")
    for embedding_type in embedding_types:
        attr_name = f"{embedding_type}_embeddings"

        if hasattr(embeddings_path, attr_name):
            bnb.optim.GlobalOptimManager.get_instance().register_module_override(
                getattr(embeddings_path, attr_name), 'weight', {'optim_bits': optim_bits}
            )

In [9]:
import pandas as pd
import numpy as np


train = pd.read_csv(os.path.join(INPUT_DIR,"train.csv"))
test = pd.read_csv(os.path.join(INPUT_DIR,"test.csv"))
card = pd.read_csv(os.path.join(INPUT_DIR, "card.csv"))
user = pd.read_csv(os.path.join(INPUT_DIR, "user.csv"))
sub = pd.read_csv(os.path.join(INPUT_DIR, "sample_submit.csv"), header=None)

print(train.shape)
display(train.head(3))

print(test.shape)
display(test.head(3))

print(card.shape)
display(card.head(3))

print(user.shape)
display(user.head(3))

print(sub.shape)
display(sub.head(3))

(471283, 12)


Unnamed: 0,index,user_id,card_id,amount,errors?,is_fraud?,merchant_id,merchant_city,merchant_state,zip,mcc,use_chip
0,0,1721,0,$2.623,OK,0,209237,Joliet,IL,60436.0,5541,Swipe Transaction
1,1,1629,3,$6.4,OK,0,2568,Edgerton,WI,53534.0,5814,Swipe Transaction
2,2,655,3,$123.5,OK,0,345310,Ridgefield,WA,98642.0,7538,Swipe Transaction


(457958, 11)


Unnamed: 0,index,user_id,card_id,amount,errors?,merchant_id,merchant_city,merchant_state,zip,mcc,use_chip
0,471283,541,3,$113.278,OK,324189,Orlando,FL,32821.0,4814,Swipe Transaction
1,471284,655,1,$293.944,OK,81219,Ridgefield,WA,98642.0,7538,Chip Transaction
2,471285,492,0,$47.4,OK,274755,Arlington Heights,IL,60004.0,5719,Swipe Transaction


(416, 10)


Unnamed: 0,user_id,card_id,card_brand,card_type,expires,has_chip,cards_issued,credit_limit,acct_open_date,year_pin_last_changed
0,39,0,Visa,Debit,09/2021,YES,1,$17117,05/2007,2010
1,39,1,Amex,Credit,11/2024,YES,2,$5400,10/2015,2015
2,41,0,Discover,Credit,03/2022,YES,2,$14800,12/2010,2011


(97, 17)


Unnamed: 0,user_id,current_age,retirement_age,birth_year,birth_month,gender,address,city,state,zipcode,latitude,longitude,per_capita_income_zipcode,yearly_income_person,total_debt,fico_score,num_credit_cards
0,39,57,64,1962,12,Female,442 Burns Boulevard,Mansfield,MA,2048,42.02,-71.21,$37407,$76274,$102611,698,2
1,41,39,66,1980,10,Female,3863 River Avenue,Lincoln,CA,95648,38.93,-121.25,$21829,$44506,$57994,849,3
2,47,40,67,1979,5,Female,8799 Elm Avenue,Mckinney,TX,75069,33.2,-96.65,$24684,$50329,$76759,625,4


(457958, 2)


Unnamed: 0,0,1
0,471283,0
1,471284,1
2,471285,0


In [10]:
train = train.merge(card, how="left", on=["user_id", "card_id"]).merge(user, how="left", on="user_id")

In [11]:
month_dict = {
   "01": "January",
   "02": "February",
   "03": "March",
   "04": "April",
   "05": "May",
   "06": "June",
   "07": "July",
   "08": "August",
   "09": "September",
   "10": "October",
   "11": "November",
   "12": "December"
}

def get_expires_values(df):
  _df = df["expires"].str.split('/').apply(pd.Series)
  _df.columns = ["month","years"]
  df["expires_month"] = _df["month"].astype(str)
  df["expires_years"] = _df["years"].astype(str)
  return df

def get_acct_open_date_values(df):
  _df = df["acct_open_date"].str.split('/').apply(pd.Series)
  _df.columns = ["month","years"]
  df["acct_open_date_month"] = _df["month"].astype(str)
  df["acct_open_date_years"] = _df["years"].astype(str)
  return df

train = get_expires_values(train)
train = get_acct_open_date_values(train)
train["expires_month"] = train["expires_month"].map(month_dict)
train["acct_open_date_month"] = train["acct_open_date_month"].map(month_dict)

In [12]:
train.fillna('unknown', inplace = True)

train["texts"] = "merchant" + "[SEP]" + train["amount"] + "[SEP]" + train["errors?"] + "[SEP]" + train["merchant_city"] + "[SEP]" + train["merchant_state"] + "[SEP]" + train["use_chip"] + "[SEP]" \
+ "card" + "[SEP]" + train["card_brand"] + "[SEP]" + train["card_type"] + "[SEP]" + train["expires_month"] + " " + train["expires_years"] + "[SEP]" + train["has_chip"] + "[SEP]" + train["acct_open_date_month"] + " " + train["acct_open_date_years"] + "[SEP]" + train["year_pin_last_changed"].astype(str) + "[SEP]" \
"user" + "[SEP]" + train["current_age"].astype(str) + " year old " + train["gender"] + "[SEP]" + "retired at age " + train["retirement_age"].astype(str) + "[SEP]" + train["address"] + "[SEP]" + train["city"] + "[SEP]" + train["state"] + "[SEP]" + train["per_capita_income_zipcode"] + "[SEP]" + train["yearly_income_person"] + "[SEP]" + train["total_debt"]

In [13]:
skf = StratifiedKFold(n_splits=CFG.n_fold,shuffle=True,random_state=CFG.seed)
for fold, ( _, val_) in enumerate(skf.split(train, train[CFG.target])):
    train.loc[val_ , "kfold"] = int(fold)

train["kfold"] = train["kfold"].astype(int)

if CFG.debug:
    display(train.groupby('kfold').size())
    train = train.sample(n=500, random_state=0).reset_index(drop=True)
    display(train.groupby('kfold').size())

In [14]:
# ====================================================
# tokenizer
# ====================================================
tokenizer = AutoTokenizer.from_pretrained(CFG.model)
tokenizer.save_pretrained(OUTPUT_EXP_DIR+'tokenizer/')
CFG.tokenizer = tokenizer

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [15]:
# ====================================================
# Define max_len
# ====================================================
lengths = []
tk0 = tqdm(train['texts'].fillna("").values, total=len(train))
for text in tk0:
    length = len(tokenizer(text, add_special_tokens=False)['input_ids'])
    lengths.append(length)
CFG.max_len = max(lengths) + 23 # cls
LOGGER.info(f"max_len: {CFG.max_len}")

100%|██████████| 471283/471283 [01:53<00:00, 4158.70it/s]
max_len: 97
INFO:__main__:max_len: 97


In [16]:
# ====================================================
# Dataset
# ====================================================
def prepare_input(cfg, text):
    inputs = cfg.tokenizer(text,
                           add_special_tokens=True,
                           max_length=cfg.max_len,
                           padding="max_length",
                           return_offsets_mapping=False,
                           truncation=True)
    for k, v in inputs.items():
        inputs[k] = torch.tensor(v, dtype=torch.long)
    return inputs


class TrainDataset(Dataset):
    def __init__(self, cfg, df):
        self.cfg = cfg
        self.inputs = df['texts'].values
        self.labels = df[CFG.target].values

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, item):
        inputs = prepare_input(self.cfg, self.inputs[item])
        label = torch.tensor(self.labels[item], dtype=torch.half)
        return inputs, label

def collate(inputs):
    mask_len = int(inputs["attention_mask"].sum(axis=1).max())
    for k, v in inputs.items():
        inputs[k] = inputs[k][:,:mask_len]
    return inputs

class ValidDataset(Dataset):
    def __init__(self, cfg, df):
        self.cfg = cfg
        self.inputs = df['texts'].values
        self.labels = df[CFG.target].values

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, item):
        inputs = prepare_input(self.cfg, self.inputs[item])
        label = torch.tensor(self.labels[item], dtype=torch.float)
        return inputs, label

def collate(inputs):
    mask_len = int(inputs["attention_mask"].sum(axis=1).max())
    for k, v in inputs.items():
        inputs[k] = inputs[k][:,:mask_len]
    return inputs

#collate_fn = DataCollatorWithPadding(tokenizer=tokenizer)

In [17]:
# ====================================================
# Model
# ====================================================
class MeanPooling(nn.Module):
    def __init__(self):
        super(MeanPooling, self).__init__()

    def forward(self, last_hidden_state, attention_mask):
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
        sum_embeddings = torch.sum(last_hidden_state * input_mask_expanded, 1)
        sum_mask = input_mask_expanded.sum(1)
        sum_mask = torch.clamp(sum_mask, min=1e-9)
        mean_embeddings = sum_embeddings / sum_mask
        return mean_embeddings

class MaxPooling(nn.Module):
    def __init__(self):
        super(MaxPooling, self).__init__()

    def forward(self, last_hidden_state, attention_mask):
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
        embeddings = last_hidden_state.clone()
        embeddings[input_mask_expanded == 0] = -1e4
        max_embeddings, _ = torch.max(embeddings, dim=1)
        return max_embeddings


class CustomModel(nn.Module):
    def __init__(self, cfg, config_path=None, pretrained=False):
        super().__init__()
        self.cfg = cfg
        if config_path is None:
            self.config = AutoConfig.from_pretrained(cfg.model, output_hidden_states=True)
            self.config.hidden_dropout = 0.
            self.config.hidden_dropout_prob = 0.
            self.config.attention_dropout = 0.
            self.config.attention_probs_dropout_prob = 0.
            LOGGER.info(self.config)
        else:
            self.config = torch.load(config_path)
        if pretrained:
            self.model = AutoModel.from_pretrained(cfg.model, config=self.config)
        else:
            self.model = AutoModel(self.config)
        if self.cfg.gradient_checkpointing:
            self.model.gradient_checkpointing_enable()

        # Freezing
        if cfg.freezing:
            # freezing embeddings and first 2 layers of encoder
            freeze((self.model).embeddings)
            freeze((self.model).encoder.layer[:2])
            cfg.after_freezed_parameters = filter(lambda parameter: parameter.requires_grad, (self.model).parameters())

        self.pool = MeanPooling()
        self.fc = nn.Linear(self.config.hidden_size, cfg.target_size)
        self._init_weights(self.fc)
        self.layer_norm1 = nn.LayerNorm(self.config.hidden_size)
        self.sig = nn.Sigmoid()

    def _init_weights(self, module):
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)

    def feature(self, inputs):
        outputs = self.model(**inputs)
        last_hidden_states = outputs[0]
        feature = self.pool(last_hidden_states, inputs['attention_mask'])
        return feature

    def forward(self, inputs):
        feature = self.feature(inputs)
        feature = self.layer_norm1(feature)
        output = self.fc(feature)
        #output = self.sig(output)
        return output

In [18]:
# ====================================================
# Helper functions
# ====================================================
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (remain %s)' % (asMinutes(s), asMinutes(rs))


def train_fn(fold, train_loader, model, criterion, optimizer, epoch, scheduler, device):
    model.train()
    scaler = torch.cuda.amp.GradScaler(enabled=CFG.apex)
    losses = AverageMeter()
    start = end = time.time()
    global_step = 0
    for step, (inputs, labels) in enumerate(train_loader):
        inputs = collate(inputs)
        for k, v in inputs.items():
            inputs[k] = v.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        with torch.cuda.amp.autocast(enabled=CFG.apex):
            y_preds = model(inputs)
        #print(y_preds.sigmoid().squeeze().view(1, -1))
        loss = criterion(y_preds.sigmoid().squeeze(), labels.squeeze())
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        losses.update(loss.item(), batch_size)
        scaler.scale(loss).backward()
        scaler.unscale_(optimizer)
        grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), CFG.max_grad_norm)
        if (step + 1) % CFG.gradient_accumulation_steps == 0:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()
            global_step += 1
            if CFG.batch_scheduler:
                scheduler.step()
        end = time.time()
        if step % CFG.print_freq == 0 or step == (len(train_loader)-1):
            print('Epoch: [{0}][{1}/{2}] '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  'Grad: {grad_norm:.4f}  '
                  'LR: {lr:.8f}  '
                  .format(epoch+1, step, len(train_loader),
                          remain=timeSince(start, float(step+1)/len(train_loader)),
                          loss=losses,
                          grad_norm=grad_norm,
                          lr=scheduler.get_lr()[0]))

    return losses.avg


def valid_fn(valid_loader, model, criterion, device):
    losses = AverageMeter()
    model.eval()
    preds = []
    start = end = time.time()
    for step, (inputs, labels) in enumerate(valid_loader):
        inputs = collate(inputs)
        for k, v in inputs.items():
            inputs[k] = v.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        with torch.no_grad():
            y_preds = model(inputs)
        loss = criterion(y_preds.sigmoid().squeeze(), labels.squeeze())
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        losses.update(loss.item(), batch_size)
        preds.append(y_preds.sigmoid().to('cpu').numpy())
        end = time.time()
        if step % CFG.print_freq == 0 or step == (len(valid_loader)-1):
            print('EVAL: [{0}/{1}] '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  .format(step, len(valid_loader),
                          loss=losses,
                          remain=timeSince(start, float(step+1)/len(valid_loader))))
    predictions = np.concatenate(preds)
    predictions = np.concatenate(predictions)
    return losses.avg, predictions


def inference_fn(test_loader, model, device):
    preds = []
    model.eval()
    model.to(device)
    tk0 = tqdm(test_loader, total=len(test_loader))
    for inputs in tk0:
        inputs = collate(inputs)
        for k, v in inputs.items():
            inputs[k] = v.to(device)
        with torch.no_grad():
            y_preds = model(inputs)
        preds.append(y_preds.sigmoid().to('cpu').numpy())
    predictions = np.concatenate(preds)
    return predictions

In [19]:
# ====================================================
# train loop
# ====================================================
def train_loop(folds, fold):

    LOGGER.info(f"========== fold: {fold} training ==========")

    # ====================================================
    # loader
    # ====================================================
    train_folds = folds[folds['kfold'] != fold].reset_index(drop=True)
    valid_folds = folds[folds['kfold'] == fold].reset_index(drop=True)
    valid_labels = valid_folds[CFG.target].values

    train_dataset = TrainDataset(CFG, train_folds)
    valid_dataset = ValidDataset(CFG, valid_folds)


    train_loader = DataLoader(train_dataset,
                              batch_size=CFG.batch_size,
                              shuffle=True,
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=True)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=CFG.batch_size*2,
                              shuffle=False,
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=False)

    # ====================================================
    # model & optimizer
    # ====================================================
    model = CustomModel(CFG, config_path=None, pretrained=True)
    torch.save(model.config, OUTPUT_EXP_DIR+'config.pth')
    model.to(device)

    def get_optimizer_params(model, encoder_lr, decoder_lr, weight_decay=0.0):
        param_optimizer = list(model.named_parameters())
        no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
        optimizer_parameters = [
            {'params': [p for n, p in model.model.named_parameters() if not any(nd in n for nd in no_decay)],
             'lr': encoder_lr, 'weight_decay': weight_decay},
            {'params': [p for n, p in model.model.named_parameters() if any(nd in n for nd in no_decay)],
             'lr': encoder_lr, 'weight_decay': 0.0},
            {'params': [p for n, p in model.named_parameters() if "model" not in n],
             'lr': decoder_lr, 'weight_decay': 0.0}
        ]
        return optimizer_parameters

    optimizer_parameters = get_optimizer_params(model,
                                                encoder_lr=CFG.encoder_lr,
                                                decoder_lr=CFG.decoder_lr,
                                                weight_decay=CFG.weight_decay)
    optimizer = AdamW(optimizer_parameters, lr=CFG.encoder_lr, eps=CFG.eps, betas=CFG.betas)

    # ====================================================
    # scheduler
    # ====================================================
    def get_scheduler(cfg, optimizer, num_train_steps):
        if cfg.scheduler == 'linear':
            scheduler = get_linear_schedule_with_warmup(
                optimizer, num_warmup_steps=cfg.num_warmup_steps, num_training_steps=num_train_steps
            )
        elif cfg.scheduler == 'cosine':
            scheduler = get_cosine_schedule_with_warmup(
                optimizer, num_warmup_steps=cfg.num_warmup_steps, num_training_steps=num_train_steps, num_cycles=cfg.num_cycles
            )
        return scheduler

    num_train_steps = int(len(train_folds) / CFG.batch_size * CFG.epochs)
    scheduler = get_scheduler(CFG, optimizer, num_train_steps)

    # ====================================================
    # loop
    # ====================================================
    criterion = nn.BCELoss()

    best_score = -1.

    for epoch in range(CFG.epochs):

        start_time = time.time()

        # train
        avg_loss = train_fn(fold, train_loader, model, criterion, optimizer, epoch, scheduler, device)

        # eval
        avg_val_loss, predictions = valid_fn(valid_loader, model, criterion, device)

        # scoring
        score = get_score(valid_labels, predictions)
        f1_score = get_f1_score(valid_labels, predictions)

        elapsed = time.time() - start_time

        LOGGER.info(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s')
        LOGGER.info(f'Epoch {epoch+1} - Score: {score:.4f}')


        if best_score < f1_score:
            best_score = f1_score
            LOGGER.info(f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model')
            torch.save({'model': model.state_dict(),
                        'predictions': predictions},
                        OUTPUT_EXP_DIR+f"{CFG.model.replace('/', '-')}_fold{fold}_best.pth")

    predictions = torch.load(OUTPUT_EXP_DIR+f"{CFG.model.replace('/', '-')}_fold{fold}_best.pth",
                             map_location=torch.device('cpu'))['predictions']
    valid_folds['pred'] = predictions

    torch.cuda.empty_cache()
    gc.collect()

    return valid_folds

In [None]:
if __name__ == '__main__':

    def get_result(oof_df):
        labels = oof_df[CFG.target].values
        preds = oof_df['pred'].values
        score = get_score(labels, preds)
        f1_score = get_f1_score(labels, preds)
        LOGGER.info(f'Score: {score:<.4f}')
        LOGGER.info(f'F1 BEST Score: {f1_score:<.4f}')

    if CFG.train:
        oof_df = pd.DataFrame()
        for fold in range(CFG.n_fold):
            if fold in CFG.trn_fold:
                _oof_df = train_loop(train, fold)
                oof_df = pd.concat([oof_df, _oof_df])
                LOGGER.info(f"========== fold: {fold} result ==========")
                get_result(_oof_df)
            #break
        oof_df = oof_df.reset_index(drop=True)
        LOGGER.info(f"========== CV ==========")
        get_result(oof_df)
        oof_df.to_pickle(OUTPUT_EXP_DIR+'oof_df.pkl')

DebertaV2Config {
  "_name_or_path": "microsoft/deberta-v3-base",
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input": false,
  "position_buckets": 256,
  "relative_attention": true,
  "share_att_key": true,
  "transformers_version": "4.32.0",
  "type_vocab_size": 0,
  "vocab_size": 128100
}

INFO:__main__:DebertaV2Config {
  "_name_or_path": "microsoft/deberta-v3-base",
  "attention_dropout": 0.0,
  "at

Epoch: [1][0/5891] Elapsed 0m 1s (remain 125m 52s) Loss: 0.8779(0.8779) Grad: nan  LR: 0.00002000  
Epoch: [1][100/5891] Elapsed 0m 16s (remain 15m 45s) Loss: 0.2527(0.2756) Grad: 1.0957  LR: 0.00002000  
Epoch: [1][200/5891] Elapsed 0m 32s (remain 15m 6s) Loss: 0.2325(0.2557) Grad: 0.6856  LR: 0.00002000  
Epoch: [1][300/5891] Elapsed 0m 47s (remain 14m 35s) Loss: 0.2163(0.2455) Grad: 0.9954  LR: 0.00001999  
Epoch: [1][400/5891] Elapsed 1m 2s (remain 14m 11s) Loss: 0.1760(0.2409) Grad: 0.8348  LR: 0.00001999  
Epoch: [1][500/5891] Elapsed 1m 17s (remain 13m 51s) Loss: 0.2766(0.2360) Grad: 1.4748  LR: 0.00001998  
Epoch: [1][600/5891] Elapsed 1m 32s (remain 13m 32s) Loss: 0.1301(0.2336) Grad: 0.9235  LR: 0.00001997  
Epoch: [1][700/5891] Elapsed 1m 47s (remain 13m 14s) Loss: 0.2539(0.2315) Grad: 1.1613  LR: 0.00001996  
Epoch: [1][800/5891] Elapsed 2m 2s (remain 12m 55s) Loss: 0.3018(0.2309) Grad: 0.6694  LR: 0.00001994  
Epoch: [1][900/5891] Elapsed 2m 16s (remain 12m 37s) Loss: 0.22

Epoch 1 - avg_train_loss: 0.1931  avg_val_loss: 0.1774  time: 1037s
INFO:__main__:Epoch 1 - avg_train_loss: 0.1931  avg_val_loss: 0.1774  time: 1037s
Epoch 1 - Score: 0.3649
INFO:__main__:Epoch 1 - Score: 0.3649
Epoch 1 - Save Best Score: 0.4925 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.4925 Model


Epoch: [2][0/5891] Elapsed 0m 0s (remain 63m 3s) Loss: 0.2563(0.2563) Grad: nan  LR: 0.00001707  
Epoch: [2][100/5891] Elapsed 0m 17s (remain 16m 43s) Loss: 0.1024(0.1639) Grad: 1.3757  LR: 0.00001698  
Epoch: [2][200/5891] Elapsed 0m 32s (remain 15m 30s) Loss: 0.1448(0.1741) Grad: 1.0342  LR: 0.00001688  
Epoch: [2][300/5891] Elapsed 0m 47s (remain 14m 50s) Loss: 0.2251(0.1728) Grad: 2.5936  LR: 0.00001678  
Epoch: [2][400/5891] Elapsed 1m 2s (remain 14m 21s) Loss: 0.1073(0.1919) Grad: 1.3653  LR: 0.00001668  
Epoch: [2][500/5891] Elapsed 1m 19s (remain 14m 10s) Loss: 0.1940(0.2054) Grad: 0.6699  LR: 0.00001658  
Epoch: [2][600/5891] Elapsed 1m 34s (remain 13m 47s) Loss: 0.2742(0.2111) Grad: 0.1269  LR: 0.00001648  
Epoch: [2][700/5891] Elapsed 1m 48s (remain 13m 25s) Loss: 0.1932(0.2172) Grad: 0.6154  LR: 0.00001638  
Epoch: [2][800/5891] Elapsed 2m 3s (remain 13m 5s) Loss: 0.3511(0.2206) Grad: 0.8792  LR: 0.00001628  
Epoch: [2][900/5891] Elapsed 2m 18s (remain 12m 48s) Loss: 0.2339

Epoch 2 - avg_train_loss: 0.2482  avg_val_loss: 0.2519  time: 1036s
INFO:__main__:Epoch 2 - avg_train_loss: 0.2482  avg_val_loss: 0.2519  time: 1036s
Epoch 2 - Score: 0.0000
INFO:__main__:Epoch 2 - Score: 0.0000


Epoch: [3][0/5891] Elapsed 0m 0s (remain 58m 55s) Loss: 0.3530(0.3530) Grad: nan  LR: 0.00001000  
Epoch: [3][100/5891] Elapsed 0m 15s (remain 14m 59s) Loss: 0.3147(0.2473) Grad: 0.6259  LR: 0.00000987  
Epoch: [3][200/5891] Elapsed 0m 30s (remain 14m 35s) Loss: 0.1543(0.2507) Grad: 1.1217  LR: 0.00000973  
Epoch: [3][300/5891] Elapsed 0m 46s (remain 14m 16s) Loss: 0.4346(0.2531) Grad: 1.9281  LR: 0.00000960  
Epoch: [3][400/5891] Elapsed 1m 1s (remain 14m 1s) Loss: 0.2744(0.2547) Grad: 0.0975  LR: 0.00000947  
Epoch: [3][500/5891] Elapsed 1m 16s (remain 13m 43s) Loss: 0.1564(0.2543) Grad: 1.2143  LR: 0.00000933  
Epoch: [3][600/5891] Elapsed 1m 31s (remain 13m 25s) Loss: 0.3162(0.2539) Grad: 0.7436  LR: 0.00000920  
Epoch: [3][700/5891] Elapsed 1m 47s (remain 13m 19s) Loss: 0.2754(0.2537) Grad: 0.3664  LR: 0.00000907  
Epoch: [3][800/5891] Elapsed 2m 2s (remain 13m 0s) Loss: 0.2344(0.2526) Grad: 0.2302  LR: 0.00000893  
Epoch: [3][900/5891] Elapsed 2m 17s (remain 12m 41s) Loss: 0.3167

Epoch 3 - avg_train_loss: 0.2518  avg_val_loss: 0.2517  time: 1045s
INFO:__main__:Epoch 3 - avg_train_loss: 0.2518  avg_val_loss: 0.2517  time: 1045s
Epoch 3 - Score: 0.0000
INFO:__main__:Epoch 3 - Score: 0.0000


Epoch: [4][0/5891] Elapsed 0m 0s (remain 61m 41s) Loss: 0.1130(0.1130) Grad: nan  LR: 0.00000293  
Epoch: [4][100/5891] Elapsed 0m 16s (remain 15m 28s) Loss: 0.3547(0.2535) Grad: 1.0693  LR: 0.00000283  
Epoch: [4][200/5891] Elapsed 0m 31s (remain 14m 59s) Loss: 0.2747(0.2540) Grad: 0.2283  LR: 0.00000274  
Epoch: [4][300/5891] Elapsed 0m 47s (remain 14m 35s) Loss: 0.2346(0.2571) Grad: 0.2659  LR: 0.00000265  
Epoch: [4][400/5891] Elapsed 1m 2s (remain 14m 19s) Loss: 0.3154(0.2551) Grad: 0.6991  LR: 0.00000256  
Epoch: [4][500/5891] Elapsed 1m 17s (remain 13m 58s) Loss: 0.3152(0.2545) Grad: 0.6669  LR: 0.00000247  
Epoch: [4][600/5891] Elapsed 1m 33s (remain 13m 42s) Loss: 0.1946(0.2554) Grad: 0.7006  LR: 0.00000239  
Epoch: [4][700/5891] Elapsed 1m 49s (remain 13m 34s) Loss: 0.3154(0.2545) Grad: 0.6858  LR: 0.00000230  
Epoch: [4][800/5891] Elapsed 2m 5s (remain 13m 16s) Loss: 0.1948(0.2554) Grad: 0.7143  LR: 0.00000222  
Epoch: [4][900/5891] Elapsed 2m 20s (remain 12m 58s) Loss: 0.15

Epoch 4 - avg_train_loss: 0.2517  avg_val_loss: 0.2517  time: 1051s
INFO:__main__:Epoch 4 - avg_train_loss: 0.2517  avg_val_loss: 0.2517  time: 1051s
Epoch 4 - Score: 0.0000
INFO:__main__:Epoch 4 - Score: 0.0000


f1 score : 0.3649332719742292
recall score : 0.24302788844621515
precision score : 0.7322253000923361


Score: 0.3649
INFO:__main__:Score: 0.3649
F1 BEST Score: 0.4925
INFO:__main__:F1 BEST Score: 0.4925
DebertaV2Config {
  "_name_or_path": "microsoft/deberta-v3-base",
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input": false,
  "position_buckets": 256,
  "relative_attention": true,
  "share_att_key": true,
  "transformers_version": "4.32.0",
  "type_vocab_size": 0,
  "vocab_size": 128100
}

INFO:__main__

Epoch: [1][0/5891] Elapsed 0m 0s (remain 68m 2s) Loss: 0.6782(0.6782) Grad: nan  LR: 0.00002000  
Epoch: [1][100/5891] Elapsed 0m 15s (remain 14m 59s) Loss: 0.2683(0.2776) Grad: 1.4521  LR: 0.00002000  
Epoch: [1][200/5891] Elapsed 0m 30s (remain 14m 30s) Loss: 0.1592(0.2555) Grad: 0.5765  LR: 0.00002000  
Epoch: [1][300/5891] Elapsed 0m 46s (remain 14m 15s) Loss: 0.1477(0.2431) Grad: 0.6917  LR: 0.00001999  
Epoch: [1][400/5891] Elapsed 1m 1s (remain 13m 59s) Loss: 0.2788(0.2355) Grad: 0.8773  LR: 0.00001999  
Epoch: [1][500/5891] Elapsed 1m 16s (remain 13m 44s) Loss: 0.2852(0.2318) Grad: 1.0417  LR: 0.00001998  
Epoch: [1][600/5891] Elapsed 1m 33s (remain 13m 46s) Loss: 0.1426(0.2247) Grad: 0.4313  LR: 0.00001997  
Epoch: [1][700/5891] Elapsed 1m 49s (remain 13m 29s) Loss: 0.3774(0.2223) Grad: 1.2945  LR: 0.00001996  
Epoch: [1][800/5891] Elapsed 2m 4s (remain 13m 12s) Loss: 0.1567(0.2201) Grad: 1.0844  LR: 0.00001994  
Epoch: [1][900/5891] Elapsed 2m 19s (remain 12m 54s) Loss: 0.171

Epoch 1 - avg_train_loss: 0.1892  avg_val_loss: 0.1679  time: 1050s
INFO:__main__:Epoch 1 - avg_train_loss: 0.1892  avg_val_loss: 0.1679  time: 1050s
Epoch 1 - Score: 0.4424
INFO:__main__:Epoch 1 - Score: 0.4424
Epoch 1 - Save Best Score: 0.5118 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.5118 Model


Epoch: [2][0/5891] Elapsed 0m 0s (remain 68m 20s) Loss: 0.1172(0.1172) Grad: nan  LR: 0.00001707  
Epoch: [2][100/5891] Elapsed 0m 17s (remain 16m 36s) Loss: 0.1230(0.1600) Grad: 0.9386  LR: 0.00001698  
Epoch: [2][200/5891] Elapsed 0m 35s (remain 16m 35s) Loss: 0.1099(0.1631) Grad: 0.6880  LR: 0.00001688  
Epoch: [2][300/5891] Elapsed 0m 50s (remain 15m 37s) Loss: 0.1011(0.1633) Grad: 0.6114  LR: 0.00001678  
Epoch: [2][400/5891] Elapsed 1m 5s (remain 14m 59s) Loss: 0.2437(0.1611) Grad: 0.6274  LR: 0.00001668  
Epoch: [2][500/5891] Elapsed 1m 21s (remain 14m 33s) Loss: 0.1194(0.1618) Grad: 0.5941  LR: 0.00001658  
Epoch: [2][600/5891] Elapsed 1m 36s (remain 14m 7s) Loss: 0.0590(0.1634) Grad: 0.5410  LR: 0.00001648  
Epoch: [2][700/5891] Elapsed 1m 51s (remain 13m 44s) Loss: 0.1126(0.1635) Grad: 0.5555  LR: 0.00001638  
Epoch: [2][800/5891] Elapsed 2m 6s (remain 13m 22s) Loss: 0.2681(0.1642) Grad: 1.1677  LR: 0.00001628  
Epoch: [2][900/5891] Elapsed 2m 21s (remain 13m 3s) Loss: 0.1031

Epoch 2 - avg_train_loss: 0.1623  avg_val_loss: 0.1594  time: 1056s
INFO:__main__:Epoch 2 - avg_train_loss: 0.1623  avg_val_loss: 0.1594  time: 1056s
Epoch 2 - Score: 0.4731
INFO:__main__:Epoch 2 - Score: 0.4731
Epoch 2 - Save Best Score: 0.5408 Model
INFO:__main__:Epoch 2 - Save Best Score: 0.5408 Model


Epoch: [3][0/5891] Elapsed 0m 0s (remain 62m 15s) Loss: 0.1801(0.1801) Grad: nan  LR: 0.00001000  
Epoch: [3][100/5891] Elapsed 0m 17s (remain 16m 20s) Loss: 0.0660(0.1573) Grad: 0.9527  LR: 0.00000987  
Epoch: [3][200/5891] Elapsed 0m 33s (remain 15m 42s) Loss: 0.1676(0.1530) Grad: 0.7294  LR: 0.00000973  
Epoch: [3][300/5891] Elapsed 0m 48s (remain 15m 2s) Loss: 0.2124(0.1543) Grad: 0.7815  LR: 0.00000960  
Epoch: [3][400/5891] Elapsed 1m 4s (remain 14m 36s) Loss: 0.1126(0.1532) Grad: 0.7451  LR: 0.00000947  
Epoch: [3][500/5891] Elapsed 1m 19s (remain 14m 13s) Loss: 0.1405(0.1515) Grad: 0.7261  LR: 0.00000933  
Epoch: [3][600/5891] Elapsed 1m 34s (remain 13m 55s) Loss: 0.2678(0.1527) Grad: 1.7323  LR: 0.00000920  
Epoch: [3][700/5891] Elapsed 1m 50s (remain 13m 35s) Loss: 0.2561(0.1522) Grad: 1.6126  LR: 0.00000907  
Epoch: [3][800/5891] Elapsed 2m 5s (remain 13m 18s) Loss: 0.0943(0.1510) Grad: 0.5577  LR: 0.00000893  
Epoch: [3][900/5891] Elapsed 2m 21s (remain 13m 1s) Loss: 0.0634

Epoch 3 - avg_train_loss: 0.1510  avg_val_loss: 0.1551  time: 1055s
INFO:__main__:Epoch 3 - avg_train_loss: 0.1510  avg_val_loss: 0.1551  time: 1055s
Epoch 3 - Score: 0.4983
INFO:__main__:Epoch 3 - Score: 0.4983
Epoch 3 - Save Best Score: 0.5556 Model
INFO:__main__:Epoch 3 - Save Best Score: 0.5556 Model


Epoch: [4][0/5891] Elapsed 0m 0s (remain 63m 18s) Loss: 0.0785(0.0785) Grad: nan  LR: 0.00000293  
Epoch: [4][100/5891] Elapsed 0m 18s (remain 17m 34s) Loss: 0.1217(0.1432) Grad: 0.9804  LR: 0.00000283  
Epoch: [4][200/5891] Elapsed 0m 34s (remain 16m 11s) Loss: 0.0734(0.1391) Grad: 0.7746  LR: 0.00000274  
Epoch: [4][300/5891] Elapsed 0m 49s (remain 15m 25s) Loss: 0.1312(0.1413) Grad: 0.5628  LR: 0.00000265  
Epoch: [4][400/5891] Elapsed 1m 6s (remain 15m 3s) Loss: 0.1389(0.1431) Grad: 0.7335  LR: 0.00000256  
Epoch: [4][500/5891] Elapsed 1m 21s (remain 14m 41s) Loss: 0.2111(0.1440) Grad: 1.1831  LR: 0.00000247  
Epoch: [4][600/5891] Elapsed 1m 37s (remain 14m 15s) Loss: 0.1888(0.1455) Grad: 1.8081  LR: 0.00000239  
Epoch: [4][700/5891] Elapsed 1m 52s (remain 13m 51s) Loss: 0.1072(0.1446) Grad: 0.6464  LR: 0.00000230  
Epoch: [4][800/5891] Elapsed 2m 7s (remain 13m 31s) Loss: 0.0802(0.1445) Grad: 1.0301  LR: 0.00000222  
Epoch: [4][900/5891] Elapsed 2m 22s (remain 13m 11s) Loss: 0.174

Epoch 4 - avg_train_loss: 0.1411  avg_val_loss: 0.1559  time: 1053s
INFO:__main__:Epoch 4 - avg_train_loss: 0.1411  avg_val_loss: 0.1559  time: 1053s
Epoch 4 - Score: 0.5135
INFO:__main__:Epoch 4 - Score: 0.5135


f1 score : 0.49832877544819204
recall score : 0.37695372356726936
precision score : 0.7349865551239916


Score: 0.4983
INFO:__main__:Score: 0.4983
F1 BEST Score: 0.5556
INFO:__main__:F1 BEST Score: 0.5556
DebertaV2Config {
  "_name_or_path": "microsoft/deberta-v3-base",
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input": false,
  "position_buckets": 256,
  "relative_attention": true,
  "share_att_key": true,
  "transformers_version": "4.32.0",
  "type_vocab_size": 0,
  "vocab_size": 128100
}

INFO:__main__

Epoch: [1][0/5891] Elapsed 0m 0s (remain 66m 49s) Loss: 0.4268(0.4268) Grad: nan  LR: 0.00002000  
Epoch: [1][100/5891] Elapsed 0m 16s (remain 15m 18s) Loss: 0.2040(0.2510) Grad: 0.4874  LR: 0.00002000  
Epoch: [1][200/5891] Elapsed 0m 31s (remain 14m 55s) Loss: 0.1454(0.2406) Grad: 1.1923  LR: 0.00002000  
Epoch: [1][300/5891] Elapsed 0m 46s (remain 14m 30s) Loss: 0.3489(0.2288) Grad: 1.5855  LR: 0.00001999  
Epoch: [1][400/5891] Elapsed 1m 2s (remain 14m 12s) Loss: 0.2159(0.2255) Grad: 0.2716  LR: 0.00001999  
Epoch: [1][500/5891] Elapsed 1m 17s (remain 13m 54s) Loss: 0.0942(0.2205) Grad: 1.5348  LR: 0.00001998  
Epoch: [1][600/5891] Elapsed 1m 32s (remain 13m 38s) Loss: 0.1484(0.2214) Grad: 0.7976  LR: 0.00001997  
Epoch: [1][700/5891] Elapsed 1m 48s (remain 13m 22s) Loss: 0.0658(0.2200) Grad: 0.8058  LR: 0.00001996  
Epoch: [1][800/5891] Elapsed 2m 4s (remain 13m 8s) Loss: 0.2400(0.2175) Grad: 1.1199  LR: 0.00001994  
Epoch: [1][900/5891] Elapsed 2m 19s (remain 12m 51s) Loss: 0.274

Epoch 1 - avg_train_loss: 0.1893  avg_val_loss: 0.1718  time: 1050s
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tor

Epoch: [2][0/5891] Elapsed 0m 0s (remain 66m 35s) Loss: 0.1045(0.1045) Grad: nan  LR: 0.00001707  
Epoch: [2][100/5891] Elapsed 0m 16s (remain 15m 57s) Loss: 0.1810(0.1728) Grad: 0.6152  LR: 0.00001698  
Epoch: [2][200/5891] Elapsed 0m 32s (remain 15m 21s) Loss: 0.1068(0.1706) Grad: 0.4140  LR: 0.00001688  
Epoch: [2][300/5891] Elapsed 0m 47s (remain 14m 45s) Loss: 0.0522(0.1685) Grad: 0.7347  LR: 0.00001678  
Epoch: [2][400/5891] Elapsed 1m 3s (remain 14m 27s) Loss: 0.1087(0.1662) Grad: 0.5164  LR: 0.00001668  
Epoch: [2][500/5891] Elapsed 1m 18s (remain 14m 9s) Loss: 0.1802(0.1668) Grad: 0.7235  LR: 0.00001658  
Epoch: [2][600/5891] Elapsed 1m 34s (remain 13m 51s) Loss: 0.1508(0.1671) Grad: 0.9950  LR: 0.00001648  
Epoch: [2][700/5891] Elapsed 1m 50s (remain 13m 40s) Loss: 0.2042(0.1668) Grad: 1.0246  LR: 0.00001638  
Epoch: [2][800/5891] Elapsed 2m 6s (remain 13m 22s) Loss: 0.0723(0.1663) Grad: 0.7635  LR: 0.00001628  
Epoch: [2][900/5891] Elapsed 2m 21s (remain 13m 2s) Loss: 0.2803

Epoch 2 - avg_train_loss: 0.1627  avg_val_loss: 0.1599  time: 1055s
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tor

Epoch: [3][0/5891] Elapsed 0m 0s (remain 67m 24s) Loss: 0.0624(0.0624) Grad: nan  LR: 0.00001000  
Epoch: [3][100/5891] Elapsed 0m 17s (remain 16m 50s) Loss: 0.1035(0.1440) Grad: 0.8683  LR: 0.00000987  
Epoch: [3][200/5891] Elapsed 0m 33s (remain 15m 42s) Loss: 0.2076(0.1502) Grad: 1.6219  LR: 0.00000973  
Epoch: [3][300/5891] Elapsed 0m 49s (remain 15m 25s) Loss: 0.1490(0.1484) Grad: 1.0766  LR: 0.00000960  
Epoch: [3][400/5891] Elapsed 1m 5s (remain 14m 50s) Loss: 0.2230(0.1530) Grad: 0.7452  LR: 0.00000947  
Epoch: [3][500/5891] Elapsed 1m 20s (remain 14m 24s) Loss: 0.3162(0.1532) Grad: 1.3589  LR: 0.00000933  
Epoch: [3][600/5891] Elapsed 1m 35s (remain 14m 2s) Loss: 0.1083(0.1536) Grad: 0.6064  LR: 0.00000920  
Epoch: [3][700/5891] Elapsed 1m 51s (remain 13m 42s) Loss: 0.1082(0.1557) Grad: 0.7266  LR: 0.00000907  
Epoch: [3][800/5891] Elapsed 2m 6s (remain 13m 21s) Loss: 0.1243(0.1561) Grad: 1.0857  LR: 0.00000893  
Epoch: [3][900/5891] Elapsed 2m 21s (remain 13m 2s) Loss: 0.0883

Epoch 3 - avg_train_loss: 0.1517  avg_val_loss: 0.1541  time: 1056s
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tor

Epoch: [4][0/5891] Elapsed 0m 0s (remain 65m 44s) Loss: 0.1400(0.1400) Grad: nan  LR: 0.00000293  
Epoch: [4][100/5891] Elapsed 0m 16s (remain 16m 11s) Loss: 0.1589(0.1463) Grad: 0.8338  LR: 0.00000283  
Epoch: [4][200/5891] Elapsed 0m 32s (remain 15m 28s) Loss: 0.1283(0.1500) Grad: 0.8708  LR: 0.00000274  
Epoch: [4][300/5891] Elapsed 0m 48s (remain 14m 52s) Loss: 0.1727(0.1496) Grad: 0.9697  LR: 0.00000265  
Epoch: [4][400/5891] Elapsed 1m 3s (remain 14m 26s) Loss: 0.1769(0.1473) Grad: 1.0464  LR: 0.00000256  
Epoch: [4][500/5891] Elapsed 1m 18s (remain 14m 4s) Loss: 0.0913(0.1462) Grad: 0.5195  LR: 0.00000247  
Epoch: [4][600/5891] Elapsed 1m 33s (remain 13m 44s) Loss: 0.1553(0.1452) Grad: 1.1018  LR: 0.00000239  
Epoch: [4][700/5891] Elapsed 1m 48s (remain 13m 25s) Loss: 0.1757(0.1451) Grad: 1.1023  LR: 0.00000230  
Epoch: [4][800/5891] Elapsed 2m 4s (remain 13m 9s) Loss: 0.1298(0.1433) Grad: 0.8600  LR: 0.00000222  
Epoch: [4][900/5891] Elapsed 2m 20s (remain 12m 56s) Loss: 0.1931

Epoch 4 - avg_train_loss: 0.1427  avg_val_loss: 0.1543  time: 1058s
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tor

f1 score : 0.5078802206461781
recall score : 0.3949747203922169
precision score : 0.7111724137931035


Score: 0.5079
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tornado/platform/asyncio.py", line 195, in start
    self

Epoch: [1][0/5891] Elapsed 0m 0s (remain 79m 6s) Loss: 0.5527(0.5527) Grad: nan  LR: 0.00002000  
Epoch: [1][100/5891] Elapsed 0m 18s (remain 17m 25s) Loss: 0.3069(0.2626) Grad: 1.0860  LR: 0.00002000  
Epoch: [1][200/5891] Elapsed 0m 34s (remain 16m 5s) Loss: 0.1193(0.2396) Grad: 1.2813  LR: 0.00002000  
Epoch: [1][300/5891] Elapsed 0m 49s (remain 15m 27s) Loss: 0.2052(0.2323) Grad: 1.3800  LR: 0.00001999  
Epoch: [1][400/5891] Elapsed 1m 6s (remain 15m 3s) Loss: 0.2393(0.2295) Grad: 0.6816  LR: 0.00001999  
Epoch: [1][500/5891] Elapsed 1m 21s (remain 14m 37s) Loss: 0.3032(0.2260) Grad: 0.6681  LR: 0.00001998  
Epoch: [1][600/5891] Elapsed 1m 37s (remain 14m 18s) Loss: 0.1930(0.2245) Grad: 0.4691  LR: 0.00001997  
Epoch: [1][700/5891] Elapsed 1m 53s (remain 13m 58s) Loss: 0.1816(0.2244) Grad: 0.5429  LR: 0.00001996  
Epoch: [1][800/5891] Elapsed 2m 10s (remain 13m 48s) Loss: 0.1875(0.2225) Grad: 0.4363  LR: 0.00001994  
Epoch: [1][900/5891] Elapsed 2m 26s (remain 13m 30s) Loss: 0.2449

Epoch 1 - avg_train_loss: 0.1951  avg_val_loss: 0.1799  time: 1075s
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tor

Epoch: [2][0/5891] Elapsed 0m 0s (remain 65m 30s) Loss: 0.1294(0.1294) Grad: nan  LR: 0.00001707  
Epoch: [2][100/5891] Elapsed 0m 17s (remain 16m 44s) Loss: 0.1930(0.1672) Grad: 1.1761  LR: 0.00001698  
Epoch: [2][200/5891] Elapsed 0m 33s (remain 15m 56s) Loss: 0.1238(0.1681) Grad: 0.4639  LR: 0.00001688  
Epoch: [2][300/5891] Elapsed 0m 49s (remain 15m 20s) Loss: 0.1525(0.1728) Grad: 0.7613  LR: 0.00001678  
Epoch: [2][400/5891] Elapsed 1m 5s (remain 14m 52s) Loss: 0.1855(0.1713) Grad: 0.9173  LR: 0.00001668  
Epoch: [2][500/5891] Elapsed 1m 20s (remain 14m 28s) Loss: 0.1165(0.1703) Grad: 0.3906  LR: 0.00001658  
Epoch: [2][600/5891] Elapsed 1m 36s (remain 14m 7s) Loss: 0.1262(0.1729) Grad: 0.8611  LR: 0.00001648  
Epoch: [2][700/5891] Elapsed 1m 52s (remain 13m 53s) Loss: 0.2472(0.1728) Grad: 0.9613  LR: 0.00001638  
Epoch: [2][800/5891] Elapsed 2m 8s (remain 13m 39s) Loss: 0.2025(0.1721) Grad: 1.4227  LR: 0.00001628  
Epoch: [2][900/5891] Elapsed 2m 24s (remain 13m 19s) Loss: 0.083

Epoch 2 - avg_train_loss: 0.1673  avg_val_loss: 0.1632  time: 1062s
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tor

Epoch: [3][0/5891] Elapsed 0m 0s (remain 96m 56s) Loss: 0.0993(0.0993) Grad: nan  LR: 0.00001000  
Epoch: [3][100/5891] Elapsed 0m 18s (remain 17m 18s) Loss: 0.0448(0.1510) Grad: 1.0474  LR: 0.00000987  
Epoch: [3][200/5891] Elapsed 0m 33s (remain 15m 57s) Loss: 0.1793(0.1566) Grad: 0.8975  LR: 0.00000973  
Epoch: [3][300/5891] Elapsed 0m 49s (remain 15m 14s) Loss: 0.2068(0.1580) Grad: 1.3420  LR: 0.00000960  
Epoch: [3][400/5891] Elapsed 1m 4s (remain 14m 39s) Loss: 0.1290(0.1610) Grad: 0.5098  LR: 0.00000947  
Epoch: [3][500/5891] Elapsed 1m 19s (remain 14m 14s) Loss: 0.0612(0.1609) Grad: 1.2500  LR: 0.00000933  
Epoch: [3][600/5891] Elapsed 1m 34s (remain 13m 54s) Loss: 0.1710(0.1616) Grad: 1.0078  LR: 0.00000920  
Epoch: [3][700/5891] Elapsed 1m 50s (remain 13m 34s) Loss: 0.2634(0.1603) Grad: 1.0033  LR: 0.00000907  
Epoch: [3][800/5891] Elapsed 2m 5s (remain 13m 15s) Loss: 0.1388(0.1615) Grad: 1.2924  LR: 0.00000893  
Epoch: [3][900/5891] Elapsed 2m 20s (remain 12m 58s) Loss: 0.04

Epoch 3 - avg_train_loss: 0.1529  avg_val_loss: 0.1586  time: 1060s
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tor

Epoch: [4][0/5891] Elapsed 0m 0s (remain 67m 59s) Loss: 0.1508(0.1508) Grad: nan  LR: 0.00000293  
Epoch: [4][100/5891] Elapsed 0m 17s (remain 17m 6s) Loss: 0.2732(0.1468) Grad: 1.2764  LR: 0.00000283  
Epoch: [4][200/5891] Elapsed 0m 34s (remain 16m 3s) Loss: 0.2140(0.1453) Grad: 1.7816  LR: 0.00000274  
Epoch: [4][300/5891] Elapsed 0m 50s (remain 15m 32s) Loss: 0.0525(0.1430) Grad: 0.8612  LR: 0.00000265  
Epoch: [4][400/5891] Elapsed 1m 5s (remain 15m 0s) Loss: 0.1567(0.1431) Grad: 1.2413  LR: 0.00000256  
Epoch: [4][500/5891] Elapsed 1m 21s (remain 14m 32s) Loss: 0.1083(0.1426) Grad: 0.6745  LR: 0.00000247  
Epoch: [4][600/5891] Elapsed 1m 36s (remain 14m 10s) Loss: 0.0567(0.1432) Grad: 1.1861  LR: 0.00000239  
Epoch: [4][700/5891] Elapsed 1m 51s (remain 13m 48s) Loss: 0.2495(0.1437) Grad: 1.4129  LR: 0.00000230  
Epoch: [4][800/5891] Elapsed 2m 7s (remain 13m 28s) Loss: 0.1259(0.1419) Grad: 1.2675  LR: 0.00000222  
Epoch: [4][900/5891] Elapsed 2m 22s (remain 13m 9s) Loss: 0.2346(0

Epoch 4 - avg_train_loss: 0.1423  avg_val_loss: 0.1583  time: 1064s
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tor

f1 score : 0.5003484320557491
recall score : 0.38507508427827153
precision score : 0.7141233304916169


Score: 0.5003
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tornado/platform/asyncio.py", line 195, in start
    self

Epoch: [1][0/5891] Elapsed 0m 1s (remain 125m 27s) Loss: 0.7686(0.7686) Grad: nan  LR: 0.00002000  
Epoch: [1][100/5891] Elapsed 0m 18s (remain 17m 19s) Loss: 0.3679(0.2720) Grad: 2.1029  LR: 0.00002000  
Epoch: [1][200/5891] Elapsed 0m 33s (remain 15m 46s) Loss: 0.2700(0.2440) Grad: 2.0140  LR: 0.00002000  
Epoch: [1][300/5891] Elapsed 0m 48s (remain 15m 8s) Loss: 0.2323(0.2330) Grad: 0.7782  LR: 0.00001999  
Epoch: [1][400/5891] Elapsed 1m 4s (remain 14m 39s) Loss: 0.2078(0.2299) Grad: 0.5115  LR: 0.00001999  
Epoch: [1][500/5891] Elapsed 1m 19s (remain 14m 16s) Loss: 0.2397(0.2278) Grad: 0.7441  LR: 0.00001998  
Epoch: [1][600/5891] Elapsed 1m 34s (remain 13m 55s) Loss: 0.1482(0.2240) Grad: 0.7523  LR: 0.00001997  
Epoch: [1][700/5891] Elapsed 1m 50s (remain 13m 38s) Loss: 0.1005(0.2203) Grad: 1.5847  LR: 0.00001996  
Epoch: [1][800/5891] Elapsed 2m 6s (remain 13m 21s) Loss: 0.1199(0.2189) Grad: 1.5582  LR: 0.00001994  
Epoch: [1][900/5891] Elapsed 2m 21s (remain 13m 4s) Loss: 0.231

Epoch 1 - avg_train_loss: 0.1906  avg_val_loss: 0.1745  time: 1064s
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tor

Epoch: [2][0/5891] Elapsed 0m 0s (remain 69m 3s) Loss: 0.1666(0.1666) Grad: nan  LR: 0.00001707  
Epoch: [2][100/5891] Elapsed 0m 17s (remain 16m 31s) Loss: 0.2020(0.1689) Grad: 0.7009  LR: 0.00001698  
Epoch: [2][200/5891] Elapsed 0m 33s (remain 15m 48s) Loss: 0.1482(0.1705) Grad: 0.4118  LR: 0.00001688  
Epoch: [2][300/5891] Elapsed 0m 49s (remain 15m 11s) Loss: 0.1384(0.1665) Grad: 0.8779  LR: 0.00001678  
Epoch: [2][400/5891] Elapsed 1m 4s (remain 14m 47s) Loss: 0.1447(0.1658) Grad: 0.4381  LR: 0.00001668  
Epoch: [2][500/5891] Elapsed 1m 21s (remain 14m 32s) Loss: 0.1923(0.1671) Grad: 0.7363  LR: 0.00001658  
Epoch: [2][600/5891] Elapsed 1m 36s (remain 14m 10s) Loss: 0.1625(0.1680) Grad: 1.2983  LR: 0.00001648  
Epoch: [2][700/5891] Elapsed 1m 52s (remain 13m 49s) Loss: 0.1576(0.1689) Grad: 0.4984  LR: 0.00001638  
Epoch: [2][800/5891] Elapsed 2m 7s (remain 13m 29s) Loss: 0.1367(0.1696) Grad: 0.6181  LR: 0.00001628  
Epoch: [2][900/5891] Elapsed 2m 22s (remain 13m 10s) Loss: 0.254

Epoch 2 - avg_train_loss: 0.1635  avg_val_loss: 0.1627  time: 1055s
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tor

Epoch: [3][0/5891] Elapsed 0m 0s (remain 95m 46s) Loss: 0.1159(0.1159) Grad: nan  LR: 0.00001000  
Epoch: [3][100/5891] Elapsed 0m 17s (remain 16m 32s) Loss: 0.0421(0.1545) Grad: 0.4295  LR: 0.00000987  
Epoch: [3][200/5891] Elapsed 0m 33s (remain 15m 59s) Loss: 0.1782(0.1512) Grad: 0.6562  LR: 0.00000973  
Epoch: [3][300/5891] Elapsed 0m 49s (remain 15m 21s) Loss: 0.0964(0.1519) Grad: 0.4804  LR: 0.00000960  
Epoch: [3][400/5891] Elapsed 1m 4s (remain 14m 46s) Loss: 0.2150(0.1524) Grad: 1.5197  LR: 0.00000947  
Epoch: [3][500/5891] Elapsed 1m 20s (remain 14m 21s) Loss: 0.1918(0.1551) Grad: 1.6333  LR: 0.00000933  
Epoch: [3][600/5891] Elapsed 1m 35s (remain 13m 58s) Loss: 0.1880(0.1561) Grad: 0.8372  LR: 0.00000920  
Epoch: [3][700/5891] Elapsed 1m 50s (remain 13m 37s) Loss: 0.2207(0.1553) Grad: 1.4044  LR: 0.00000907  
Epoch: [3][800/5891] Elapsed 2m 5s (remain 13m 19s) Loss: 0.2103(0.1554) Grad: 0.7762  LR: 0.00000893  
Epoch: [3][900/5891] Elapsed 2m 20s (remain 12m 59s) Loss: 0.31

Epoch 3 - avg_train_loss: 0.1522  avg_val_loss: 0.1594  time: 1052s
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tor

Epoch: [4][0/5891] Elapsed 0m 0s (remain 76m 5s) Loss: 0.0928(0.0928) Grad: nan  LR: 0.00000293  
Epoch: [4][100/5891] Elapsed 0m 17s (remain 16m 26s) Loss: 0.0624(0.1474) Grad: 0.7990  LR: 0.00000283  
Epoch: [4][200/5891] Elapsed 0m 32s (remain 15m 16s) Loss: 0.0875(0.1434) Grad: 0.4922  LR: 0.00000274  
Epoch: [4][300/5891] Elapsed 0m 47s (remain 14m 35s) Loss: 0.1479(0.1418) Grad: 0.6448  LR: 0.00000265  
Epoch: [4][400/5891] Elapsed 1m 2s (remain 14m 10s) Loss: 0.1411(0.1427) Grad: 0.8619  LR: 0.00000256  
Epoch: [4][500/5891] Elapsed 1m 16s (remain 13m 48s) Loss: 0.2156(0.1418) Grad: 0.9811  LR: 0.00000247  
Epoch: [4][600/5891] Elapsed 1m 31s (remain 13m 27s) Loss: 0.1464(0.1422) Grad: 0.8819  LR: 0.00000239  
Epoch: [4][700/5891] Elapsed 1m 46s (remain 13m 9s) Loss: 0.1686(0.1428) Grad: 1.3192  LR: 0.00000230  
Epoch: [4][800/5891] Elapsed 2m 1s (remain 12m 51s) Loss: 0.1437(0.1429) Grad: 0.7947  LR: 0.00000222  
Epoch: [4][900/5891] Elapsed 2m 15s (remain 12m 33s) Loss: 0.0892

Epoch 4 - avg_train_loss: 0.1432  avg_val_loss: 0.1594  time: 1019s
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tor

f1 score : 0.4716548561900793
recall score : 0.34676677903769537
precision score : 0.7371335504885993


Score: 0.4717
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tornado/platform/asyncio.py", line 195, in start
    self

f1 score : 0.47148351875594524
recall score : 0.34936103705065735
precision score : 0.7248680612958607


Score: 0.4715
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tornado/platform/asyncio.py", line 195, in start
    self

In [None]:
from google.colab import runtime
runtime.unassign()