In [None]:
# Mount Google Drive under /content/drive; the project paths defined further
# down all live below this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Install the Hugging Face / tokenizer packages used by the cells below.
# NOTE(review): versions are not pinned, so a re-run may resolve different
# releases than the ones shown in the recorded output.
!pip install transformers
!pip install datasets
!pip install sentencepiece

Collecting transformers
  Downloading transformers-4.33.1-py3-none-any.whl (7.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m26.6 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.15.1 (from transformers)
  Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.8/268.8 kB[0m [31m22.0 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m68.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors>=0.3.1 (from transformers)
  Downloading safetensors-0.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m60.6 MB/s[0m eta [36m0:00:0

In [None]:
# Show which GPU (if any) is attached to this runtime.
!nvidia-smi

Thu Sep  7 13:20:54 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   34C    P0    24W / 300W |      0MiB / 16384MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
import os
import gc
import math
import time
import random
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.simplefilter('ignore')
from tqdm import tqdm
import re
import html

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import Adam, SGD, AdamW, RAdam
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader, Dataset

from sklearn.model_selection import StratifiedKFold,StratifiedGroupKFold,GroupKFold
from sklearn.metrics import log_loss,f1_score, recall_score, accuracy_score, precision_score

from transformers import AutoModel, AutoConfig, AutoTokenizer, AdamW, DataCollatorWithPadding
from transformers import get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup

# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
import os

# Project layout on the mounted drive.
DIR = "/content/drive/MyDrive/Competitions/Signate/MUFG2023"
INPUT_DIR = os.path.join(DIR, "input")
OUTPUT_DIR = os.path.join(DIR, "output")
# Keep the trailing slash: other cells build file names from this value with
# plain string concatenation (e.g. OUTPUT_EXP_DIR + 'train').
OUTPUT_EXP_DIR = DIR + '/output/EXP070/'

# Create the output folders on first use.
for _directory in (OUTPUT_DIR, OUTPUT_EXP_DIR):
    if not os.path.exists(_directory):
        os.makedirs(_directory)

In [None]:


# ====================================================
# CFG
# ====================================================
class CFG:
    """Experiment settings, used as a plain namespace of class attributes."""

    # --- run control -------------------------------------------------------
    debug = False            # True -> the overrides after the class shorten the run
    train = True             # gates the cross-validation training cell
    seed = 42
    print_freq = 100         # print a progress line every N batches
    num_workers = 4          # DataLoader worker processes
    apex = True              # enables torch.cuda.amp autocast / GradScaler in train_fn

    # --- backbone ----------------------------------------------------------
    # Alternatives that were listed here as commented-out options:
    #   microsoft/deberta-v3-base, microsoft/deberta-base, roberta-base,
    #   roberta-large, roberta-large-mnli, xlnet-large-cased, albert-xxlarge-v2,
    #   microsoft/deberta-large, microsoft/deberta-v3-large,
    #   microsoft/deberta-v2-xlarge, funnel-transformer/large, albert-base-v2,
    #   albert-large-v2, google/electra-large-discriminator,
    #   google/electra-base-discriminator, facebook/bart-large-mnli,
    #   facebook/bart-large, facebook/bart-base
    model = 'funnel-transformer/medium'
    gradient_checkpointing = False
    freezing = False
    fc_dropout = 0.2

    # --- data / target -----------------------------------------------------
    target = "is_fraud?"
    target_size = 1
    max_len = 97             # placeholder; recomputed from the tokenised texts later
    n_fold = 5
    trn_fold = [0, 1, 2, 3, 4]

    # --- optimisation ------------------------------------------------------
    epochs = 4
    batch_size = 64
    encoder_lr = 2e-5
    decoder_lr = 2e-5
    min_lr = 1e-6
    eps = 1e-6
    betas = (0.9, 0.999)
    weight_decay = 0.01
    gradient_accumulation_steps = 1
    max_grad_norm = 1000

    # --- learning-rate schedule --------------------------------------------
    scheduler = 'cosine'     # one of ['linear', 'cosine']
    batch_scheduler = True   # step the scheduler after every optimizer step
    num_cycles = 0.5
    num_warmup_steps = 0

    # Not read anywhere in the visible cells.
    nth_awp_start_epoch = 1


# Debug runs use fewer epochs and only the first two folds.
if CFG.debug:
    CFG.epochs = 2
    CFG.trn_fold = [0, 1]

In [None]:
def get_score(labels, outputs, thresh=0.5):
    """Print F1 / recall / precision at a fixed threshold and return the F1.

    Args:
        labels: ground-truth binary labels.
        outputs: predicted scores; values strictly greater than ``thresh``
            are counted as the positive class.
        thresh: decision threshold (defaults to the original fixed 0.5).

    Returns:
        The F1 score at ``thresh``.
    """
    y_true = labels
    # Binarise once and reuse it for all three metrics.
    y_hat = (np.asarray(outputs) > thresh).astype(int)
    f_score = f1_score(y_true, y_hat)
    r_score = recall_score(y_true, y_hat)
    p_score = precision_score(y_true, y_hat)
    print(f"f1 score : {f_score}")
    print(f"recall score : {r_score}")
    print(f"precision score : {p_score}")
    return f_score

def get_f1_score(labels, outputs):
    """Return the best F1 obtainable by sweeping the decision threshold.

    Thresholds 0.10, 0.11, ..., 0.79 are tried and the highest F1 is returned.
    The threshold that achieved it is not returned.

    Args:
        labels: ground-truth binary labels.
        outputs: predicted scores.

    Returns:
        The maximum F1 over the threshold grid (0.0 if every threshold
        scores zero).
    """
    y_true = labels
    y_pred = np.asarray(outputs)
    best_score = 0.0
    for thresh in np.round(np.arange(0.1, 0.80, 0.01), 2):
        score = f1_score(y_true, (y_pred > thresh).astype(int))
        if score > best_score:
            best_score = score
    # The best score is already known; no need to recompute F1 at the winner.
    return best_score


def get_logger(filename=OUTPUT_EXP_DIR+'train'):
    """Return the notebook logger, writing to the console and ``<filename>.log``.

    Safe to call repeatedly (e.g. when the cell is re-run): handlers attached
    by an earlier call are removed first, so each message is emitted once per
    destination.

    Args:
        filename: log file path without the ``.log`` suffix.

    Returns:
        The configured ``logging.Logger`` named after this module.
    """
    from logging import getLogger, INFO, FileHandler, Formatter, StreamHandler
    logger = getLogger(__name__)
    logger.setLevel(INFO)

    # getLogger returns the same object on every call; without this cleanup a
    # re-run would stack a second pair of handlers and double every line.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()

    # The recorded output shows each message a second time as
    # "INFO:__main__:...", i.e. it is also handled by the root logger.
    # Stop propagation so only the two handlers below emit.
    logger.propagate = False

    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    handler2 = FileHandler(filename=f"{filename}.log")
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger

LOGGER = get_logger()

def seed_everything(seed=CFG.seed):
    """Seed Python, NumPy and PyTorch (CPU + current CUDA device) with ``seed``.

    Also exports PYTHONHASHSEED and switches cuDNN to deterministic mode.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    seeders = (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed)
    for apply_seed in seeders:
        apply_seed(seed)
    torch.backends.cudnn.deterministic = True

seed_everything(seed=CFG.seed)

In [None]:
def freeze(module):
    """Turn off gradient tracking for every parameter of ``module``."""
    for weight in module.parameters():
        weight.requires_grad_(False)

def get_freezed_parameters(module):
    """List the names of ``module``'s parameters that have ``requires_grad`` off."""
    return [
        name
        for name, parameter in module.named_parameters()
        if not parameter.requires_grad
    ]

def set_embedding_parameters_bits(embeddings_path, optim_bits=32):
    """Register a per-module optimizer bit-width override for embedding weights.

    For each of ``word_embeddings``, ``position_embeddings`` and
    ``token_type_embeddings`` that exists on ``embeddings_path``, the module's
    ``weight`` is registered with bitsandbytes' ``GlobalOptimManager`` using
    ``{'optim_bits': optim_bits}``.

    https://github.com/huggingface/transformers/issues/14819#issuecomment-1003427930

    Args:
        embeddings_path: object holding the embedding sub-modules.
        optim_bits: value stored under ``'optim_bits'`` in the override.
    """
    embedding_types = ("word", "position", "token_type")
    targets = [
        getattr(embeddings_path, f"{embedding_type}_embeddings")
        for embedding_type in embedding_types
        if hasattr(embeddings_path, f"{embedding_type}_embeddings")
    ]
    if not targets:
        return

    # `bnb` is not imported anywhere in the visible notebook, so it is imported
    # here, and only when there is something to register.
    import bitsandbytes as bnb

    manager = bnb.optim.GlobalOptimManager.get_instance()
    for embedding_module in targets:
        manager.register_module_override(
            embedding_module, 'weight', {'optim_bits': optim_bits}
        )

In [None]:
import pandas as pd
import numpy as np


# Load the competition tables from INPUT_DIR.
train = pd.read_csv(os.path.join(INPUT_DIR,"train.csv"))
test = pd.read_csv(os.path.join(INPUT_DIR,"test.csv"))
card = pd.read_csv(os.path.join(INPUT_DIR, "card.csv"))
user = pd.read_csv(os.path.join(INPUT_DIR, "user.csv"))
# The sample submission file is read without a header row.
sub = pd.read_csv(os.path.join(INPUT_DIR, "sample_submit.csv"), header=None)

# Quick look at each table: shape followed by its first three rows.
for _frame in (train, test, card, user, sub):
    print(_frame.shape)
    display(_frame.head(3))

(471283, 12)


Unnamed: 0,index,user_id,card_id,amount,errors?,is_fraud?,merchant_id,merchant_city,merchant_state,zip,mcc,use_chip
0,0,1721,0,$2.623,OK,0,209237,Joliet,IL,60436.0,5541,Swipe Transaction
1,1,1629,3,$6.4,OK,0,2568,Edgerton,WI,53534.0,5814,Swipe Transaction
2,2,655,3,$123.5,OK,0,345310,Ridgefield,WA,98642.0,7538,Swipe Transaction


(457958, 11)


Unnamed: 0,index,user_id,card_id,amount,errors?,merchant_id,merchant_city,merchant_state,zip,mcc,use_chip
0,471283,541,3,$113.278,OK,324189,Orlando,FL,32821.0,4814,Swipe Transaction
1,471284,655,1,$293.944,OK,81219,Ridgefield,WA,98642.0,7538,Chip Transaction
2,471285,492,0,$47.4,OK,274755,Arlington Heights,IL,60004.0,5719,Swipe Transaction


(416, 10)


Unnamed: 0,user_id,card_id,card_brand,card_type,expires,has_chip,cards_issued,credit_limit,acct_open_date,year_pin_last_changed
0,39,0,Visa,Debit,09/2021,YES,1,$17117,05/2007,2010
1,39,1,Amex,Credit,11/2024,YES,2,$5400,10/2015,2015
2,41,0,Discover,Credit,03/2022,YES,2,$14800,12/2010,2011


(97, 17)


Unnamed: 0,user_id,current_age,retirement_age,birth_year,birth_month,gender,address,city,state,zipcode,latitude,longitude,per_capita_income_zipcode,yearly_income_person,total_debt,fico_score,num_credit_cards
0,39,57,64,1962,12,Female,442 Burns Boulevard,Mansfield,MA,2048,42.02,-71.21,$37407,$76274,$102611,698,2
1,41,39,66,1980,10,Female,3863 River Avenue,Lincoln,CA,95648,38.93,-121.25,$21829,$44506,$57994,849,3
2,47,40,67,1979,5,Female,8799 Elm Avenue,Mckinney,TX,75069,33.2,-96.65,$24684,$50329,$76759,625,4


(457958, 2)


Unnamed: 0,0,1
0,471283,0
1,471284,1
2,471285,0


In [None]:
# Two-digit month code ("01".."12") -> English month name.
_MONTH_NAMES = (
    "January", "February", "March", "April", "May", "June",
    "July", "August", "September", "October", "November", "December",
)
month_dict = {
    f"{number:02d}": name for number, name in enumerate(_MONTH_NAMES, start=1)
}

def _split_month_year(df, column):
    """Split ``df[column]`` on '/' into ``<column>_month`` / ``<column>_years``.

    Both new columns are stored as strings. ``df`` is modified in place and
    also returned.
    """
    # expand=True builds the two-column frame directly instead of creating one
    # pandas Series per row via .apply(pd.Series).
    parts = df[column].str.split('/', expand=True)
    parts.columns = ["month", "years"]
    df[f"{column}_month"] = parts["month"].astype(str)
    df[f"{column}_years"] = parts["years"].astype(str)
    return df


def get_expires_values(df):
    """Add ``expires_month`` and ``expires_years`` from the ``expires`` column.

    Returns the same DataFrame object, which is modified in place.
    """
    return _split_month_year(df, "expires")


def get_acct_open_date_values(df):
    """Add ``acct_open_date_month`` / ``acct_open_date_years`` from ``acct_open_date``.

    Returns the same DataFrame object, which is modified in place.
    """
    return _split_month_year(df, "acct_open_date")


# Split the two '/'-separated date columns of the card table into month / year
# string columns, then replace the two-digit month codes with month names.
card = get_expires_values(card)
card = get_acct_open_date_values(card)
card["expires_month"] = card["expires_month"].map(month_dict)
card["acct_open_date_month"] = card["acct_open_date_month"].map(month_dict)

In [None]:
# Attach card attributes (matched on user_id + card_id) and then user
# attributes (matched on user_id) to every transaction. Left joins keep every
# row of `train` even when no match exists.
train = train.merge(card, how="left", on=["user_id", "card_id"]).merge(user, how="left", on="user_id")

In [None]:
# Replace every missing value in `train` (all columns, in place) with the
# string 'unknown' — presumably so the string concatenation below never
# produces NaN for a row.
train.fillna('unknown', inplace = True)

# Serialise each transaction into one text string for the language model:
# a "merchant" section, a "card" section and a "user" section, with fields
# joined by the literal text "[SEP]".
# NOTE(review): "[SEP]" is written as plain text before any tokenizer is
# loaded; whether it maps to a single separator token depends on the
# vocabulary of CFG.model — confirm against tokenizer.sep_token.
train["texts"] = "merchant" + "[SEP]" + train["amount"] + "[SEP]" + train["errors?"] + "[SEP]" + train["merchant_city"] + "[SEP]" + train["merchant_state"] + "[SEP]" + train["use_chip"] + "[SEP]" \
+ "card" + "[SEP]" + train["card_brand"] + "[SEP]" + train["card_type"] + "[SEP]" + train["expires_month"] + " " + train["expires_years"] + "[SEP]" + train["has_chip"] + "[SEP]" + train["acct_open_date_month"] + " " + train["acct_open_date_years"] + "[SEP]" + train["year_pin_last_changed"].astype(str) + "[SEP]" \
+ "user" + "[SEP]" + train["current_age"].astype(str) + " year old " + train["gender"] + "[SEP]" + "retired at age " + train["retirement_age"].astype(str) + "[SEP]" + train["address"] + "[SEP]" + train["city"] + "[SEP]" + train["state"] + "[SEP]" + train["per_capita_income_zipcode"] + "[SEP]" + train["yearly_income_person"] + "[SEP]" + train["total_debt"]

In [None]:
# Assign every row a validation fold id, stratified on the target column.
skf = StratifiedKFold(n_splits=CFG.n_fold,shuffle=True,random_state=CFG.seed)
for fold, ( _, val_) in enumerate(skf.split(train, train[CFG.target])):
    # split() yields positional indices while .loc selects by label;
    # assumes `train` still has its default 0..n-1 index — TODO confirm.
    train.loc[val_ , "kfold"] = int(fold)

# The column is created by the partial assignments above; cast it to int.
train["kfold"] = train["kfold"].astype(int)

# Debug mode: show fold sizes, shrink to a 500-row sample, show fold sizes again.
if CFG.debug:
    display(train.groupby('kfold').size())
    train = train.sample(n=500, random_state=0).reset_index(drop=True)
    display(train.groupby('kfold').size())

In [None]:
# ====================================================
# tokenizer
# ====================================================
# Load the tokenizer that matches CFG.model, save a copy next to the other
# experiment outputs, and attach it to CFG so prepare_input can reach it.
tokenizer = AutoTokenizer.from_pretrained(CFG.model)
tokenizer.save_pretrained(OUTPUT_EXP_DIR+'tokenizer/')
CFG.tokenizer = tokenizer

Downloading (…)okenizer_config.json:   0%|          | 0.00/153 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/700 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/231k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/153 [00:00<?, ?B/s]

In [None]:
# ====================================================
# Define max_len
# ====================================================
# Tokenise every training text (without special tokens) and record its length.
lengths = []
tk0 = tqdm(train['texts'].fillna("").values, total=len(train))
for text in tk0:
    length = len(tokenizer(text, add_special_tokens=False)['input_ids'])
    lengths.append(length)
# Overwrite CFG.max_len with the longest observed length plus a margin of 23.
# NOTE(review): the original comment only says "cls"; the reason for 23
# specifically is not visible here — confirm.
CFG.max_len = max(lengths) + 23 # cls
LOGGER.info(f"max_len: {CFG.max_len}")

100%|██████████| 471283/471283 [02:42<00:00, 2899.68it/s]
max_len: 146
INFO:__main__:max_len: 146


In [None]:
# ====================================================
# Dataset
# ====================================================
def prepare_input(cfg, text):
    """Tokenise ``text`` to a fixed length and convert every field to a tensor.

    The text is encoded with ``cfg.tokenizer``, padded / truncated to
    ``cfg.max_len`` with special tokens added. Each returned field is replaced
    by a ``torch.long`` tensor, in place on the tokenizer's output mapping.
    """
    encoded = cfg.tokenizer(
        text,
        add_special_tokens=True,
        max_length=cfg.max_len,
        padding="max_length",
        return_offsets_mapping=False,
        truncation=True,
    )
    for field in list(encoded.keys()):
        encoded[field] = torch.tensor(encoded[field], dtype=torch.long)
    return encoded


class TrainDataset(Dataset):
    """Training dataset yielding ``(tokenised_inputs, label)`` pairs.

    Args:
        cfg: configuration object; ``cfg.target`` names the label column and
            ``cfg`` is forwarded to ``prepare_input``.
        df: DataFrame with a ``texts`` column and the label column.
    """

    def __init__(self, cfg, df):
        self.cfg = cfg
        self.inputs = df['texts'].values
        # Read the label column from the cfg that was passed in rather than
        # from the global CFG, so the dataset honours its own argument.
        self.labels = df[cfg.target].values

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, item):
        inputs = prepare_input(self.cfg, self.inputs[item])
        # Labels are emitted in half precision (the validation dataset uses
        # float) — presumably to match fp16 logits under mixed precision.
        label = torch.tensor(self.labels[item], dtype=torch.half)
        return inputs, label

def collate(inputs):
    """Trim every tensor in the batch to the longest unpadded sequence.

    The cut-off is the largest row sum of ``attention_mask``. ``inputs`` is
    modified in place and returned.
    """
    longest = int(inputs["attention_mask"].sum(axis=1).max())
    for field in list(inputs.keys()):
        inputs[field] = inputs[field][:, :longest]
    return inputs

class ValidDataset(Dataset):
    """Validation dataset yielding ``(tokenised_inputs, label)`` pairs.

    Args:
        cfg: configuration object; ``cfg.target`` names the label column and
            ``cfg`` is forwarded to ``prepare_input``.
        df: DataFrame with a ``texts`` column and the label column.
    """

    def __init__(self, cfg, df):
        self.cfg = cfg
        self.inputs = df['texts'].values
        # Read the label column from the cfg that was passed in rather than
        # from the global CFG, so the dataset honours its own argument.
        self.labels = df[cfg.target].values

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, item):
        inputs = prepare_input(self.cfg, self.inputs[item])
        # Validation labels are full-precision floats.
        label = torch.tensor(self.labels[item], dtype=torch.float)
        return inputs, label

# NOTE(review): this is a second, identical definition of `collate` in the same
# cell. Being the later one, it is the definition in effect after the cell runs.
def collate(inputs):
    """Trim every tensor in ``inputs`` to the longest unpadded length in the batch.

    The length is the maximum row sum of ``inputs["attention_mask"]``.
    ``inputs`` is modified in place and returned.
    """
    mask_len = int(inputs["attention_mask"].sum(axis=1).max())
    for k, v in inputs.items():
        inputs[k] = inputs[k][:,:mask_len]
    return inputs

#collate_fn = DataCollatorWithPadding(tokenizer=tokenizer)

In [None]:
# ====================================================
# Model
# ====================================================
class MeanPooling(nn.Module):
    """Average the hidden states over the sequence, counting only unmasked tokens."""

    def __init__(self):
        super().__init__()

    def forward(self, last_hidden_state, attention_mask):
        # Broadcast the mask over the hidden dimension.
        mask = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
        summed = (last_hidden_state * mask).sum(1)
        # Clamp so a fully masked row divides by 1e-9 instead of zero.
        counts = mask.sum(1).clamp(min=1e-9)
        return summed / counts

class MaxPooling(nn.Module):
    """Element-wise maximum of the hidden states over the sequence dimension.

    Masked positions are set to -1e4 on a copy before taking the maximum.
    """

    def __init__(self):
        super().__init__()

    def forward(self, last_hidden_state, attention_mask):
        is_padding = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float() == 0
        # masked_fill returns a new tensor, leaving the input untouched.
        candidates = last_hidden_state.masked_fill(is_padding, -1e4)
        return candidates.max(dim=1).values


class CustomModel(nn.Module):
    """Transformer backbone + masked mean pooling + LayerNorm + linear head.

    Args:
        cfg: configuration namespace (reads ``model``, ``gradient_checkpointing``,
            ``freezing`` and ``target_size``).
        config_path: optional path to a backbone config previously stored with
            ``torch.save``. When ``None`` the config is fetched for ``cfg.model``
            and all dropout probabilities are set to 0.
        pretrained: load pretrained backbone weights when True; otherwise build
            the backbone from the config alone with freshly initialised weights.

    ``forward`` returns raw logits (no sigmoid is applied).
    """

    def __init__(self, cfg, config_path=None, pretrained=False):
        super().__init__()
        self.cfg = cfg
        if config_path is None:
            self.config = AutoConfig.from_pretrained(cfg.model, output_hidden_states=True)
            # Disable every dropout variant; different architectures use
            # different attribute names, so all of them are set.
            self.config.hidden_dropout = 0.
            self.config.hidden_dropout_prob = 0.
            self.config.attention_dropout = 0.
            self.config.attention_probs_dropout_prob = 0.
            LOGGER.info(self.config)
        else:
            # NOTE(review): torch.load unpickles the file; only use config
            # files written by this notebook.
            self.config = torch.load(config_path)
        if pretrained:
            self.model = AutoModel.from_pretrained(cfg.model, config=self.config)
        else:
            # AutoModel cannot be instantiated directly; from_config builds the
            # architecture described by the config without loading weights.
            self.model = AutoModel.from_config(self.config)
        if self.cfg.gradient_checkpointing:
            self.model.gradient_checkpointing_enable()

        # Freezing
        if cfg.freezing:
            # freezing embeddings and first 2 layers of encoder
            # NOTE(review): assumes the backbone exposes `.embeddings` and
            # `.encoder.layer` — confirm for the selected CFG.model.
            freeze((self.model).embeddings)
            freeze((self.model).encoder.layer[:2])
            cfg.after_freezed_parameters = filter(lambda parameter: parameter.requires_grad, (self.model).parameters())

        self.pool = MeanPooling()
        self.fc = nn.Linear(self.config.hidden_size, cfg.target_size)
        self._init_weights(self.fc)
        self.layer_norm1 = nn.LayerNorm(self.config.hidden_size)
        # Kept for checkpoint/attribute compatibility; forward() does not use it.
        self.sig = nn.Sigmoid()

    def _init_weights(self, module):
        """Initialise ``module`` in place using the backbone's ``initializer_range``."""
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)

    def feature(self, inputs):
        """Run the backbone and mean-pool its first output over unmasked tokens."""
        outputs = self.model(**inputs)
        last_hidden_states = outputs[0]
        feature = self.pool(last_hidden_states, inputs['attention_mask'])
        return feature

    def forward(self, inputs):
        """Return logits computed from the pooled, layer-normalised feature."""
        feature = self.feature(inputs)
        feature = self.layer_norm1(feature)
        output = self.fc(feature)
        return output

In [None]:
# ====================================================
# Helper functions
# ====================================================
class AverageMeter(object):
    """Track the most recent value together with a running weighted average."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear all statistics back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` observed ``n`` times and refresh the average."""
        self.val = val
        self.sum, self.count = self.sum + val * n, self.count + n
        self.avg = self.sum / self.count


def asMinutes(s):
    """Format a duration in seconds as ``'<minutes>m <seconds>s'`` (truncated)."""
    minutes = math.floor(s / 60)
    seconds = s - minutes * 60
    return f"{int(minutes)}m {int(seconds)}s"


def timeSince(since, percent):
    """Describe elapsed and estimated remaining time.

    Args:
        since: start timestamp as returned by ``time.time()``.
        percent: fraction of the work completed so far (must be non-zero).

    Returns:
        A string of the form ``'<elapsed> (remain <remaining>)'``.
    """
    elapsed = time.time() - since
    remaining = elapsed / (percent) - elapsed
    return '%s (remain %s)' % (asMinutes(elapsed), asMinutes(remaining))


def train_fn(fold, train_loader, model, criterion, optimizer, epoch, scheduler, device):
    """Train ``model`` for one epoch with optional mixed precision.

    Args:
        fold: fold id (kept for interface compatibility; not used here).
        train_loader: yields ``(inputs, labels)`` batches.
        model: network called as ``model(inputs)`` and returning logits.
        criterion: loss called as ``criterion(logits, labels)``.
        optimizer: optimizer to step.
        epoch: zero-based epoch index, used only in the progress line.
        scheduler: learning-rate scheduler; stepped after each optimizer step
            when ``CFG.batch_scheduler`` is set.
        device: device that batches are moved to.

    Returns:
        The sample-weighted average of the recorded batch losses.
    """
    model.train()
    scaler = torch.cuda.amp.GradScaler(enabled=CFG.apex)
    losses = AverageMeter()
    start = time.time()
    accumulation_steps = CFG.gradient_accumulation_steps
    # Only refreshed on optimizer steps; NaN until the first one happens.
    grad_norm = float('nan')
    for step, (inputs, labels) in enumerate(train_loader):
        inputs = collate(inputs)
        for k, v in inputs.items():
            inputs[k] = v.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        with torch.cuda.amp.autocast(enabled=CFG.apex):
            y_preds = model(inputs)
            # Compute the loss inside autocast so it is evaluated in the
            # precision autocast selects for it instead of raw fp16. Matching
            # the label dtype to the logits also keeps the non-AMP path valid.
            loss = criterion(y_preds.view(-1, 1), labels.view(-1, 1).to(y_preds.dtype))
        if accumulation_steps > 1:
            loss = loss / accumulation_steps
        losses.update(loss.item(), batch_size)
        scaler.scale(loss).backward()
        if (step + 1) % accumulation_steps == 0:
            # unscale_ may be called only once per optimizer step, so it (and
            # the clipping that needs unscaled gradients) belongs here rather
            # than on every micro-batch.
            scaler.unscale_(optimizer)
            grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), CFG.max_grad_norm)
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()
            if CFG.batch_scheduler:
                scheduler.step()
        if step % CFG.print_freq == 0 or step == (len(train_loader)-1):
            print('Epoch: [{0}][{1}/{2}] '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  'Grad: {grad_norm:.4f}  '
                  'LR: {lr:.8f}  '
                  .format(epoch+1, step, len(train_loader),
                          remain=timeSince(start, float(step+1)/len(train_loader)),
                          loss=losses,
                          grad_norm=grad_norm,
                          lr=scheduler.get_last_lr()[0]))

    return losses.avg


def valid_fn(valid_loader, model, criterion, device):
    """Evaluate ``model`` on the validation loader.

    Args:
        valid_loader: yields ``(inputs, labels)`` batches.
        model: network called as ``model(inputs)`` and returning logits.
        criterion: loss called as ``criterion(logits, labels)``.
        device: device that batches are moved to.

    Returns:
        ``(average_loss, predictions)`` where ``predictions`` is a flat NumPy
        array of sigmoid probabilities in loader order.
    """
    losses = AverageMeter()
    model.eval()
    preds = []
    start = time.time()
    for step, (inputs, labels) in enumerate(valid_loader):
        inputs = collate(inputs)
        for k, v in inputs.items():
            inputs[k] = v.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        with torch.no_grad():
            y_preds = model(inputs)
        # No division by gradient_accumulation_steps here: that scaling only
        # exists to average gradients during training and would distort the
        # reported validation loss.
        loss = criterion(y_preds.view(-1, 1), labels.view(-1, 1))
        losses.update(loss.item(), batch_size)
        preds.append(y_preds.sigmoid().to('cpu').numpy())
        if step % CFG.print_freq == 0 or step == (len(valid_loader)-1):
            print('EVAL: [{0}/{1}] '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  .format(step, len(valid_loader),
                          loss=losses,
                          remain=timeSince(start, float(step+1)/len(valid_loader))))
    # Stack the per-batch arrays, then flatten to one dimension.
    predictions = np.concatenate(preds).reshape(-1)
    return losses.avg, predictions


def inference_fn(test_loader, model, device):
    """Predict sigmoid probabilities for every batch of ``test_loader``.

    The model is switched to eval mode and moved to ``device``. Returns the
    per-batch outputs concatenated along the first axis (not flattened).
    """
    model.eval()
    model.to(device)
    batch_probs = []
    for inputs in tqdm(test_loader, total=len(test_loader)):
        inputs = collate(inputs)
        for field in list(inputs.keys()):
            inputs[field] = inputs[field].to(device)
        with torch.no_grad():
            logits = model(inputs)
        batch_probs.append(logits.sigmoid().to('cpu').numpy())
    return np.concatenate(batch_probs)

In [None]:
# ====================================================
# train loop
# ====================================================
def train_loop(folds, fold):
    """Train and validate one cross-validation fold.

    Rows whose ``kfold`` equals ``fold`` form the validation set and all other
    rows the training set. After every epoch the model is scored on the
    validation set; whenever the threshold-tuned F1 improves, the model
    weights and validation predictions are written to
    ``<OUTPUT_EXP_DIR><model>_fold<fold>_best.pth``.

    Args:
        folds: DataFrame with ``texts``, the target column and ``kfold``.
        fold: id of the fold to hold out.

    Returns:
        The validation rows with an added ``pred`` column holding the
        predictions of the best epoch.
    """
    LOGGER.info(f"========== fold: {fold} training ==========")

    # ====================================================
    # loader
    # ====================================================
    train_folds = folds[folds['kfold'] != fold].reset_index(drop=True)
    valid_folds = folds[folds['kfold'] == fold].reset_index(drop=True)
    valid_labels = valid_folds[CFG.target].values

    train_dataset = TrainDataset(CFG, train_folds)
    valid_dataset = ValidDataset(CFG, valid_folds)

    train_loader = DataLoader(train_dataset,
                              batch_size=CFG.batch_size,
                              shuffle=True,
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=True)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=CFG.batch_size*2,
                              shuffle=False,
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=False)

    # ====================================================
    # model & optimizer
    # ====================================================
    model = CustomModel(CFG, config_path=None, pretrained=True)
    torch.save(model.config, OUTPUT_EXP_DIR+'config.pth')
    model.to(device)

    def get_optimizer_params(model, encoder_lr, decoder_lr, weight_decay=0.0):
        """Build three parameter groups: backbone with / without weight decay, and head."""
        no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
        optimizer_parameters = [
            {'params': [p for n, p in model.model.named_parameters() if not any(nd in n for nd in no_decay)],
             'lr': encoder_lr, 'weight_decay': weight_decay},
            {'params': [p for n, p in model.model.named_parameters() if any(nd in n for nd in no_decay)],
             'lr': encoder_lr, 'weight_decay': 0.0},
            # Everything outside the backbone (its names do not contain "model").
            {'params': [p for n, p in model.named_parameters() if "model" not in n],
             'lr': decoder_lr, 'weight_decay': 0.0}
        ]
        return optimizer_parameters

    optimizer_parameters = get_optimizer_params(model,
                                                encoder_lr=CFG.encoder_lr,
                                                decoder_lr=CFG.decoder_lr,
                                                weight_decay=CFG.weight_decay)
    optimizer = AdamW(optimizer_parameters, lr=CFG.encoder_lr, eps=CFG.eps, betas=CFG.betas)

    # ====================================================
    # scheduler
    # ====================================================
    def get_scheduler(cfg, optimizer, num_train_steps):
        """Create the warmup scheduler selected by ``cfg.scheduler``."""
        if cfg.scheduler == 'linear':
            scheduler = get_linear_schedule_with_warmup(
                optimizer, num_warmup_steps=cfg.num_warmup_steps, num_training_steps=num_train_steps
            )
        elif cfg.scheduler == 'cosine':
            scheduler = get_cosine_schedule_with_warmup(
                optimizer, num_warmup_steps=cfg.num_warmup_steps, num_training_steps=num_train_steps, num_cycles=cfg.num_cycles
            )
        else:
            raise ValueError(
                f"Unknown scheduler {cfg.scheduler!r}; expected 'linear' or 'cosine'"
            )
        return scheduler

    # Count the optimizer steps that train_fn really performs: the loader drops
    # the last partial batch and one step is taken per accumulation group.
    steps_per_epoch = len(train_loader) // CFG.gradient_accumulation_steps
    num_train_steps = steps_per_epoch * CFG.epochs
    scheduler = get_scheduler(CFG, optimizer, num_train_steps)

    # ====================================================
    # loop
    # ====================================================
    criterion = nn.BCEWithLogitsLoss()

    best_score = -1.
    best_predictions = None
    best_path = OUTPUT_EXP_DIR+f"{CFG.model.replace('/', '-')}_fold{fold}_best.pth"

    for epoch in range(CFG.epochs):

        start_time = time.time()

        # train
        avg_loss = train_fn(fold, train_loader, model, criterion, optimizer, epoch, scheduler, device)

        # eval
        avg_val_loss, predictions = valid_fn(valid_loader, model, criterion, device)

        # scoring: F1 at the fixed threshold (logged) and the threshold-tuned
        # F1 (used for model selection). The local name deliberately avoids
        # shadowing the imported sklearn `f1_score`.
        score = get_score(valid_labels, predictions)
        tuned_f1 = get_f1_score(valid_labels, predictions)

        elapsed = time.time() - start_time

        LOGGER.info(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s')
        LOGGER.info(f'Epoch {epoch+1} - Score: {score:.4f}')

        if best_score < tuned_f1:
            best_score = tuned_f1
            best_predictions = predictions
            LOGGER.info(f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model')
            torch.save({'model': model.state_dict(),
                        'predictions': predictions},
                        best_path)

    if best_predictions is None:
        # No epoch improved on the initial score (e.g. zero epochs): fall back
        # to whatever checkpoint is on disk.
        best_predictions = torch.load(best_path,
                                      map_location=torch.device('cpu'))['predictions']
    valid_folds['pred'] = best_predictions

    # Drop the references to GPU-resident objects before clearing the cache.
    del model, optimizer, scheduler
    torch.cuda.empty_cache()
    gc.collect()

    return valid_folds

In [None]:
if __name__ == '__main__':

    def get_result(oof_df):
        """Log the fixed-threshold F1 and the threshold-tuned F1 for ``oof_df``."""
        labels = oof_df[CFG.target].values
        preds = oof_df['pred'].values
        score = get_score(labels, preds)
        tuned_f1 = get_f1_score(labels, preds)
        LOGGER.info(f'Score: {score:<.4f}')
        LOGGER.info(f'F1 BEST Score: {tuned_f1:<.4f}')

    if CFG.train:
        fold_frames = []
        for fold in range(CFG.n_fold):
            # Only the folds listed in CFG.trn_fold are trained.
            if fold not in CFG.trn_fold:
                continue
            _oof_df = train_loop(train, fold)
            fold_frames.append(_oof_df)
            LOGGER.info(f"========== fold: {fold} result ==========")
            get_result(_oof_df)
        # Stack the out-of-fold predictions of all trained folds.
        oof_df = pd.concat(fold_frames) if fold_frames else pd.DataFrame()
        oof_df = oof_df.reset_index(drop=True)
        LOGGER.info(f"========== CV ==========")
        get_result(oof_df)
        oof_df.to_pickle(OUTPUT_EXP_DIR+'oof_df.pkl')

FunnelConfig {
  "_name_or_path": "funnel-transformer/medium",
  "activation_dropout": 0.0,
  "architectures": [
    "FunnelModel"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "attention_type": "relative_shift",
  "block_repeats": [
    1,
    2,
    2
  ],
  "block_sizes": [
    6,
    3,
    3
  ],
  "d_head": 64,
  "d_inner": 3072,
  "d_model": 768,
  "hidden_act": "gelu_new",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "initializer_range": 0.1,
  "initializer_std": null,
  "layer_norm_eps": 1e-09,
  "max_position_embeddings": 512,
  "model_type": "funnel",
  "n_head": 12,
  "num_decoder_layers": 2,
  "output_hidden_states": true,
  "pool_q_only": true,
  "pooling_type": "mean",
  "rel_attn_type": "factorized",
  "separate_cls": true,
  "transformers_version": "4.33.1",
  "truncate_seq": true,
  "type_vocab_size": 3,
  "vocab_size": 30522
}

INFO:__main__:FunnelConfig {
  "_name_or_path": "funnel-transformer/medium",
  "activation_dropout": 

Downloading pytorch_model.bin:   0%|          | 0.00/524M [00:00<?, ?B/s]

Epoch: [1][0/5891] Elapsed 0m 4s (remain 395m 26s) Loss: 0.1827(0.1827) Grad: nan  LR: 0.00002000  
Epoch: [1][100/5891] Elapsed 0m 30s (remain 29m 18s) Loss: 0.1411(0.2454) Grad: 1.3566  LR: 0.00002000  
Epoch: [1][200/5891] Elapsed 0m 57s (remain 27m 17s) Loss: 0.2700(0.2353) Grad: 3.7980  LR: 0.00002000  
Epoch: [1][300/5891] Elapsed 1m 24s (remain 26m 15s) Loss: 0.2546(0.2346) Grad: 4.5516  LR: 0.00001999  
Epoch: [1][400/5891] Elapsed 1m 51s (remain 25m 25s) Loss: 0.1575(0.2271) Grad: 0.4316  LR: 0.00001999  
Epoch: [1][500/5891] Elapsed 2m 18s (remain 24m 52s) Loss: 0.1088(0.2202) Grad: 1.1735  LR: 0.00001998  
Epoch: [1][600/5891] Elapsed 2m 45s (remain 24m 20s) Loss: 0.1421(0.2184) Grad: 0.7102  LR: 0.00001997  
Epoch: [1][700/5891] Elapsed 3m 12s (remain 23m 43s) Loss: 0.2224(0.2152) Grad: 1.5239  LR: 0.00001996  
Epoch: [1][800/5891] Elapsed 3m 39s (remain 23m 15s) Loss: 0.1343(0.2135) Grad: 0.4160  LR: 0.00001994  
Epoch: [1][900/5891] Elapsed 4m 6s (remain 22m 45s) Loss: 0.

Epoch 1 - avg_train_loss: 0.1933  avg_val_loss: 0.1723  time: 1821s
INFO:__main__:Epoch 1 - avg_train_loss: 0.1933  avg_val_loss: 0.1723  time: 1821s
Epoch 1 - Score: 0.3980
INFO:__main__:Epoch 1 - Score: 0.3980
Epoch 1 - Save Best Score: 0.4907 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.4907 Model


Epoch: [2][0/5891] Elapsed 0m 0s (remain 68m 34s) Loss: 0.1771(0.1771) Grad: nan  LR: 0.00001707  
Epoch: [2][100/5891] Elapsed 0m 28s (remain 27m 29s) Loss: 0.1192(0.1776) Grad: 2.8594  LR: 0.00001698  
Epoch: [2][200/5891] Elapsed 0m 55s (remain 26m 3s) Loss: 0.1826(0.1723) Grad: 1.6176  LR: 0.00001688  
Epoch: [2][300/5891] Elapsed 1m 21s (remain 25m 17s) Loss: 0.1710(0.1748) Grad: 1.2651  LR: 0.00001678  
Epoch: [2][400/5891] Elapsed 1m 48s (remain 24m 47s) Loss: 0.1488(0.1738) Grad: 0.5551  LR: 0.00001668  
Epoch: [2][500/5891] Elapsed 2m 15s (remain 24m 19s) Loss: 0.2190(0.1705) Grad: 1.1874  LR: 0.00001658  
Epoch: [2][600/5891] Elapsed 2m 41s (remain 23m 45s) Loss: 0.1333(0.1717) Grad: 0.3437  LR: 0.00001648  
Epoch: [2][700/5891] Elapsed 3m 8s (remain 23m 19s) Loss: 0.1447(0.1721) Grad: 0.7039  LR: 0.00001638  
Epoch: [2][800/5891] Elapsed 3m 35s (remain 22m 51s) Loss: 0.1019(0.1729) Grad: 1.6942  LR: 0.00001628  
Epoch: [2][900/5891] Elapsed 4m 2s (remain 22m 20s) Loss: 0.286

Epoch 2 - avg_train_loss: 0.1649  avg_val_loss: 0.1606  time: 1822s
INFO:__main__:Epoch 2 - avg_train_loss: 0.1649  avg_val_loss: 0.1606  time: 1822s
Epoch 2 - Score: 0.4322
INFO:__main__:Epoch 2 - Score: 0.4322
Epoch 2 - Save Best Score: 0.5340 Model
INFO:__main__:Epoch 2 - Save Best Score: 0.5340 Model


Epoch: [3][0/5891] Elapsed 0m 0s (remain 68m 34s) Loss: 0.1392(0.1392) Grad: nan  LR: 0.00001000  
Epoch: [3][100/5891] Elapsed 0m 29s (remain 28m 3s) Loss: 0.1533(0.1464) Grad: 1.0494  LR: 0.00000987  
Epoch: [3][200/5891] Elapsed 0m 56s (remain 26m 41s) Loss: 0.1835(0.1507) Grad: 0.8764  LR: 0.00000973  
Epoch: [3][300/5891] Elapsed 1m 23s (remain 25m 47s) Loss: 0.0750(0.1540) Grad: 0.4827  LR: 0.00000960  
Epoch: [3][400/5891] Elapsed 1m 50s (remain 25m 12s) Loss: 0.2100(0.1519) Grad: 0.6627  LR: 0.00000947  
Epoch: [3][500/5891] Elapsed 2m 18s (remain 24m 45s) Loss: 0.1644(0.1548) Grad: 0.6885  LR: 0.00000933  
Epoch: [3][600/5891] Elapsed 2m 44s (remain 24m 11s) Loss: 0.2617(0.1533) Grad: 1.3023  LR: 0.00000920  
Epoch: [3][700/5891] Elapsed 3m 11s (remain 23m 37s) Loss: 0.1426(0.1533) Grad: 0.6610  LR: 0.00000907  
Epoch: [3][800/5891] Elapsed 3m 38s (remain 23m 9s) Loss: 0.2153(0.1535) Grad: 2.3849  LR: 0.00000893  
Epoch: [3][900/5891] Elapsed 4m 5s (remain 22m 40s) Loss: 0.119

Epoch 3 - avg_train_loss: 0.1520  avg_val_loss: 0.1562  time: 1818s
INFO:__main__:Epoch 3 - avg_train_loss: 0.1520  avg_val_loss: 0.1562  time: 1818s
Epoch 3 - Score: 0.4884
INFO:__main__:Epoch 3 - Score: 0.4884
Epoch 3 - Save Best Score: 0.5550 Model
INFO:__main__:Epoch 3 - Save Best Score: 0.5550 Model


Epoch: [4][0/5891] Elapsed 0m 0s (remain 73m 45s) Loss: 0.2069(0.2069) Grad: nan  LR: 0.00000293  
Epoch: [4][100/5891] Elapsed 0m 28s (remain 27m 25s) Loss: 0.1731(0.1541) Grad: 1.4514  LR: 0.00000283  
Epoch: [4][200/5891] Elapsed 0m 55s (remain 26m 18s) Loss: 0.0720(0.1464) Grad: 0.7048  LR: 0.00000274  
Epoch: [4][300/5891] Elapsed 1m 22s (remain 25m 25s) Loss: 0.0345(0.1423) Grad: 0.6270  LR: 0.00000265  
Epoch: [4][400/5891] Elapsed 1m 49s (remain 24m 56s) Loss: 0.0601(0.1403) Grad: 0.5546  LR: 0.00000256  
Epoch: [4][500/5891] Elapsed 2m 16s (remain 24m 27s) Loss: 0.1600(0.1421) Grad: 1.1216  LR: 0.00000247  
Epoch: [4][600/5891] Elapsed 2m 43s (remain 23m 55s) Loss: 0.1143(0.1400) Grad: 1.4837  LR: 0.00000239  
Epoch: [4][700/5891] Elapsed 3m 9s (remain 23m 25s) Loss: 0.1240(0.1410) Grad: 1.0907  LR: 0.00000230  
Epoch: [4][800/5891] Elapsed 3m 36s (remain 22m 58s) Loss: 0.1085(0.1404) Grad: 0.9793  LR: 0.00000222  
Epoch: [4][900/5891] Elapsed 4m 3s (remain 22m 31s) Loss: 0.12

Epoch 4 - avg_train_loss: 0.1410  avg_val_loss: 0.1568  time: 1825s
INFO:__main__:Epoch 4 - avg_train_loss: 0.1410  avg_val_loss: 0.1568  time: 1825s
Epoch 4 - Score: 0.5036
INFO:__main__:Epoch 4 - Score: 0.5036
Epoch 4 - Save Best Score: 0.5558 Model
INFO:__main__:Epoch 4 - Save Best Score: 0.5558 Model


f1 score : 0.503621391011013
recall score : 0.38890591480232917
precision score : 0.7143259217562623


Score: 0.5036
INFO:__main__:Score: 0.5036
F1 BEST Score: 0.5558
INFO:__main__:F1 BEST Score: 0.5558
FunnelConfig {
  "_name_or_path": "funnel-transformer/medium",
  "activation_dropout": 0.0,
  "architectures": [
    "FunnelModel"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "attention_type": "relative_shift",
  "block_repeats": [
    1,
    2,
    2
  ],
  "block_sizes": [
    6,
    3,
    3
  ],
  "d_head": 64,
  "d_inner": 3072,
  "d_model": 768,
  "hidden_act": "gelu_new",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "initializer_range": 0.1,
  "initializer_std": null,
  "layer_norm_eps": 1e-09,
  "max_position_embeddings": 512,
  "model_type": "funnel",
  "n_head": 12,
  "num_decoder_layers": 2,
  "output_hidden_states": true,
  "pool_q_only": true,
  "pooling_type": "mean",
  "rel_attn_type": "factorized",
  "separate_cls": true,
  "transformers_version": "4.33.1",
  "truncate_seq": true,
  "type_vocab_size": 3,
  "vocab_size": 30522
}

I

Epoch: [1][0/5891] Elapsed 0m 0s (remain 72m 6s) Loss: 0.6587(0.6587) Grad: nan  LR: 0.00002000  
Epoch: [1][100/5891] Elapsed 0m 28s (remain 27m 18s) Loss: 0.3430(0.2876) Grad: 2.8169  LR: 0.00002000  
Epoch: [1][200/5891] Elapsed 0m 54s (remain 25m 45s) Loss: 0.0348(0.2504) Grad: 2.3856  LR: 0.00002000  
Epoch: [1][300/5891] Elapsed 1m 21s (remain 25m 19s) Loss: 0.2125(0.2387) Grad: 1.6840  LR: 0.00001999  
Epoch: [1][400/5891] Elapsed 1m 48s (remain 24m 48s) Loss: 0.3044(0.2351) Grad: 2.3619  LR: 0.00001999  
Epoch: [1][500/5891] Elapsed 2m 14s (remain 24m 12s) Loss: 0.2494(0.2301) Grad: 3.5287  LR: 0.00001998  
Epoch: [1][600/5891] Elapsed 2m 41s (remain 23m 44s) Loss: 0.2825(0.2277) Grad: 1.6889  LR: 0.00001997  
Epoch: [1][700/5891] Elapsed 3m 8s (remain 23m 17s) Loss: 0.2484(0.2238) Grad: 1.1363  LR: 0.00001996  
Epoch: [1][800/5891] Elapsed 3m 35s (remain 22m 49s) Loss: 0.2019(0.2203) Grad: 3.0550  LR: 0.00001994  
Epoch: [1][900/5891] Elapsed 4m 2s (remain 22m 20s) Loss: 0.310

Epoch 1 - avg_train_loss: 0.1882  avg_val_loss: 0.1663  time: 1812s
INFO:__main__:Epoch 1 - avg_train_loss: 0.1882  avg_val_loss: 0.1663  time: 1812s
Epoch 1 - Score: 0.3983
INFO:__main__:Epoch 1 - Score: 0.3983
Epoch 1 - Save Best Score: 0.5183 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.5183 Model


Epoch: [2][0/5891] Elapsed 0m 0s (remain 71m 9s) Loss: 0.1186(0.1186) Grad: nan  LR: 0.00001707  
Epoch: [2][100/5891] Elapsed 0m 28s (remain 27m 19s) Loss: 0.1626(0.1752) Grad: 0.7039  LR: 0.00001698  
Epoch: [2][200/5891] Elapsed 0m 54s (remain 25m 47s) Loss: 0.2700(0.1642) Grad: 1.1251  LR: 0.00001688  
Epoch: [2][300/5891] Elapsed 1m 21s (remain 25m 9s) Loss: 0.1893(0.1698) Grad: 1.2762  LR: 0.00001678  
Epoch: [2][400/5891] Elapsed 1m 48s (remain 24m 39s) Loss: 0.0905(0.1706) Grad: 1.7286  LR: 0.00001668  
Epoch: [2][500/5891] Elapsed 2m 14s (remain 24m 7s) Loss: 0.0672(0.1690) Grad: 1.8165  LR: 0.00001658  
Epoch: [2][600/5891] Elapsed 2m 41s (remain 23m 37s) Loss: 0.1934(0.1677) Grad: 1.3536  LR: 0.00001648  
Epoch: [2][700/5891] Elapsed 3m 8s (remain 23m 12s) Loss: 0.0393(0.1663) Grad: 1.2150  LR: 0.00001638  
Epoch: [2][800/5891] Elapsed 3m 34s (remain 22m 46s) Loss: 0.1432(0.1650) Grad: 0.4687  LR: 0.00001628  
Epoch: [2][900/5891] Elapsed 4m 1s (remain 22m 15s) Loss: 0.0699(

Epoch 2 - avg_train_loss: 0.1639  avg_val_loss: 0.1604  time: 1830s
INFO:__main__:Epoch 2 - avg_train_loss: 0.1639  avg_val_loss: 0.1604  time: 1830s
Epoch 2 - Score: 0.4992
INFO:__main__:Epoch 2 - Score: 0.4992
Epoch 2 - Save Best Score: 0.5372 Model
INFO:__main__:Epoch 2 - Save Best Score: 0.5372 Model


Epoch: [3][0/5891] Elapsed 0m 0s (remain 75m 35s) Loss: 0.2493(0.2493) Grad: nan  LR: 0.00001000  
Epoch: [3][100/5891] Elapsed 0m 31s (remain 30m 34s) Loss: 0.2756(0.1606) Grad: 1.9998  LR: 0.00000987  
Epoch: [3][200/5891] Elapsed 0m 58s (remain 27m 36s) Loss: 0.2018(0.1570) Grad: 1.5597  LR: 0.00000973  
Epoch: [3][300/5891] Elapsed 1m 25s (remain 26m 30s) Loss: 0.1284(0.1582) Grad: 0.7197  LR: 0.00000960  
Epoch: [3][400/5891] Elapsed 1m 52s (remain 25m 43s) Loss: 0.0728(0.1559) Grad: 0.7857  LR: 0.00000947  
Epoch: [3][500/5891] Elapsed 2m 19s (remain 25m 0s) Loss: 0.0536(0.1532) Grad: 1.0580  LR: 0.00000933  
Epoch: [3][600/5891] Elapsed 2m 46s (remain 24m 23s) Loss: 0.1143(0.1539) Grad: 0.9609  LR: 0.00000920  
Epoch: [3][700/5891] Elapsed 3m 13s (remain 23m 52s) Loss: 0.1238(0.1536) Grad: 1.2420  LR: 0.00000907  
Epoch: [3][800/5891] Elapsed 3m 40s (remain 23m 21s) Loss: 0.2625(0.1548) Grad: 0.8940  LR: 0.00000893  
Epoch: [3][900/5891] Elapsed 4m 7s (remain 22m 48s) Loss: 0.05

Epoch 3 - avg_train_loss: 0.1516  avg_val_loss: 0.1557  time: 1844s
INFO:__main__:Epoch 3 - avg_train_loss: 0.1516  avg_val_loss: 0.1557  time: 1844s
Epoch 3 - Score: 0.5255
INFO:__main__:Epoch 3 - Score: 0.5255
Epoch 3 - Save Best Score: 0.5572 Model
INFO:__main__:Epoch 3 - Save Best Score: 0.5572 Model


Epoch: [4][0/5891] Elapsed 0m 0s (remain 73m 1s) Loss: 0.0652(0.0652) Grad: nan  LR: 0.00000293  
Epoch: [4][100/5891] Elapsed 0m 29s (remain 28m 25s) Loss: 0.1219(0.1439) Grad: 0.9328  LR: 0.00000283  
Epoch: [4][200/5891] Elapsed 0m 57s (remain 27m 3s) Loss: 0.1726(0.1428) Grad: 0.7419  LR: 0.00000274  
Epoch: [4][300/5891] Elapsed 1m 25s (remain 26m 28s) Loss: 0.0471(0.1430) Grad: 0.8755  LR: 0.00000265  
Epoch: [4][400/5891] Elapsed 1m 54s (remain 26m 7s) Loss: 0.1892(0.1421) Grad: 2.0121  LR: 0.00000256  
Epoch: [4][500/5891] Elapsed 2m 22s (remain 25m 31s) Loss: 0.0544(0.1425) Grad: 1.0523  LR: 0.00000247  
Epoch: [4][600/5891] Elapsed 2m 50s (remain 24m 58s) Loss: 0.2188(0.1432) Grad: 1.6463  LR: 0.00000239  
Epoch: [4][700/5891] Elapsed 3m 18s (remain 24m 29s) Loss: 0.2280(0.1432) Grad: 2.7148  LR: 0.00000230  
Epoch: [4][800/5891] Elapsed 3m 45s (remain 23m 53s) Loss: 0.1213(0.1434) Grad: 1.3881  LR: 0.00000222  
Epoch: [4][900/5891] Elapsed 4m 13s (remain 23m 22s) Loss: 0.242

Epoch 4 - avg_train_loss: 0.1411  avg_val_loss: 0.1558  time: 1851s
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tor

f1 score : 0.5254834714680385
recall score : 0.4226172234140362
precision score : 0.6945353815159909


Score: 0.5255
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tornado/platform/asyncio.py", line 195, in start
    self

Epoch: [1][0/5891] Elapsed 0m 1s (remain 107m 32s) Loss: 0.4333(0.4333) Grad: nan  LR: 0.00002000  
Epoch: [1][100/5891] Elapsed 0m 29s (remain 27m 53s) Loss: 0.2612(0.2549) Grad: 5.5844  LR: 0.00002000  
Epoch: [1][200/5891] Elapsed 0m 56s (remain 26m 41s) Loss: 0.2190(0.2419) Grad: 2.1273  LR: 0.00002000  
Epoch: [1][300/5891] Elapsed 1m 24s (remain 26m 1s) Loss: 0.2062(0.2253) Grad: 1.0036  LR: 0.00001999  
Epoch: [1][400/5891] Elapsed 1m 50s (remain 25m 18s) Loss: 0.2167(0.2234) Grad: 2.4602  LR: 0.00001999  
Epoch: [1][500/5891] Elapsed 2m 17s (remain 24m 44s) Loss: 0.1818(0.2255) Grad: 1.0320  LR: 0.00001998  
Epoch: [1][600/5891] Elapsed 2m 45s (remain 24m 17s) Loss: 0.3042(0.2230) Grad: 3.2244  LR: 0.00001997  
Epoch: [1][700/5891] Elapsed 3m 12s (remain 23m 47s) Loss: 0.3057(0.2209) Grad: 2.7650  LR: 0.00001996  
Epoch: [1][800/5891] Elapsed 3m 39s (remain 23m 13s) Loss: 0.2427(0.2190) Grad: 1.7755  LR: 0.00001994  
Epoch: [1][900/5891] Elapsed 4m 6s (remain 22m 45s) Loss: 0.1

Epoch 1 - avg_train_loss: 0.1910  avg_val_loss: 0.1740  time: 1837s
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tor

Epoch: [2][0/5891] Elapsed 0m 0s (remain 86m 54s) Loss: 0.2593(0.2593) Grad: nan  LR: 0.00001707  
Epoch: [2][100/5891] Elapsed 0m 29s (remain 28m 31s) Loss: 0.1106(0.1706) Grad: 1.5944  LR: 0.00001698  
Epoch: [2][200/5891] Elapsed 0m 57s (remain 27m 6s) Loss: 0.1638(0.1721) Grad: 0.7962  LR: 0.00001688  
Epoch: [2][300/5891] Elapsed 1m 24s (remain 26m 8s) Loss: 0.1805(0.1690) Grad: 1.7408  LR: 0.00001678  
Epoch: [2][400/5891] Elapsed 1m 51s (remain 25m 27s) Loss: 0.2217(0.1713) Grad: 0.8834  LR: 0.00001668  
Epoch: [2][500/5891] Elapsed 2m 18s (remain 24m 54s) Loss: 0.3113(0.1715) Grad: 1.7916  LR: 0.00001658  
Epoch: [2][600/5891] Elapsed 2m 47s (remain 24m 30s) Loss: 0.1650(0.1713) Grad: 1.6502  LR: 0.00001648  
Epoch: [2][700/5891] Elapsed 3m 14s (remain 23m 58s) Loss: 0.1221(0.1710) Grad: 1.1037  LR: 0.00001638  
Epoch: [2][800/5891] Elapsed 3m 41s (remain 23m 24s) Loss: 0.1703(0.1718) Grad: 1.2002  LR: 0.00001628  
Epoch: [2][900/5891] Elapsed 4m 8s (remain 22m 53s) Loss: 0.183

Epoch 2 - avg_train_loss: 0.1650  avg_val_loss: 0.1579  time: 1841s
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tor

Epoch: [3][0/5891] Elapsed 0m 0s (remain 78m 12s) Loss: 0.0977(0.0977) Grad: nan  LR: 0.00001000  
Epoch: [3][100/5891] Elapsed 0m 29s (remain 28m 14s) Loss: 0.1752(0.1507) Grad: 1.0703  LR: 0.00000987  
Epoch: [3][200/5891] Elapsed 0m 57s (remain 27m 0s) Loss: 0.1614(0.1475) Grad: 0.5974  LR: 0.00000973  
Epoch: [3][300/5891] Elapsed 1m 24s (remain 26m 3s) Loss: 0.3113(0.1488) Grad: 1.6531  LR: 0.00000960  
Epoch: [3][400/5891] Elapsed 1m 51s (remain 25m 25s) Loss: 0.1357(0.1506) Grad: 1.2783  LR: 0.00000947  
Epoch: [3][500/5891] Elapsed 2m 18s (remain 24m 51s) Loss: 0.1635(0.1504) Grad: 1.8132  LR: 0.00000933  
Epoch: [3][600/5891] Elapsed 2m 46s (remain 24m 21s) Loss: 0.1140(0.1516) Grad: 1.2413  LR: 0.00000920  
Epoch: [3][700/5891] Elapsed 3m 14s (remain 24m 2s) Loss: 0.2031(0.1521) Grad: 1.0540  LR: 0.00000907  
Epoch: [3][800/5891] Elapsed 3m 41s (remain 23m 29s) Loss: 0.1407(0.1529) Grad: 0.9906  LR: 0.00000893  
Epoch: [3][900/5891] Elapsed 4m 8s (remain 22m 55s) Loss: 0.1979

Epoch 3 - avg_train_loss: 0.1524  avg_val_loss: 0.1545  time: 1838s
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tor

Epoch: [4][0/5891] Elapsed 0m 0s (remain 73m 24s) Loss: 0.1113(0.1113) Grad: nan  LR: 0.00000293  
Epoch: [4][100/5891] Elapsed 0m 28s (remain 27m 1s) Loss: 0.1893(0.1455) Grad: 0.9381  LR: 0.00000283  
Epoch: [4][200/5891] Elapsed 0m 55s (remain 26m 6s) Loss: 0.1259(0.1466) Grad: 1.2540  LR: 0.00000274  
Epoch: [4][300/5891] Elapsed 1m 21s (remain 25m 18s) Loss: 0.1348(0.1484) Grad: 1.4520  LR: 0.00000265  
Epoch: [4][400/5891] Elapsed 1m 48s (remain 24m 46s) Loss: 0.2280(0.1456) Grad: 1.7728  LR: 0.00000256  
Epoch: [4][500/5891] Elapsed 2m 15s (remain 24m 18s) Loss: 0.1421(0.1458) Grad: 0.8400  LR: 0.00000247  
Epoch: [4][600/5891] Elapsed 2m 42s (remain 23m 49s) Loss: 0.0704(0.1449) Grad: 0.4659  LR: 0.00000239  
Epoch: [4][700/5891] Elapsed 3m 9s (remain 23m 19s) Loss: 0.2673(0.1448) Grad: 1.6881  LR: 0.00000230  
Epoch: [4][800/5891] Elapsed 3m 35s (remain 22m 48s) Loss: 0.2102(0.1449) Grad: 1.1357  LR: 0.00000222  
Epoch: [4][900/5891] Elapsed 4m 2s (remain 22m 20s) Loss: 0.0620

Epoch 4 - avg_train_loss: 0.1413  avg_val_loss: 0.1548  time: 1829s
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tor

f1 score : 0.5146119605186763
recall score : 0.40738470966753487
precision score : 0.6984502232729183


Score: 0.5146
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tornado/platform/asyncio.py", line 195, in start
    self

Epoch: [1][0/5891] Elapsed 0m 1s (remain 123m 37s) Loss: 0.3323(0.3323) Grad: nan  LR: 0.00002000  
Epoch: [1][100/5891] Elapsed 0m 29s (remain 28m 21s) Loss: 0.2910(0.3206) Grad: 1.1047  LR: 0.00002000  
Epoch: [1][200/5891] Elapsed 0m 57s (remain 27m 3s) Loss: 0.2020(0.2724) Grad: 0.7557  LR: 0.00002000  
Epoch: [1][300/5891] Elapsed 1m 25s (remain 26m 33s) Loss: 0.2429(0.2486) Grad: 0.7772  LR: 0.00001999  
Epoch: [1][400/5891] Elapsed 1m 52s (remain 25m 44s) Loss: 0.4019(0.2377) Grad: 5.3541  LR: 0.00001999  
Epoch: [1][500/5891] Elapsed 2m 19s (remain 24m 57s) Loss: 0.1423(0.2333) Grad: 0.9506  LR: 0.00001998  
Epoch: [1][600/5891] Elapsed 2m 45s (remain 24m 18s) Loss: 0.1714(0.2272) Grad: 1.2358  LR: 0.00001997  
Epoch: [1][700/5891] Elapsed 3m 12s (remain 23m 45s) Loss: 0.1705(0.2234) Grad: 1.7265  LR: 0.00001996  
Epoch: [1][800/5891] Elapsed 3m 39s (remain 23m 12s) Loss: 0.1140(0.2223) Grad: 1.8226  LR: 0.00001994  
Epoch: [1][900/5891] Elapsed 4m 6s (remain 22m 43s) Loss: 0.1

Epoch 1 - avg_train_loss: 0.1910  avg_val_loss: 0.1716  time: 1834s
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tor

Epoch: [2][0/5891] Elapsed 0m 0s (remain 79m 59s) Loss: 0.2377(0.2377) Grad: nan  LR: 0.00001707  
Epoch: [2][100/5891] Elapsed 0m 29s (remain 28m 24s) Loss: 0.2739(0.1704) Grad: 1.1789  LR: 0.00001698  
Epoch: [2][200/5891] Elapsed 0m 56s (remain 26m 29s) Loss: 0.1396(0.1774) Grad: 0.9085  LR: 0.00001688  
Epoch: [2][300/5891] Elapsed 1m 22s (remain 25m 35s) Loss: 0.1932(0.1761) Grad: 1.1013  LR: 0.00001678  
Epoch: [2][400/5891] Elapsed 1m 49s (remain 24m 59s) Loss: 0.0718(0.1717) Grad: 0.7951  LR: 0.00001668  
Epoch: [2][500/5891] Elapsed 2m 17s (remain 24m 40s) Loss: 0.1702(0.1727) Grad: 0.7793  LR: 0.00001658  
Epoch: [2][600/5891] Elapsed 2m 44s (remain 24m 6s) Loss: 0.0684(0.1731) Grad: 0.8297  LR: 0.00001648  
Epoch: [2][700/5891] Elapsed 3m 10s (remain 23m 30s) Loss: 0.0964(0.1719) Grad: 1.2380  LR: 0.00001638  
Epoch: [2][800/5891] Elapsed 3m 37s (remain 23m 0s) Loss: 0.1785(0.1703) Grad: 1.5909  LR: 0.00001628  
Epoch: [2][900/5891] Elapsed 4m 4s (remain 22m 31s) Loss: 0.189

Epoch 2 - avg_train_loss: 0.1656  avg_val_loss: 0.1621  time: 1844s
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tor

Epoch: [3][0/5891] Elapsed 0m 0s (remain 78m 57s) Loss: 0.2466(0.2466) Grad: nan  LR: 0.00001000  
Epoch: [3][100/5891] Elapsed 0m 30s (remain 28m 40s) Loss: 0.2734(0.1497) Grad: 2.6759  LR: 0.00000987  
Epoch: [3][200/5891] Elapsed 0m 57s (remain 27m 7s) Loss: 0.2302(0.1550) Grad: 0.7378  LR: 0.00000973  
Epoch: [3][300/5891] Elapsed 1m 24s (remain 26m 14s) Loss: 0.1624(0.1536) Grad: 0.9083  LR: 0.00000960  
Epoch: [3][400/5891] Elapsed 1m 51s (remain 25m 33s) Loss: 0.2515(0.1519) Grad: 1.3834  LR: 0.00000947  
Epoch: [3][500/5891] Elapsed 2m 18s (remain 24m 52s) Loss: 0.1158(0.1539) Grad: 0.4627  LR: 0.00000933  
Epoch: [3][600/5891] Elapsed 2m 45s (remain 24m 16s) Loss: 0.0220(0.1551) Grad: 0.7046  LR: 0.00000920  
Epoch: [3][700/5891] Elapsed 3m 12s (remain 23m 47s) Loss: 0.1790(0.1551) Grad: 0.8206  LR: 0.00000907  
Epoch: [3][800/5891] Elapsed 3m 41s (remain 23m 26s) Loss: 0.1917(0.1549) Grad: 1.2368  LR: 0.00000893  
Epoch: [3][900/5891] Elapsed 4m 8s (remain 22m 56s) Loss: 0.15

Epoch 3 - avg_train_loss: 0.1528  avg_val_loss: 0.1569  time: 1837s
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tor

Epoch: [4][0/5891] Elapsed 0m 0s (remain 74m 17s) Loss: 0.1912(0.1912) Grad: nan  LR: 0.00000293  
Epoch: [4][100/5891] Elapsed 0m 28s (remain 27m 39s) Loss: 0.1593(0.1464) Grad: 1.3518  LR: 0.00000283  
Epoch: [4][200/5891] Elapsed 0m 55s (remain 26m 18s) Loss: 0.0920(0.1445) Grad: 1.0038  LR: 0.00000274  
Epoch: [4][300/5891] Elapsed 1m 22s (remain 25m 28s) Loss: 0.1423(0.1438) Grad: 0.8543  LR: 0.00000265  
Epoch: [4][400/5891] Elapsed 1m 49s (remain 25m 3s) Loss: 0.1832(0.1429) Grad: 1.3307  LR: 0.00000256  
Epoch: [4][500/5891] Elapsed 2m 17s (remain 24m 35s) Loss: 0.1393(0.1410) Grad: 0.8762  LR: 0.00000247  
Epoch: [4][600/5891] Elapsed 2m 46s (remain 24m 22s) Loss: 0.2932(0.1405) Grad: 1.9112  LR: 0.00000239  
Epoch: [4][700/5891] Elapsed 3m 12s (remain 23m 46s) Loss: 0.1593(0.1400) Grad: 0.9530  LR: 0.00000230  
Epoch: [4][800/5891] Elapsed 3m 39s (remain 23m 15s) Loss: 0.2131(0.1402) Grad: 1.3429  LR: 0.00000222  
Epoch: [4][900/5891] Elapsed 4m 6s (remain 22m 47s) Loss: 0.16

Epoch 4 - avg_train_loss: 0.1411  avg_val_loss: 0.1573  time: 1838s
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tor

f1 score : 0.5069205850593894
recall score : 0.39564817652467055
precision score : 0.7052717836656651


Score: 0.5069
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tornado/platform/asyncio.py", line 195, in start
    self

Epoch: [1][0/5891] Elapsed 0m 0s (remain 86m 23s) Loss: 0.4719(0.4719) Grad: nan  LR: 0.00002000  
Epoch: [1][100/5891] Elapsed 0m 28s (remain 26m 49s) Loss: 0.3005(0.2527) Grad: 3.3579  LR: 0.00002000  
Epoch: [1][200/5891] Elapsed 0m 54s (remain 25m 56s) Loss: 0.1527(0.2344) Grad: 1.5101  LR: 0.00002000  
Epoch: [1][300/5891] Elapsed 1m 21s (remain 25m 10s) Loss: 0.1526(0.2323) Grad: 1.2165  LR: 0.00001999  
Epoch: [1][400/5891] Elapsed 1m 48s (remain 24m 39s) Loss: 0.0939(0.2285) Grad: 2.4488  LR: 0.00001999  
Epoch: [1][500/5891] Elapsed 2m 14s (remain 24m 10s) Loss: 0.1769(0.2232) Grad: 2.2841  LR: 0.00001998  
Epoch: [1][600/5891] Elapsed 2m 42s (remain 23m 53s) Loss: 0.1382(0.2191) Grad: 0.7167  LR: 0.00001997  
Epoch: [1][700/5891] Elapsed 3m 9s (remain 23m 25s) Loss: 0.4529(0.2173) Grad: 5.6301  LR: 0.00001996  
Epoch: [1][800/5891] Elapsed 3m 36s (remain 22m 53s) Loss: 0.1665(0.2174) Grad: 0.5226  LR: 0.00001994  
Epoch: [1][900/5891] Elapsed 4m 2s (remain 22m 25s) Loss: 0.18

Epoch 1 - avg_train_loss: 0.1893  avg_val_loss: 0.1768  time: 1842s
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tor

Epoch: [2][0/5891] Elapsed 0m 0s (remain 79m 31s) Loss: 0.2013(0.2013) Grad: nan  LR: 0.00001707  
Epoch: [2][100/5891] Elapsed 0m 29s (remain 28m 33s) Loss: 0.2208(0.1664) Grad: 1.8966  LR: 0.00001698  
Epoch: [2][200/5891] Elapsed 0m 57s (remain 27m 11s) Loss: 0.1442(0.1600) Grad: 0.9463  LR: 0.00001688  
Epoch: [2][300/5891] Elapsed 1m 25s (remain 26m 22s) Loss: 0.1277(0.1691) Grad: 1.8430  LR: 0.00001678  
Epoch: [2][400/5891] Elapsed 1m 52s (remain 25m 36s) Loss: 0.2834(0.1711) Grad: 1.0161  LR: 0.00001668  
Epoch: [2][500/5891] Elapsed 2m 20s (remain 25m 10s) Loss: 0.1321(0.1697) Grad: 1.4437  LR: 0.00001658  
Epoch: [2][600/5891] Elapsed 2m 47s (remain 24m 37s) Loss: 0.2289(0.1700) Grad: 1.1155  LR: 0.00001648  
Epoch: [2][700/5891] Elapsed 3m 15s (remain 24m 5s) Loss: 0.1880(0.1701) Grad: 0.9977  LR: 0.00001638  
Epoch: [2][800/5891] Elapsed 3m 42s (remain 23m 35s) Loss: 0.2305(0.1699) Grad: 1.5195  LR: 0.00001628  
Epoch: [2][900/5891] Elapsed 4m 9s (remain 23m 2s) Loss: 0.143

Epoch 2 - avg_train_loss: 0.1646  avg_val_loss: 0.1653  time: 1881s
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tor

Epoch: [3][0/5891] Elapsed 0m 0s (remain 77m 7s) Loss: 0.2064(0.2064) Grad: nan  LR: 0.00001000  
Epoch: [3][100/5891] Elapsed 0m 30s (remain 28m 46s) Loss: 0.0825(0.1548) Grad: 1.4012  LR: 0.00000987  
Epoch: [3][200/5891] Elapsed 0m 58s (remain 27m 25s) Loss: 0.1127(0.1526) Grad: 2.1776  LR: 0.00000973  
Epoch: [3][300/5891] Elapsed 1m 25s (remain 26m 34s) Loss: 0.1357(0.1577) Grad: 0.6697  LR: 0.00000960  
Epoch: [3][400/5891] Elapsed 1m 52s (remain 25m 46s) Loss: 0.0497(0.1529) Grad: 0.9234  LR: 0.00000947  
Epoch: [3][500/5891] Elapsed 2m 22s (remain 25m 31s) Loss: 0.1632(0.1524) Grad: 1.4847  LR: 0.00000933  
Epoch: [3][600/5891] Elapsed 2m 49s (remain 24m 55s) Loss: 0.0329(0.1516) Grad: 0.7751  LR: 0.00000920  
Epoch: [3][700/5891] Elapsed 3m 17s (remain 24m 20s) Loss: 0.1125(0.1519) Grad: 2.3838  LR: 0.00000907  
Epoch: [3][800/5891] Elapsed 3m 44s (remain 23m 45s) Loss: 0.2133(0.1510) Grad: 1.4954  LR: 0.00000893  
Epoch: [3][900/5891] Elapsed 4m 11s (remain 23m 13s) Loss: 0.2

Epoch 3 - avg_train_loss: 0.1517  avg_val_loss: 0.1586  time: 1869s
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/usr/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tor

Epoch: [4][0/5891] Elapsed 0m 0s (remain 79m 39s) Loss: 0.0825(0.0825) Grad: nan  LR: 0.00000293  
Epoch: [4][100/5891] Elapsed 0m 29s (remain 27m 57s) Loss: 0.1996(0.1430) Grad: 0.9281  LR: 0.00000283  
Epoch: [4][200/5891] Elapsed 0m 56s (remain 26m 40s) Loss: 0.3044(0.1439) Grad: 1.6660  LR: 0.00000274  
Epoch: [4][300/5891] Elapsed 1m 23s (remain 25m 56s) Loss: 0.1656(0.1417) Grad: 1.8260  LR: 0.00000265  
Epoch: [4][400/5891] Elapsed 1m 51s (remain 25m 26s) Loss: 0.0923(0.1410) Grad: 0.6482  LR: 0.00000256  
Epoch: [4][500/5891] Elapsed 2m 19s (remain 25m 1s) Loss: 0.0922(0.1369) Grad: 0.9679  LR: 0.00000247  
Epoch: [4][600/5891] Elapsed 2m 46s (remain 24m 24s) Loss: 0.1052(0.1388) Grad: 0.7071  LR: 0.00000239  
Epoch: [4][700/5891] Elapsed 3m 13s (remain 23m 53s) Loss: 0.0943(0.1387) Grad: 0.6258  LR: 0.00000230  
Epoch: [4][800/5891] Elapsed 3m 40s (remain 23m 23s) Loss: 0.1379(0.1386) Grad: 0.6338  LR: 0.00000222  
Epoch: [4][900/5891] Elapsed 4m 8s (remain 22m 54s) Loss: 0.12

In [None]:
# Release the Colab runtime once the preceding cells have finished.
from google.colab import drive, runtime

# Upper bound on how long to wait for Drive to sync, so a stuck flush cannot
# keep the runtime (and its GPU) allocated indefinitely.
DRIVE_FLUSH_TIMEOUT_MS = 10 * 60 * 1000

try:
    # unassign() deletes the VM, so anything still buffered for the Drive mount
    # at /content/drive would be lost; push it out first.
    # NOTE(review): the training log shows "Transport endpoint is not connected"
    # on a stream flush, which suggests the mount can drop mid-run - confirm
    # that checkpoints/logs are actually written under /content/drive.
    drive.flush_and_unmount(timeout_ms=DRIVE_FLUSH_TIMEOUT_MS)
except Exception as exc:
    # Best-effort: a broken or already-disconnected mount must not prevent the
    # runtime from being released, but the failure should be visible.
    print(f"Drive flush failed ({exc!r}); files on Drive may be incomplete.")
finally:
    runtime.unassign()