# Import libraries

In [1]:
import os
import os, sys
import glob
import re
import datetime
from datetime import datetime, timedelta 
import math, copy, time
from itertools import cycle
from collections import defaultdict

from parse import parse
import numpy as np
import pandas as pd

import torch
from torch import nn, optim
from torch.autograd import Variable
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch.nn.functional as F

# import torchvision
# import torchvision.transforms as transforms

from sklearn.metrics import roc_auc_score, plot_roc_curve
from sklearn import svm


from tqdm.notebook import tqdm

import matplotlib.pyplot as plt
import seaborn

%load_ext autoreload
%autoreload 2



In [2]:
# Show up to 100 columns when rendering wide DataFrames.
# The full option name is spelled out: abbreviated keys are only resolved by
# pattern matching and can break if pandas adds a similarly named option.
pd.set_option("display.max_columns", 100)

In [3]:
# Util functions
def size_and_ratio(df, col, dropna=True):
    """Count rows per group of ``col`` and each group's share of all counted rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to summarise.
    col : label or list of labels
        Grouping key(s), passed to ``DataFrame.groupby``.
    dropna : bool, default True
        If True, rows whose key is NaN are left out. If False they are counted
        as their own group (natively on pandas >= 1.1, otherwise by replacing
        NaN with the string ``'nan'`` before grouping).

    Returns
    -------
    pandas.DataFrame
        Indexed by group key, sorted by descending size, with the columns
        ``size`` (row count) and ``ratio`` (size divided by the sum of sizes).

    Also prints the total number of counted rows and the shape of the size
    Series.
    """
    # ``groupby(dropna=...)`` exists from pandas 1.1 on. Compare the version
    # numerically; comparing version strings is lexicographic and fragile.
    major, minor = (int(part) for part in pd.__version__.split(".")[:2])
    if (major, minor) >= (1, 1):
        grouped = df.groupby(col, dropna=dropna)
    elif dropna:
        grouped = df.groupby(col)
    else:
        # Older pandas always drops NaN keys, so turn them into a real value.
        grouped = df.fillna('nan').groupby(col)

    sr = grouped.size().sort_values(ascending=False)
    total = sr.sum()  # vectorised; computed once instead of calling sum() twice
    print("Sum :", total, sr.shape)

    return pd.concat([sr.rename('size'), (sr / total).rename('ratio')], axis=1)

# Data load

In [4]:
# Read the pre-split train / validation / test tables (version v3_1).
df_train, df_val, df_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        "data/df_data_v3_1/df_train_v3_1.csv",
        "data/df_data_v3_1/df_valid_v3_1.csv",
        "data/df_data_v3_1/df_test_v3_1.csv",
    )
)
print(df_train.shape, df_val.shape, df_test.shape)
df_train.head()

(14093, 24) (4647, 24) (4586, 24)


Unnamed: 0,baby_id,cough,fever,sore_throat,shortness_of_breath,head_ache,runny_nose,muscle_pain,chills,loss_of_taste,loss_of_smell,sputum,chest_pain,indication_other,indication_abroad,indication_contact,global_confirmed_ratio,confirmed_ratio,sigungu_confirmed_ratio,mask,gender,age_ratio,weekday,pcr_result
0,21099,1,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0.173311,0.007956,0.012987,1,0.0,0.18,3,0
1,21151,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0.179009,0.008614,0.013084,1,0.0,0.17,4,0
2,21024,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0.179009,0.008614,0.013084,1,1.0,0.29,4,0
3,21176,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0.1315,0.008252,0.0,0,0.0,0.4,5,0
4,4467,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0.1315,0.008252,0.0,0,1.0,0.34,5,0


### Add CLS token

In [5]:
# Add a constant '[CLS]' column (always 0) to every split.
for split_df in (df_train, df_val, df_test):
    split_df['[CLS]'] = 0

In [6]:
import random
# Per split: frame shape and number of distinct baby_id values.
for split_label, split_df in (("Train:", df_train), ("Validaiton:", df_val)):
    print(split_label, split_df.shape, split_df['baby_id'].nunique())

Train: (14093, 25) 9641
Validaiton: (4647, 25) 3213


## Remove baby_id and results

In [7]:
# Identifier and label columns are not model inputs.
exclude_cols = ['baby_id', 'pcr_result']

# Feature columns in their original order, with '[CLS]' moved to the front.
cols = ['[CLS]'] + [
    name
    for name in df_train.columns
    if name not in exclude_cols and name != '[CLS]'
]

# Ground truth (pcr_result only) and model inputs (feature columns) per split.
df_train_gt, df_train_input = df_train[['pcr_result']], df_train[cols]
df_val_gt, df_val_input = df_val[['pcr_result']], df_val[cols]
df_test_gt, df_test_input = df_test[['pcr_result']], df_test[cols]


In [8]:
# Display the final feature order ('[CLS]' first, then the remaining inputs).
cols

['[CLS]',
 'cough',
 'fever',
 'sore_throat',
 'shortness_of_breath',
 'head_ache',
 'runny_nose',
 'muscle_pain',
 'chills',
 'loss_of_taste',
 'loss_of_smell',
 'sputum',
 'chest_pain',
 'indication_other',
 'indication_abroad',
 'indication_contact',
 'global_confirmed_ratio',
 'confirmed_ratio',
 'sigungu_confirmed_ratio',
 'mask',
 'gender',
 'age_ratio',
 'weekday']

# Make category values

In [9]:
from model.attention_model import CategoricalAttentionModel
from model.utils import get_dict_category_from_dataset
from model.utils import get_category_key, create_cat_and_intensity_from_df

In [10]:
# Stack train and validation rows; dict_category below is derived from this
# combined frame (the test split is not part of it).
df_trainval = pd.concat([df_train, df_val], axis=0)
print(df_trainval.shape)

# get_dict_category_from_dataset returns a pair. The first element is kept only
# for the combined frame (dict_category); for the individual splits it is
# discarded and only the second element is used.
# NOTE(review): presumably the second element is the input frame with each
# column typed as category/float, as the printed log suggests — confirm in
# model.utils.
# NOTE(review): assigning to `_` shadows IPython's last-output variable.
dict_category, df_trainval_typed = get_dict_category_from_dataset(df_trainval[cols])
_, df_train_typed = get_dict_category_from_dataset(df_train[cols])
_, df_val_typed = get_dict_category_from_dataset(df_val[cols])
_, df_test_typed = get_dict_category_from_dataset(df_test[cols])
dict_category

(18740, 25)
[CLS] category [0] ...
cough category [1 0] ...
fever category [0 1] ...
sore_throat category [0 1] ...
shortness_of_breath category [0 1] ...
head_ache category [1 0] ...
runny_nose category [0 1] ...
muscle_pain category [0 1] ...
chills category [0 1] ...
loss_of_taste category [0 1] ...
loss_of_smell category [0 1] ...
sputum category [1 0] ...
chest_pain category [0 1] ...
indication_other category [0 1] ...
indication_abroad category [0 1] ...
indication_contact category [1 0] ...
global_confirmed_ratio float [0.1733114  0.17900909 0.13149977 0.14631498 0.15655098 0.17425644
 0.17103502 0.16990724 0.12749546 0.14885249]
confirmed_ratio float [0.00795568 0.00861396 0.00825183 0.00797017 0.01154644 0.01142734
 0.0113018  0.01122937 0.01076423 0.00895517]
sigungu_confirmed_ratio float [0.01298674 0.01308374 0.         0.01324623 0.01342084 0.0135712
 0.01798011 0.01397984 0.01779546 0.0141302 ]
mask category [1 0] ...
gender category [0. 1.] ...
age_ratio float [0.18 0.1

{'[CLS]_0.0': 0,
 'cough_1.0': 1,
 'cough_0.0': 2,
 'fever_0.0': 3,
 'fever_1.0': 4,
 'sore_throat_0.0': 5,
 'sore_throat_1.0': 6,
 'shortness_of_breath_0.0': 7,
 'shortness_of_breath_1.0': 8,
 'head_ache_1.0': 9,
 'head_ache_0.0': 10,
 'runny_nose_0.0': 11,
 'runny_nose_1.0': 12,
 'muscle_pain_0.0': 13,
 'muscle_pain_1.0': 14,
 'chills_0.0': 15,
 'chills_1.0': 16,
 'loss_of_taste_0.0': 17,
 'loss_of_taste_1.0': 18,
 'loss_of_smell_0.0': 19,
 'loss_of_smell_1.0': 20,
 'sputum_1.0': 21,
 'sputum_0.0': 22,
 'chest_pain_0.0': 23,
 'chest_pain_1.0': 24,
 'indication_other_0.0': 25,
 'indication_other_1.0': 26,
 'indication_abroad_0.0': 27,
 'indication_abroad_1.0': 28,
 'indication_contact_1.0': 29,
 'indication_contact_0.0': 30,
 'global_confirmed_ratio': 31,
 'confirmed_ratio': 32,
 'sigungu_confirmed_ratio': 33,
 'mask_1.0': 34,
 'mask_0.0': 35,
 'gender_0.0': 36,
 'gender_1.0': 37,
 'age_ratio': 38,
 'weekday_3.0': 39,
 'weekday_4.0': 40,
 'weekday_5.0': 41,
 'weekday_0.0': 42,
 'weekd

### category 값, intensity 값 생성

### train 데이터 먼저

In [11]:
# Build the category-id frame and the intensity frame for the training split.
df_train_category, df_train_intensity = create_cat_and_intensity_from_df(
    df_train_typed,
    dict_category,
)

# Eyeball ten random rows of each frame (unseeded, so rows differ per run).
display(df_train_category.sample(10), df_train_intensity.sample(10))

Unnamed: 0,[CLS],cough,fever,sore_throat,shortness_of_breath,head_ache,runny_nose,muscle_pain,chills,loss_of_taste,loss_of_smell,sputum,chest_pain,indication_other,indication_abroad,indication_contact,global_confirmed_ratio,confirmed_ratio,sigungu_confirmed_ratio,mask,gender,age_ratio,weekday
581,0.0,1.0,3.0,6.0,7.0,9.0,12.0,13.0,15.0,17.0,19.0,21.0,24.0,26.0,27.0,30.0,31.0,32.0,33.0,35.0,37.0,38.0,45.0
10502,0.0,1.0,3.0,6.0,7.0,9.0,12.0,13.0,15.0,17.0,19.0,22.0,23.0,25.0,27.0,29.0,31.0,32.0,33.0,34.0,37.0,38.0,43.0
13159,0.0,1.0,3.0,5.0,7.0,10.0,11.0,13.0,15.0,17.0,19.0,22.0,23.0,26.0,27.0,30.0,31.0,32.0,33.0,34.0,36.0,38.0,42.0
6647,0.0,1.0,3.0,6.0,7.0,9.0,11.0,14.0,15.0,17.0,19.0,21.0,23.0,25.0,27.0,29.0,31.0,32.0,33.0,34.0,36.0,38.0,41.0
10084,0.0,1.0,4.0,6.0,8.0,9.0,12.0,13.0,15.0,18.0,20.0,21.0,24.0,26.0,27.0,30.0,31.0,32.0,33.0,35.0,37.0,38.0,44.0
1372,0.0,1.0,3.0,6.0,7.0,9.0,11.0,13.0,16.0,17.0,19.0,21.0,23.0,25.0,27.0,29.0,31.0,32.0,33.0,34.0,36.0,38.0,41.0
11362,0.0,2.0,3.0,5.0,7.0,10.0,11.0,13.0,15.0,17.0,19.0,22.0,23.0,26.0,27.0,30.0,31.0,32.0,33.0,35.0,37.0,38.0,45.0
4822,0.0,1.0,4.0,6.0,7.0,9.0,12.0,13.0,15.0,17.0,19.0,21.0,23.0,26.0,27.0,30.0,31.0,32.0,33.0,35.0,36.0,38.0,39.0
4163,0.0,1.0,3.0,6.0,7.0,9.0,11.0,13.0,15.0,17.0,19.0,21.0,23.0,25.0,27.0,29.0,31.0,32.0,33.0,34.0,37.0,38.0,43.0
3332,0.0,1.0,3.0,6.0,7.0,9.0,12.0,13.0,15.0,17.0,19.0,22.0,23.0,26.0,27.0,30.0,31.0,32.0,33.0,34.0,36.0,38.0,45.0


Unnamed: 0,[CLS],cough,fever,sore_throat,shortness_of_breath,head_ache,runny_nose,muscle_pain,chills,loss_of_taste,loss_of_smell,sputum,chest_pain,indication_other,indication_abroad,indication_contact,global_confirmed_ratio,confirmed_ratio,sigungu_confirmed_ratio,mask,gender,age_ratio,weekday
10266,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.170558,0.173634,0.0,1.0,1.0,0.46,1.0
5162,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.425722,0.538707,0.232699,1.0,1.0,0.31,1.0
1585,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.300265,0.339139,0.0,1.0,1.0,0.39,1.0
9622,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.23262,0.33908,0.0,1.0,1.0,0.47,1.0
10618,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.19712,0.121378,0.323654,1.0,1.0,0.31,1.0
10474,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.117492,0.190635,0.0,1.0,1.0,0.3,1.0
4764,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.508594,0.655087,0.200437,1.0,1.0,0.29,1.0
11988,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.24869,0.066428,0.413265,1.0,1.0,0.32,1.0
12438,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.352172,0.181642,0.42981,1.0,1.0,0.63,1.0
5214,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.425722,0.538707,0.197727,1.0,1.0,0.35,1.0


예시로 sore_throat id값 확인해봄

In [12]:
# Example check: which category ids occur in the sore_throat column, and how often.
size_and_ratio(df_train_category, 'sore_throat')

Sum : 14093 (2,)


Unnamed: 0_level_0,size,ratio
sore_throat,Unnamed: 1_level_1,Unnamed: 2_level_1
6.0,7723,0.548003
5.0,6370,0.451997


### df_val에서 df_cat값과 df_intensity 값 생성

In [13]:
# Build the category / intensity frames for the validation split, using the
# same dict_category as for the training split.
df_val_category, df_val_intensity = create_cat_and_intensity_from_df(df_val_typed, dict_category)


### df_test에서도 마찬가지로 생성

In [14]:
# Build the category / intensity frames for the test split, using the same
# dict_category as for the training split.
df_test_category, df_test_intensity = create_cat_and_intensity_from_df(df_test_typed, dict_category)


In [15]:
# Convert every split to plain numpy arrays: category ids, intensities, labels.
train_np_cat, train_np_ins = df_train_category.values, df_train_intensity.values
train_np_gt = df_train_gt[['pcr_result']].values

val_np_cat, val_np_ins = df_val_category.values, df_val_intensity.values
val_np_gt = df_val_gt[['pcr_result']].values

test_np_cat, test_np_ins = df_test_category.values, df_test_intensity.values
test_np_gt = df_test_gt[['pcr_result']].values

# Show the label arrays of all three splits.
train_np_gt, val_np_gt, test_np_gt

(array([[0],
        [0],
        [0],
        ...,
        [1],
        [0],
        [0]]),
 array([[0],
        [0],
        [0],
        ...,
        [0],
        [1],
        [0]]),
 array([[0],
        [0],
        [0],
        ...,
        [1],
        [0],
        [1]]))

In [16]:
# Independent copies per split: a [category, intensity] pair plus the labels,
# so changes to these objects do not affect the *_np_* arrays.
train_input = [arr.copy() for arr in (train_np_cat, train_np_ins)]
train_gt = train_np_gt.copy()

valid_input = [arr.copy() for arr in (val_np_cat, val_np_ins)]
valid_gt = val_np_gt.copy()

test_input = [arr.copy() for arr in (test_np_cat, test_np_ins)]
test_gt = test_np_gt.copy()


In [17]:
# Shapes of the category arrays, then of the label arrays, for train / valid / test.
print(*(pair[0].shape for pair in (train_input, valid_input, test_input)))
print(*(labels.shape for labels in (train_gt, valid_gt, test_gt)))

(14093, 23) (4647, 23) (4586, 23)
(14093, 1) (4647, 1) (4586, 1)


In [18]:
# Class balance per split: positives, negatives and their ratio.
# A blank line separates the splits (none after the last one).
for pos_caption, neg_caption, labels, blank_after in (
    ("# of train data positive: ", "# of train data negative: ", train_gt, True),
    ("# of valid data positive: ", "# of valid data negative: ", valid_gt, True),
    ("# of test data positive: ", "# of test data negative: ", test_gt, False),
):
    n_positive = labels.sum()
    n_negative = labels.shape[0] - n_positive
    print(pos_caption, n_positive)
    print(neg_caption, n_negative)
    print("ratio of P/N: ", n_positive / n_negative)
    if blank_after:
        print("")

# of train data positive:  10063
# of train data negative:  4030
ratio of P/N:  2.4970223325062033

# of valid data positive:  3415
# of valid data negative:  1232
ratio of P/N:  2.7719155844155843

# of test data positive:  3332
# of test data negative:  1254
ratio of P/N:  2.657097288676236


### Check dict_category

In [19]:
# Display the mapping from '<column>_<value>' (or bare column name for float
# columns) to integer id, as shown in the output below.
dict_category

{'[CLS]_0.0': 0,
 'cough_1.0': 1,
 'cough_0.0': 2,
 'fever_0.0': 3,
 'fever_1.0': 4,
 'sore_throat_0.0': 5,
 'sore_throat_1.0': 6,
 'shortness_of_breath_0.0': 7,
 'shortness_of_breath_1.0': 8,
 'head_ache_1.0': 9,
 'head_ache_0.0': 10,
 'runny_nose_0.0': 11,
 'runny_nose_1.0': 12,
 'muscle_pain_0.0': 13,
 'muscle_pain_1.0': 14,
 'chills_0.0': 15,
 'chills_1.0': 16,
 'loss_of_taste_0.0': 17,
 'loss_of_taste_1.0': 18,
 'loss_of_smell_0.0': 19,
 'loss_of_smell_1.0': 20,
 'sputum_1.0': 21,
 'sputum_0.0': 22,
 'chest_pain_0.0': 23,
 'chest_pain_1.0': 24,
 'indication_other_0.0': 25,
 'indication_other_1.0': 26,
 'indication_abroad_0.0': 27,
 'indication_abroad_1.0': 28,
 'indication_contact_1.0': 29,
 'indication_contact_0.0': 30,
 'global_confirmed_ratio': 31,
 'confirmed_ratio': 32,
 'sigungu_confirmed_ratio': 33,
 'mask_1.0': 34,
 'mask_0.0': 35,
 'gender_0.0': 36,
 'gender_1.0': 37,
 'age_ratio': 38,
 'weekday_3.0': 39,
 'weekday_4.0': 40,
 'weekday_5.0': 41,
 'weekday_0.0': 42,
 'weekd

## Import model

In [46]:
from model.base_dataloader import IsraelDataLoader, IsraelDatasetWithIntensity
from model.utils import run_validation

In [47]:
# Shapes of the three training arrays handed to the dataset.
for caption, array in (
    ("cat value:", train_input[0]),
    ("intensity value:", train_input[1]),
    ("Ground truth value:", train_gt),
):
    print(caption, array.shape)

cat value: (14093, 23)
intensity value: (14093, 23)
Ground truth value: (14093, 1)


# Dataset, data_loader

In [48]:
import logging

# Ask the root logger to emit DEBUG and above, then create this notebook's logger.
# NOTE(review): basicConfig is a no-op when the root logger already has
# handlers — confirm the level actually takes effect in this kernel.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

In [49]:
# Create one dataset per split from (category ids, intensities, labels),
# all sharing the same dict_category.
train_dataset = IsraelDatasetWithIntensity(train_np_cat, train_np_ins, train_np_gt, dict_category)
val_dataset = IsraelDatasetWithIntensity(val_np_cat, val_np_ins, val_np_gt, dict_category)
test_dataset = IsraelDatasetWithIntensity(test_np_cat, test_np_ins, test_np_gt, dict_category)

In [None]:
def save_model(config_name, model, epoch):
    """Save ``model``'s state dict under ``./train_data`` and return the model.

    The file is named ``<ModelClass>_<config_name>_<epoch>.pth`` with the epoch
    zero-padded to four digits.

    Parameters
    ----------
    config_name : str
        Free-form tag describing the run configuration.
    model : torch.nn.Module
        Model whose ``state_dict()`` is written.
    epoch : int
        Epoch number; must be an integer because of the ``04d`` format.

    Returns
    -------
    The same ``model`` object that was passed in.
    """
    save_path = f"./train_data/{model.__class__.__name__}_{config_name}_{epoch:04d}.pth"
    print("save_path", save_path)
    # torch.save does not create missing parent directories, so make sure the
    # target directory exists before writing.
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    torch.save(model.state_dict(), save_path)
    print("Success to save to : ", save_path)
    return model

### Simple attention

In [263]:
from model import simple_attention

In [277]:
# Hyper-parameters for the simple-attention experiment.
# NOTE(review): `epochs` is not referenced anywhere else in this cell.
batch_size = 128
epochs = 30000

# Training batches are shuffled; validation batches keep their order.
train_loader = IsraelDataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = IsraelDataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Sequence length = size of the second axis of the category array
# (one position per feature column, including '[CLS]').
len_seq = train_input[0].shape[1]
print("len_seq:", len_seq)
d_model = 16
n_head = 1
# NOTE(review): n_layer only appears in config_name below; it is not passed to
# the model constructor.
n_layer = 1
lr = 1e-4

model = simple_attention.CategoricalAttentionSimpleModel(
    dict_category=dict_category,
    d_model=d_model,
    n_head=n_head,
    len_seq=len_seq
)

# NOTE(review): the model is switched to eval mode here although it is trained
# in a later cell — confirm train_on_epoch calls model.train() itself.
model.eval()

print("d_model:", model.d_model, "n_head:", model.n_head, 'len_seq:', model.len_seq)
# Binary cross-entropy averaged over the batch; expects probabilities as input.
loss_func = nn.BCELoss(reduction='mean')
optimizer = optim.RAdam(model.parameters(), lr=lr)
# Tag used to identify this configuration (e.g. in checkpoint file names).
config_name = f"layer{n_layer}_dmodel{d_model}_nhead{n_head}_lr{lr}"

train_log = dict()
val_log = dict()

# Metrics keyed by name. The 'auc' entry takes (pred, gt) and converts both to
# numpy before calling roc_auc_score.
# NOTE(review): assumes pred/gt are CPU tensors that do not require grad
# (otherwise .numpy() raises) — confirm against the caller.
val_metrics = {
    "loss": loss_func,
    'auc': lambda pred, gt: roc_auc_score(gt.numpy(), pred.numpy())
}


    

len_seq: 23
d_model: 16 n_head: 1 len_seq: 23


In [278]:
# Display the model's module structure.
model

CategoricalAttentionSimpleModel(
  (cat_embedding): Linear(in_features=46, out_features=16, bias=True)
  (fc_key): Linear(in_features=16, out_features=16, bias=True)
  (fc_query): Linear(in_features=16, out_features=16, bias=True)
  (fc_value): Linear(in_features=16, out_features=16, bias=True)
  (multihead_attn): MultiheadAttention(
    (out_proj): NonDynamicallyQuantizableLinear(in_features=16, out_features=16, bias=True)
  )
  (final_fc): Linear(in_features=16, out_features=1, bias=True)
  (final_sigmoid): Sigmoid()
)

### Test with very small dataset (2 datapoints)

In [279]:
# Show validation rows 7 and 8 (category ids, intensities, labels); per the
# output below they carry one negative and one positive label.
val_np_cat[7:9],    val_np_ins[7:9],    val_np_gt[7:9],

(array([[ 0.,  2.,  3.,  5.,  7., 10., 11., 13., 15., 17., 19., 22., 23.,
         26., 27., 30., 31., 32., 33., 34., 37., 38., 39.],
        [ 0.,  1.,  3.,  5.,  7., 10., 11., 13., 15., 17., 19., 22., 23.,
         26., 27., 30., 31., 32., 33., 34., 37., 38., 40.]]),
 array([[1.        , 1.        , 1.        , 1.        , 1.        ,
         1.        , 1.        , 1.        , 1.        , 1.        ,
         1.        , 1.        , 1.        , 1.        , 1.        ,
         1.        , 0.18225966, 0.01196491, 0.        , 1.        ,
         1.        , 0.41      , 1.        ],
        [1.        , 1.        , 1.        , 1.        , 1.        ,
         1.        , 1.        , 1.        , 1.        , 1.        ,
         1.        , 1.        , 1.        , 1.        , 1.        ,
         1.        , 0.18804529, 0.01177016, 0.02688911, 1.        ,
         1.        , 0.25      , 1.        ]]),
 array([[0],
        [1]]))

In [280]:
# Sanity check: train repeatedly on just two validation rows (indices 7 and 8).
small_test_dataset = IsraelDatasetWithIntensity(
    val_np_cat[7:9],
    val_np_ins[7:9],
    val_np_gt[7:9],
    dict_category
)

# NOTE(review): the loader is rebuilt on every iteration although its arguments
# never change; it could probably be created once before the loop — confirm
# IsraelDataLoader can be iterated repeatedly.
# NOTE(review): result_info is overwritten each iteration and never read here.
# NOTE(review): in the logged output the loss stays around 0.6932 (about ln 2)
# for well over a thousand steps, i.e. the model does not fit even these two
# samples — worth investigating before running the full training.
for i in range(10000):
    small_test_loader = IsraelDataLoader(small_test_dataset, batch_size=batch_size, shuffle=False)
    result_info = model.train_on_epoch(i, small_test_loader, model, loss_func, optimizer)


INFO:model.base_model:Epoch Step: 0 Train Loss: 0.694186270236969 elapsed: 0.0035219192504882812
INFO:model.base_model:Epoch Step: 1 Train Loss: 0.6941860914230347 elapsed: 0.0022530555725097656
INFO:model.base_model:Epoch Step: 2 Train Loss: 0.6941860318183899 elapsed: 0.001995086669921875
INFO:model.base_model:Epoch Step: 3 Train Loss: 0.6941858530044556 elapsed: 0.002009153366088867
INFO:model.base_model:Epoch Step: 4 Train Loss: 0.6941856145858765 elapsed: 0.001561880111694336
INFO:model.base_model:Epoch Step: 5 Train Loss: 0.6941852569580078 elapsed: 0.0020020008087158203
INFO:model.base_model:Epoch Step: 6 Train Loss: 0.694184422492981 elapsed: 0.0018601417541503906
INFO:model.base_model:Epoch Step: 7 Train Loss: 0.694183349609375 elapsed: 0.002974987030029297
INFO:model.base_model:Epoch Step: 8 Train Loss: 0.6941820979118347 elapsed: 0.0023348331451416016
INFO:model.base_model:Epoch Step: 9 Train Loss: 0.6941806077957153 elapsed: 0.002583026885986328
INFO:model.base_model:Epoch 

INFO:model.base_model:Epoch Step: 84 Train Loss: 0.6937612295150757 elapsed: 0.0018947124481201172
INFO:model.base_model:Epoch Step: 85 Train Loss: 0.693753719329834 elapsed: 0.0018811225891113281
INFO:model.base_model:Epoch Step: 86 Train Loss: 0.6937459707260132 elapsed: 0.0017528533935546875
INFO:model.base_model:Epoch Step: 87 Train Loss: 0.6937383413314819 elapsed: 0.0017468929290771484
INFO:model.base_model:Epoch Step: 88 Train Loss: 0.6937307715415955 elapsed: 0.0019431114196777344
INFO:model.base_model:Epoch Step: 89 Train Loss: 0.6937230825424194 elapsed: 0.001967906951904297
INFO:model.base_model:Epoch Step: 90 Train Loss: 0.693715512752533 elapsed: 0.0019218921661376953
INFO:model.base_model:Epoch Step: 91 Train Loss: 0.6937078237533569 elapsed: 0.002259969711303711
INFO:model.base_model:Epoch Step: 92 Train Loss: 0.6937001943588257 elapsed: 0.0020351409912109375
INFO:model.base_model:Epoch Step: 93 Train Loss: 0.6936925649642944 elapsed: 0.0022399425506591797
INFO:model.bas

INFO:model.base_model:Epoch Step: 167 Train Loss: 0.6932262778282166 elapsed: 0.0016279220581054688
INFO:model.base_model:Epoch Step: 168 Train Loss: 0.6932222843170166 elapsed: 0.0015139579772949219
INFO:model.base_model:Epoch Step: 169 Train Loss: 0.6932185888290405 elapsed: 0.0017468929290771484
INFO:model.base_model:Epoch Step: 170 Train Loss: 0.6932147741317749 elapsed: 0.0016438961029052734
INFO:model.base_model:Epoch Step: 171 Train Loss: 0.6932111978530884 elapsed: 0.0015749931335449219
INFO:model.base_model:Epoch Step: 172 Train Loss: 0.6932076215744019 elapsed: 0.0015568733215332031
INFO:model.base_model:Epoch Step: 173 Train Loss: 0.6932041645050049 elapsed: 0.0018038749694824219
INFO:model.base_model:Epoch Step: 174 Train Loss: 0.6932008266448975 elapsed: 0.0016961097717285156
INFO:model.base_model:Epoch Step: 175 Train Loss: 0.6931976079940796 elapsed: 0.001699209213256836
INFO:model.base_model:Epoch Step: 176 Train Loss: 0.6931943893432617 elapsed: 0.0017306804656982422
I

INFO:model.base_model:Epoch Step: 250 Train Loss: 0.6932631731033325 elapsed: 0.0016028881072998047
INFO:model.base_model:Epoch Step: 251 Train Loss: 0.693268358707428 elapsed: 0.0017580986022949219
INFO:model.base_model:Epoch Step: 252 Train Loss: 0.693273663520813 elapsed: 0.0016140937805175781
INFO:model.base_model:Epoch Step: 253 Train Loss: 0.6932790279388428 elapsed: 0.0016880035400390625
INFO:model.base_model:Epoch Step: 254 Train Loss: 0.6932845115661621 elapsed: 0.0016393661499023438
INFO:model.base_model:Epoch Step: 255 Train Loss: 0.6932900547981262 elapsed: 0.0019130706787109375
INFO:model.base_model:Epoch Step: 256 Train Loss: 0.6932957172393799 elapsed: 0.0016300678253173828
INFO:model.base_model:Epoch Step: 257 Train Loss: 0.6933015584945679 elapsed: 0.0018420219421386719
INFO:model.base_model:Epoch Step: 258 Train Loss: 0.6933073997497559 elapsed: 0.0016660690307617188
INFO:model.base_model:Epoch Step: 259 Train Loss: 0.6933133602142334 elapsed: 0.001725912094116211
INF

INFO:model.base_model:Epoch Step: 333 Train Loss: 0.6938447952270508 elapsed: 0.0016040802001953125
INFO:model.base_model:Epoch Step: 334 Train Loss: 0.693850576877594 elapsed: 0.0016748905181884766
INFO:model.base_model:Epoch Step: 335 Train Loss: 0.6938562393188477 elapsed: 0.0033118724822998047
INFO:model.base_model:Epoch Step: 336 Train Loss: 0.6938617825508118 elapsed: 0.0054929256439208984
INFO:model.base_model:Epoch Step: 337 Train Loss: 0.6938672065734863 elapsed: 0.005788087844848633
INFO:model.base_model:Epoch Step: 338 Train Loss: 0.6938725113868713 elapsed: 0.0022361278533935547
INFO:model.base_model:Epoch Step: 339 Train Loss: 0.6938776969909668 elapsed: 0.001993894577026367
INFO:model.base_model:Epoch Step: 340 Train Loss: 0.6938827037811279 elapsed: 0.002004861831665039
INFO:model.base_model:Epoch Step: 341 Train Loss: 0.6938876509666443 elapsed: 0.0020880699157714844
INFO:model.base_model:Epoch Step: 342 Train Loss: 0.6938924789428711 elapsed: 0.0022749900817871094
INFO

INFO:model.base_model:Epoch Step: 416 Train Loss: 0.6937911510467529 elapsed: 0.0017719268798828125
INFO:model.base_model:Epoch Step: 417 Train Loss: 0.6937843561172485 elapsed: 0.0018579959869384766
INFO:model.base_model:Epoch Step: 418 Train Loss: 0.6937774419784546 elapsed: 0.0017499923706054688
INFO:model.base_model:Epoch Step: 419 Train Loss: 0.6937704086303711 elapsed: 0.0020170211791992188
INFO:model.base_model:Epoch Step: 420 Train Loss: 0.6937633752822876 elapsed: 0.0017940998077392578
INFO:model.base_model:Epoch Step: 421 Train Loss: 0.6937562227249146 elapsed: 0.0016608238220214844
INFO:model.base_model:Epoch Step: 422 Train Loss: 0.6937490701675415 elapsed: 0.0016047954559326172
INFO:model.base_model:Epoch Step: 423 Train Loss: 0.6937417387962341 elapsed: 0.0018000602722167969
INFO:model.base_model:Epoch Step: 424 Train Loss: 0.6937344074249268 elapsed: 0.0019829273223876953
INFO:model.base_model:Epoch Step: 425 Train Loss: 0.6937270164489746 elapsed: 0.0019221305847167969


INFO:model.base_model:Epoch Step: 499 Train Loss: 0.6932302117347717 elapsed: 0.0016388893127441406
INFO:model.base_model:Epoch Step: 500 Train Loss: 0.693226158618927 elapsed: 0.0016756057739257812
INFO:model.base_model:Epoch Step: 501 Train Loss: 0.6932222843170166 elapsed: 0.001641988754272461
INFO:model.base_model:Epoch Step: 502 Train Loss: 0.693218469619751 elapsed: 0.0016379356384277344
INFO:model.base_model:Epoch Step: 503 Train Loss: 0.6932147145271301 elapsed: 0.0016641616821289062
INFO:model.base_model:Epoch Step: 504 Train Loss: 0.6932110786437988 elapsed: 0.0017311573028564453
INFO:model.base_model:Epoch Step: 505 Train Loss: 0.6932076215744019 elapsed: 0.0016338825225830078
INFO:model.base_model:Epoch Step: 506 Train Loss: 0.6932041645050049 elapsed: 0.0015671253204345703
INFO:model.base_model:Epoch Step: 507 Train Loss: 0.6932008862495422 elapsed: 0.0017740726470947266
INFO:model.base_model:Epoch Step: 508 Train Loss: 0.6931976675987244 elapsed: 0.0014338493347167969
INF

INFO:model.base_model:Epoch Step: 582 Train Loss: 0.6932106614112854 elapsed: 0.0014998912811279297
INFO:model.base_model:Epoch Step: 583 Train Loss: 0.6932135224342346 elapsed: 0.0018863677978515625
INFO:model.base_model:Epoch Step: 584 Train Loss: 0.6932163834571838 elapsed: 0.0018870830535888672
INFO:model.base_model:Epoch Step: 585 Train Loss: 0.6932193040847778 elapsed: 0.0015289783477783203
INFO:model.base_model:Epoch Step: 586 Train Loss: 0.6932222843170166 elapsed: 0.0016727447509765625
INFO:model.base_model:Epoch Step: 587 Train Loss: 0.6932253241539001 elapsed: 0.001783132553100586
INFO:model.base_model:Epoch Step: 588 Train Loss: 0.6932284235954285 elapsed: 0.0016901493072509766
INFO:model.base_model:Epoch Step: 589 Train Loss: 0.6932315826416016 elapsed: 0.001725912094116211
INFO:model.base_model:Epoch Step: 590 Train Loss: 0.6932346820831299 elapsed: 0.00186920166015625
INFO:model.base_model:Epoch Step: 591 Train Loss: 0.6932379007339478 elapsed: 0.0017769336700439453
INFO

INFO:model.base_model:Epoch Step: 665 Train Loss: 0.6935180425643921 elapsed: 0.0015418529510498047
INFO:model.base_model:Epoch Step: 666 Train Loss: 0.6935214996337891 elapsed: 0.0015780925750732422
INFO:model.base_model:Epoch Step: 667 Train Loss: 0.693524956703186 elapsed: 0.0018720626831054688
INFO:model.base_model:Epoch Step: 668 Train Loss: 0.693528413772583 elapsed: 0.0016791820526123047
INFO:model.base_model:Epoch Step: 669 Train Loss: 0.69353187084198 elapsed: 0.001683950424194336
INFO:model.base_model:Epoch Step: 670 Train Loss: 0.6935352087020874 elapsed: 0.0016391277313232422
INFO:model.base_model:Epoch Step: 671 Train Loss: 0.6935386061668396 elapsed: 0.0017321109771728516
INFO:model.base_model:Epoch Step: 672 Train Loss: 0.6935420036315918 elapsed: 0.001611948013305664
INFO:model.base_model:Epoch Step: 673 Train Loss: 0.6935452222824097 elapsed: 0.0015900135040283203
INFO:model.base_model:Epoch Step: 674 Train Loss: 0.6935484409332275 elapsed: 0.0015869140625
INFO:model.b

INFO:model.base_model:Epoch Step: 748 Train Loss: 0.6936678886413574 elapsed: 0.0014541149139404297
INFO:model.base_model:Epoch Step: 749 Train Loss: 0.6936675310134888 elapsed: 0.0015671253204345703
INFO:model.base_model:Epoch Step: 750 Train Loss: 0.6936671733856201 elapsed: 0.0016298294067382812
INFO:model.base_model:Epoch Step: 751 Train Loss: 0.6936667561531067 elapsed: 0.0014939308166503906
INFO:model.base_model:Epoch Step: 752 Train Loss: 0.6936663389205933 elapsed: 0.0021729469299316406
INFO:model.base_model:Epoch Step: 753 Train Loss: 0.6936658024787903 elapsed: 0.002023935317993164
INFO:model.base_model:Epoch Step: 754 Train Loss: 0.6936652064323425 elapsed: 0.001901865005493164
INFO:model.base_model:Epoch Step: 755 Train Loss: 0.6936646699905396 elapsed: 0.0018169879913330078
INFO:model.base_model:Epoch Step: 756 Train Loss: 0.693664014339447 elapsed: 0.0016171932220458984
INFO:model.base_model:Epoch Step: 757 Train Loss: 0.6936632394790649 elapsed: 0.001554727554321289
INFO

INFO:model.base_model:Epoch Step: 831 Train Loss: 0.6934887170791626 elapsed: 0.0018391609191894531
INFO:model.base_model:Epoch Step: 832 Train Loss: 0.6934852600097656 elapsed: 0.0015261173248291016
INFO:model.base_model:Epoch Step: 833 Train Loss: 0.6934816837310791 elapsed: 0.0017688274383544922
INFO:model.base_model:Epoch Step: 834 Train Loss: 0.6934781670570374 elapsed: 0.0016226768493652344
INFO:model.base_model:Epoch Step: 835 Train Loss: 0.6934746503829956 elapsed: 0.0014929771423339844
INFO:model.base_model:Epoch Step: 836 Train Loss: 0.6934709548950195 elapsed: 0.001611948013305664
INFO:model.base_model:Epoch Step: 837 Train Loss: 0.6934674978256226 elapsed: 0.0017330646514892578
INFO:model.base_model:Epoch Step: 838 Train Loss: 0.6934638023376465 elapsed: 0.0017192363739013672
INFO:model.base_model:Epoch Step: 839 Train Loss: 0.69346022605896 elapsed: 0.001550912857055664
INFO:model.base_model:Epoch Step: 840 Train Loss: 0.6934566497802734 elapsed: 0.0018260478973388672
INFO

INFO:model.base_model:Epoch Step: 914 Train Loss: 0.6932119131088257 elapsed: 0.0016241073608398438
INFO:model.base_model:Epoch Step: 915 Train Loss: 0.6932095289230347 elapsed: 0.0015575885772705078
INFO:model.base_model:Epoch Step: 916 Train Loss: 0.6932072043418884 elapsed: 0.0014171600341796875
INFO:model.base_model:Epoch Step: 917 Train Loss: 0.6932048797607422 elapsed: 0.0018439292907714844
INFO:model.base_model:Epoch Step: 918 Train Loss: 0.6932026147842407 elapsed: 0.002051115036010742
INFO:model.base_model:Epoch Step: 919 Train Loss: 0.6932003498077393 elapsed: 0.0020449161529541016
INFO:model.base_model:Epoch Step: 920 Train Loss: 0.6931981444358826 elapsed: 0.0025179386138916016
INFO:model.base_model:Epoch Step: 921 Train Loss: 0.6931959986686707 elapsed: 0.001608133316040039
INFO:model.base_model:Epoch Step: 922 Train Loss: 0.6931939125061035 elapsed: 0.0016841888427734375
INFO:model.base_model:Epoch Step: 923 Train Loss: 0.6931918263435364 elapsed: 0.001750946044921875
INF

INFO:model.base_model:Epoch Step: 997 Train Loss: 0.6931737661361694 elapsed: 0.0019609928131103516
INFO:model.base_model:Epoch Step: 998 Train Loss: 0.6931754350662231 elapsed: 0.0018372535705566406
INFO:model.base_model:Epoch Step: 999 Train Loss: 0.6931772232055664 elapsed: 0.001771688461303711
INFO:model.base_model:Epoch Step: 1000 Train Loss: 0.6931789517402649 elapsed: 0.001894235610961914
INFO:model.base_model:Epoch Step: 1001 Train Loss: 0.6931807398796082 elapsed: 0.0018010139465332031
INFO:model.base_model:Epoch Step: 1002 Train Loss: 0.6931825876235962 elapsed: 0.0017027854919433594
INFO:model.base_model:Epoch Step: 1003 Train Loss: 0.693184494972229 elapsed: 0.002256155014038086
INFO:model.base_model:Epoch Step: 1004 Train Loss: 0.6931865215301514 elapsed: 0.0018818378448486328
INFO:model.base_model:Epoch Step: 1005 Train Loss: 0.693188488483429 elapsed: 0.0022008419036865234
INFO:model.base_model:Epoch Step: 1006 Train Loss: 0.6931905746459961 elapsed: 0.001872777938842773

INFO:model.base_model:Epoch Step: 1079 Train Loss: 0.6934195756912231 elapsed: 0.0022687911987304688
INFO:model.base_model:Epoch Step: 1080 Train Loss: 0.6934230327606201 elapsed: 0.0021147727966308594
INFO:model.base_model:Epoch Step: 1081 Train Loss: 0.6934263706207275 elapsed: 0.0018932819366455078
INFO:model.base_model:Epoch Step: 1082 Train Loss: 0.6934297680854797 elapsed: 0.007043123245239258
INFO:model.base_model:Epoch Step: 1083 Train Loss: 0.6934331655502319 elapsed: 0.003826141357421875
INFO:model.base_model:Epoch Step: 1084 Train Loss: 0.6934364438056946 elapsed: 0.004071950912475586
INFO:model.base_model:Epoch Step: 1085 Train Loss: 0.693439781665802 elapsed: 0.002753019332885742
INFO:model.base_model:Epoch Step: 1086 Train Loss: 0.6934430599212646 elapsed: 0.0021660327911376953
INFO:model.base_model:Epoch Step: 1087 Train Loss: 0.6934462785720825 elapsed: 0.001809835433959961
INFO:model.base_model:Epoch Step: 1088 Train Loss: 0.6934494972229004 elapsed: 0.0015840530395507

INFO:model.base_model:Epoch Step: 1161 Train Loss: 0.6935539245605469 elapsed: 0.002974987030029297
INFO:model.base_model:Epoch Step: 1162 Train Loss: 0.69355309009552 elapsed: 0.0018820762634277344
INFO:model.base_model:Epoch Step: 1163 Train Loss: 0.6935521960258484 elapsed: 0.0017628669738769531
INFO:model.base_model:Epoch Step: 1164 Train Loss: 0.6935513019561768 elapsed: 0.0020780563354492188
INFO:model.base_model:Epoch Step: 1165 Train Loss: 0.6935502290725708 elapsed: 0.0019538402557373047
INFO:model.base_model:Epoch Step: 1166 Train Loss: 0.6935491561889648 elapsed: 0.002185821533203125
INFO:model.base_model:Epoch Step: 1167 Train Loss: 0.6935480237007141 elapsed: 0.001992940902709961
INFO:model.base_model:Epoch Step: 1168 Train Loss: 0.6935467720031738 elapsed: 0.0018880367279052734
INFO:model.base_model:Epoch Step: 1169 Train Loss: 0.6935455799102783 elapsed: 0.002123117446899414
INFO:model.base_model:Epoch Step: 1170 Train Loss: 0.6935442090034485 elapsed: 0.0017809867858886

INFO:model.base_model:Epoch Step: 1243 Train Loss: 0.6933472752571106 elapsed: 0.0016982555389404297
INFO:model.base_model:Epoch Step: 1244 Train Loss: 0.6933441162109375 elapsed: 0.0017178058624267578
INFO:model.base_model:Epoch Step: 1245 Train Loss: 0.6933408975601196 elapsed: 0.0017790794372558594
INFO:model.base_model:Epoch Step: 1246 Train Loss: 0.6933377385139465 elapsed: 0.0018019676208496094
INFO:model.base_model:Epoch Step: 1247 Train Loss: 0.6933345794677734 elapsed: 0.001615762710571289
INFO:model.base_model:Epoch Step: 1248 Train Loss: 0.6933314800262451 elapsed: 0.0015361309051513672
INFO:model.base_model:Epoch Step: 1249 Train Loss: 0.6933282613754272 elapsed: 0.0014960765838623047
INFO:model.base_model:Epoch Step: 1250 Train Loss: 0.6933251619338989 elapsed: 0.0015628337860107422
INFO:model.base_model:Epoch Step: 1251 Train Loss: 0.6933220624923706 elapsed: 0.0016641616821289062
INFO:model.base_model:Epoch Step: 1252 Train Loss: 0.6933189630508423 elapsed: 0.00180888175

INFO:model.base_model:Epoch Step: 1325 Train Loss: 0.6931628584861755 elapsed: 0.0017337799072265625
INFO:model.base_model:Epoch Step: 1326 Train Loss: 0.6931619048118591 elapsed: 0.0015859603881835938
INFO:model.base_model:Epoch Step: 1327 Train Loss: 0.6931610107421875 elapsed: 0.0017669200897216797
INFO:model.base_model:Epoch Step: 1328 Train Loss: 0.6931600570678711 elapsed: 0.0017731189727783203
INFO:model.base_model:Epoch Step: 1329 Train Loss: 0.6931592226028442 elapsed: 0.0015399456024169922
INFO:model.base_model:Epoch Step: 1330 Train Loss: 0.6931583881378174 elapsed: 0.0016601085662841797
INFO:model.base_model:Epoch Step: 1331 Train Loss: 0.6931575536727905 elapsed: 0.001644134521484375
INFO:model.base_model:Epoch Step: 1332 Train Loss: 0.6931567192077637 elapsed: 0.001512289047241211
INFO:model.base_model:Epoch Step: 1333 Train Loss: 0.6931560039520264 elapsed: 0.001687765121459961
INFO:model.base_model:Epoch Step: 1334 Train Loss: 0.6931552886962891 elapsed: 0.0017197132110

INFO:model.base_model:Epoch Step: 1407 Train Loss: 0.6931744813919067 elapsed: 0.001741170883178711
INFO:model.base_model:Epoch Step: 1408 Train Loss: 0.6931755542755127 elapsed: 0.0016422271728515625
INFO:model.base_model:Epoch Step: 1409 Train Loss: 0.6931765079498291 elapsed: 0.0017189979553222656
INFO:model.base_model:Epoch Step: 1410 Train Loss: 0.6931775808334351 elapsed: 0.001834869384765625
INFO:model.base_model:Epoch Step: 1411 Train Loss: 0.6931785941123962 elapsed: 0.001965045928955078
INFO:model.base_model:Epoch Step: 1412 Train Loss: 0.6931796073913574 elapsed: 0.0017821788787841797
INFO:model.base_model:Epoch Step: 1413 Train Loss: 0.6931807994842529 elapsed: 0.0016407966613769531
INFO:model.base_model:Epoch Step: 1414 Train Loss: 0.6931818127632141 elapsed: 0.0017359256744384766
INFO:model.base_model:Epoch Step: 1415 Train Loss: 0.6931828856468201 elapsed: 0.001756906509399414
INFO:model.base_model:Epoch Step: 1416 Train Loss: 0.6931840181350708 elapsed: 0.00167083740234

INFO:model.base_model:Epoch Step: 1489 Train Loss: 0.6932752132415771 elapsed: 0.0015230178833007812
INFO:model.base_model:Epoch Step: 1490 Train Loss: 0.6932764053344727 elapsed: 0.00154876708984375
INFO:model.base_model:Epoch Step: 1491 Train Loss: 0.6932774782180786 elapsed: 0.0015480518341064453
INFO:model.base_model:Epoch Step: 1492 Train Loss: 0.6932786703109741 elapsed: 0.0016570091247558594
INFO:model.base_model:Epoch Step: 1493 Train Loss: 0.6932798624038696 elapsed: 0.0017740726470947266
INFO:model.base_model:Epoch Step: 1494 Train Loss: 0.6932809352874756 elapsed: 0.0015659332275390625
INFO:model.base_model:Epoch Step: 1495 Train Loss: 0.6932820081710815 elapsed: 0.0016529560089111328
INFO:model.base_model:Epoch Step: 1496 Train Loss: 0.6932831406593323 elapsed: 0.0016171932220458984
INFO:model.base_model:Epoch Step: 1497 Train Loss: 0.693284273147583 elapsed: 0.001796722412109375
INFO:model.base_model:Epoch Step: 1498 Train Loss: 0.693285346031189 elapsed: 0.001567125320434

INFO:model.base_model:Epoch Step: 1571 Train Loss: 0.6933293342590332 elapsed: 0.002459287643432617
INFO:model.base_model:Epoch Step: 1572 Train Loss: 0.6933293342590332 elapsed: 0.002125263214111328
INFO:model.base_model:Epoch Step: 1573 Train Loss: 0.693329393863678 elapsed: 0.0021810531616210938
INFO:model.base_model:Epoch Step: 1574 Train Loss: 0.6933293342590332 elapsed: 0.002249002456665039
INFO:model.base_model:Epoch Step: 1575 Train Loss: 0.6933293342590332 elapsed: 0.0022079944610595703
INFO:model.base_model:Epoch Step: 1576 Train Loss: 0.6933293342590332 elapsed: 0.0016901493072509766
INFO:model.base_model:Epoch Step: 1577 Train Loss: 0.6933292746543884 elapsed: 0.002621889114379883
INFO:model.base_model:Epoch Step: 1578 Train Loss: 0.6933292150497437 elapsed: 0.0018093585968017578
INFO:model.base_model:Epoch Step: 1579 Train Loss: 0.6933291554450989 elapsed: 0.0015821456909179688
INFO:model.base_model:Epoch Step: 1580 Train Loss: 0.6933290362358093 elapsed: 0.001661777496337

INFO:model.base_model:Epoch Step: 1653 Train Loss: 0.6932837963104248 elapsed: 0.0016121864318847656
INFO:model.base_model:Epoch Step: 1654 Train Loss: 0.6932827234268188 elapsed: 0.0017058849334716797
INFO:model.base_model:Epoch Step: 1655 Train Loss: 0.6932816505432129 elapsed: 0.0018329620361328125
INFO:model.base_model:Epoch Step: 1656 Train Loss: 0.6932806968688965 elapsed: 0.00160980224609375
INFO:model.base_model:Epoch Step: 1657 Train Loss: 0.6932796239852905 elapsed: 0.0016360282897949219
INFO:model.base_model:Epoch Step: 1658 Train Loss: 0.6932785511016846 elapsed: 0.0017170906066894531
INFO:model.base_model:Epoch Step: 1659 Train Loss: 0.6932774782180786 elapsed: 0.0015718936920166016
INFO:model.base_model:Epoch Step: 1660 Train Loss: 0.6932764053344727 elapsed: 0.0016269683837890625
INFO:model.base_model:Epoch Step: 1661 Train Loss: 0.6932753324508667 elapsed: 0.0017468929290771484
INFO:model.base_model:Epoch Step: 1662 Train Loss: 0.6932742595672607 elapsed: 0.001477003097

INFO:model.base_model:Epoch Step: 1735 Train Loss: 0.6931921243667603 elapsed: 0.0015780925750732422
INFO:model.base_model:Epoch Step: 1736 Train Loss: 0.6931911706924438 elapsed: 0.0015039443969726562
INFO:model.base_model:Epoch Step: 1737 Train Loss: 0.6931901574134827 elapsed: 0.0016591548919677734
INFO:model.base_model:Epoch Step: 1738 Train Loss: 0.693189263343811 elapsed: 0.0017852783203125
INFO:model.base_model:Epoch Step: 1739 Train Loss: 0.6931883096694946 elapsed: 0.0016989707946777344
INFO:model.base_model:Epoch Step: 1740 Train Loss: 0.6931872367858887 elapsed: 0.0018482208251953125
INFO:model.base_model:Epoch Step: 1741 Train Loss: 0.693186342716217 elapsed: 0.001796722412109375
INFO:model.base_model:Epoch Step: 1742 Train Loss: 0.6931854486465454 elapsed: 0.0015590190887451172
INFO:model.base_model:Epoch Step: 1743 Train Loss: 0.693184494972229 elapsed: 0.001741170883178711
INFO:model.base_model:Epoch Step: 1744 Train Loss: 0.6931835412979126 elapsed: 0.001598119735717773

INFO:model.base_model:Epoch Step: 1817 Train Loss: 0.6931471824645996 elapsed: 0.0017621517181396484
INFO:model.base_model:Epoch Step: 1818 Train Loss: 0.6931471824645996 elapsed: 0.0016818046569824219
INFO:model.base_model:Epoch Step: 1819 Train Loss: 0.6931471824645996 elapsed: 0.0018858909606933594
INFO:model.base_model:Epoch Step: 1820 Train Loss: 0.6931472420692444 elapsed: 0.0018591880798339844
INFO:model.base_model:Epoch Step: 1821 Train Loss: 0.6931472420692444 elapsed: 0.002028942108154297
INFO:model.base_model:Epoch Step: 1822 Train Loss: 0.6931473016738892 elapsed: 0.0017688274383544922
INFO:model.base_model:Epoch Step: 1823 Train Loss: 0.6931473612785339 elapsed: 0.0016968250274658203
INFO:model.base_model:Epoch Step: 1824 Train Loss: 0.6931474804878235 elapsed: 0.0017979145050048828
INFO:model.base_model:Epoch Step: 1825 Train Loss: 0.6931475400924683 elapsed: 0.0015828609466552734
INFO:model.base_model:Epoch Step: 1826 Train Loss: 0.6931476593017578 elapsed: 0.00209474563

INFO:model.base_model:Epoch Step: 1899 Train Loss: 0.6931904554367065 elapsed: 0.0015490055084228516
INFO:model.base_model:Epoch Step: 1900 Train Loss: 0.693191409111023 elapsed: 0.001699686050415039
INFO:model.base_model:Epoch Step: 1901 Train Loss: 0.6931923627853394 elapsed: 0.0014979839324951172
INFO:model.base_model:Epoch Step: 1902 Train Loss: 0.6931933760643005 elapsed: 0.001645803451538086
INFO:model.base_model:Epoch Step: 1903 Train Loss: 0.6931943893432617 elapsed: 0.0016181468963623047
INFO:model.base_model:Epoch Step: 1904 Train Loss: 0.6931953430175781 elapsed: 0.0016829967498779297
INFO:model.base_model:Epoch Step: 1905 Train Loss: 0.6931964159011841 elapsed: 0.0018570423126220703
INFO:model.base_model:Epoch Step: 1906 Train Loss: 0.6931973695755005 elapsed: 0.0015530586242675781
INFO:model.base_model:Epoch Step: 1907 Train Loss: 0.6931984424591064 elapsed: 0.0015799999237060547
INFO:model.base_model:Epoch Step: 1908 Train Loss: 0.6931994557380676 elapsed: 0.0017697811126

INFO:model.base_model:Epoch Step: 1981 Train Loss: 0.6932761073112488 elapsed: 0.0023660659790039062
INFO:model.base_model:Epoch Step: 1982 Train Loss: 0.6932770013809204 elapsed: 0.002246856689453125
INFO:model.base_model:Epoch Step: 1983 Train Loss: 0.6932778358459473 elapsed: 0.001708984375
INFO:model.base_model:Epoch Step: 1984 Train Loss: 0.6932787299156189 elapsed: 0.0019829273223876953
INFO:model.base_model:Epoch Step: 1985 Train Loss: 0.6932795643806458 elapsed: 0.0019829273223876953
INFO:model.base_model:Epoch Step: 1986 Train Loss: 0.6932804584503174 elapsed: 0.0017800331115722656
INFO:model.base_model:Epoch Step: 1987 Train Loss: 0.6932812929153442 elapsed: 0.0018470287322998047
INFO:model.base_model:Epoch Step: 1988 Train Loss: 0.6932820677757263 elapsed: 0.002035856246948242
INFO:model.base_model:Epoch Step: 1989 Train Loss: 0.6932829022407532 elapsed: 0.0018429756164550781
INFO:model.base_model:Epoch Step: 1990 Train Loss: 0.6932836771011353 elapsed: 0.0016560554504394531

INFO:model.base_model:Epoch Step: 2063 Train Loss: 0.6933020353317261 elapsed: 0.0024230480194091797
INFO:model.base_model:Epoch Step: 2064 Train Loss: 0.6933016777038574 elapsed: 0.0021429061889648438
INFO:model.base_model:Epoch Step: 2065 Train Loss: 0.6933013200759888 elapsed: 0.002064943313598633
INFO:model.base_model:Epoch Step: 2066 Train Loss: 0.6933009624481201 elapsed: 0.0018880367279052734
INFO:model.base_model:Epoch Step: 2067 Train Loss: 0.6933005452156067 elapsed: 0.005137920379638672
INFO:model.base_model:Epoch Step: 2068 Train Loss: 0.6933001279830933 elapsed: 0.0020170211791992188
INFO:model.base_model:Epoch Step: 2069 Train Loss: 0.6932996511459351 elapsed: 0.0020449161529541016
INFO:model.base_model:Epoch Step: 2070 Train Loss: 0.6932991743087769 elapsed: 0.0019991397857666016
INFO:model.base_model:Epoch Step: 2071 Train Loss: 0.6932986974716187 elapsed: 0.0017769336700439453
INFO:model.base_model:Epoch Step: 2072 Train Loss: 0.6932982206344604 elapsed: 0.001677036285

INFO:model.base_model:Epoch Step: 2145 Train Loss: 0.6932294964790344 elapsed: 0.0018360614776611328
INFO:model.base_model:Epoch Step: 2146 Train Loss: 0.6932282447814941 elapsed: 0.0019071102142333984
INFO:model.base_model:Epoch Step: 2147 Train Loss: 0.6932270526885986 elapsed: 0.0017058849334716797
INFO:model.base_model:Epoch Step: 2148 Train Loss: 0.6932259202003479 elapsed: 0.0017957687377929688
INFO:model.base_model:Epoch Step: 2149 Train Loss: 0.6932246685028076 elapsed: 0.0019571781158447266
INFO:model.base_model:Epoch Step: 2150 Train Loss: 0.6932234764099121 elapsed: 0.0019631385803222656
INFO:model.base_model:Epoch Step: 2151 Train Loss: 0.6932222843170166 elapsed: 0.0019519329071044922
INFO:model.base_model:Epoch Step: 2152 Train Loss: 0.6932210922241211 elapsed: 0.002012014389038086
INFO:model.base_model:Epoch Step: 2153 Train Loss: 0.6932199001312256 elapsed: 0.0017802715301513672
INFO:model.base_model:Epoch Step: 2154 Train Loss: 0.6932187080383301 elapsed: 0.00173521041

INFO:model.base_model:Epoch Step: 2227 Train Loss: 0.6931519508361816 elapsed: 0.001965045928955078
INFO:model.base_model:Epoch Step: 2228 Train Loss: 0.6931515336036682 elapsed: 0.00199127197265625
INFO:model.base_model:Epoch Step: 2229 Train Loss: 0.6931511163711548 elapsed: 0.0019030570983886719
INFO:model.base_model:Epoch Step: 2230 Train Loss: 0.6931507587432861 elapsed: 0.002147674560546875
INFO:model.base_model:Epoch Step: 2231 Train Loss: 0.6931504011154175 elapsed: 0.0020999908447265625
INFO:model.base_model:Epoch Step: 2232 Train Loss: 0.6931500434875488 elapsed: 0.002077817916870117
INFO:model.base_model:Epoch Step: 2233 Train Loss: 0.6931496858596802 elapsed: 0.001796722412109375
INFO:model.base_model:Epoch Step: 2234 Train Loss: 0.6931493878364563 elapsed: 0.001783132553100586
INFO:model.base_model:Epoch Step: 2235 Train Loss: 0.6931491494178772 elapsed: 0.0019178390502929688
INFO:model.base_model:Epoch Step: 2236 Train Loss: 0.6931488513946533 elapsed: 0.00207281112670898

INFO:model.base_model:Epoch Step: 2309 Train Loss: 0.6931860446929932 elapsed: 0.0018699169158935547
INFO:model.base_model:Epoch Step: 2310 Train Loss: 0.6931873559951782 elapsed: 0.0019142627716064453
INFO:model.base_model:Epoch Step: 2311 Train Loss: 0.6931886672973633 elapsed: 0.0018591880798339844
INFO:model.base_model:Epoch Step: 2312 Train Loss: 0.6931899189949036 elapsed: 0.0021402835845947266
INFO:model.base_model:Epoch Step: 2313 Train Loss: 0.6931912899017334 elapsed: 0.0023660659790039062
INFO:model.base_model:Epoch Step: 2314 Train Loss: 0.6931926012039185 elapsed: 0.0020368099212646484
INFO:model.base_model:Epoch Step: 2315 Train Loss: 0.6931939125061035 elapsed: 0.0020170211791992188
INFO:model.base_model:Epoch Step: 2316 Train Loss: 0.6931953430175781 elapsed: 0.0020301342010498047
INFO:model.base_model:Epoch Step: 2317 Train Loss: 0.6931966543197632 elapsed: 0.001730203628540039
INFO:model.base_model:Epoch Step: 2318 Train Loss: 0.6931981444358826 elapsed: 0.00205683708

INFO:model.base_model:Epoch Step: 2391 Train Loss: 0.6933208703994751 elapsed: 0.0016620159149169922
INFO:model.base_model:Epoch Step: 2392 Train Loss: 0.6933224201202393 elapsed: 0.0017211437225341797
INFO:model.base_model:Epoch Step: 2393 Train Loss: 0.6933239698410034 elapsed: 0.0015628337860107422
INFO:model.base_model:Epoch Step: 2394 Train Loss: 0.6933254599571228 elapsed: 0.001706838607788086
INFO:model.base_model:Epoch Step: 2395 Train Loss: 0.6933268904685974 elapsed: 0.0017390251159667969
INFO:model.base_model:Epoch Step: 2396 Train Loss: 0.6933283805847168 elapsed: 0.0017540454864501953
INFO:model.base_model:Epoch Step: 2397 Train Loss: 0.6933298110961914 elapsed: 0.0016779899597167969
INFO:model.base_model:Epoch Step: 2398 Train Loss: 0.693331241607666 elapsed: 0.0016741752624511719
INFO:model.base_model:Epoch Step: 2399 Train Loss: 0.6933326721191406 elapsed: 0.0017130374908447266
INFO:model.base_model:Epoch Step: 2400 Train Loss: 0.6933339834213257 elapsed: 0.001725912094

INFO:model.base_model:Epoch Step: 2473 Train Loss: 0.6933547258377075 elapsed: 0.0016720294952392578
INFO:model.base_model:Epoch Step: 2474 Train Loss: 0.6933537125587463 elapsed: 0.0017049312591552734
INFO:model.base_model:Epoch Step: 2475 Train Loss: 0.6933526992797852 elapsed: 0.0016961097717285156
INFO:model.base_model:Epoch Step: 2476 Train Loss: 0.6933516263961792 elapsed: 0.0016758441925048828
INFO:model.base_model:Epoch Step: 2477 Train Loss: 0.6933505535125732 elapsed: 0.0015368461608886719
INFO:model.base_model:Epoch Step: 2478 Train Loss: 0.6933494210243225 elapsed: 0.0016412734985351562
INFO:model.base_model:Epoch Step: 2479 Train Loss: 0.6933482885360718 elapsed: 0.0015883445739746094
INFO:model.base_model:Epoch Step: 2480 Train Loss: 0.6933470964431763 elapsed: 0.00167083740234375
INFO:model.base_model:Epoch Step: 2481 Train Loss: 0.6933459639549255 elapsed: 0.001734018325805664
INFO:model.base_model:Epoch Step: 2482 Train Loss: 0.6933447122573853 elapsed: 0.0016767978668

INFO:model.base_model:Epoch Step: 2555 Train Loss: 0.6932100653648376 elapsed: 0.0016570091247558594
INFO:model.base_model:Epoch Step: 2556 Train Loss: 0.6932082176208496 elapsed: 0.0019309520721435547
INFO:model.base_model:Epoch Step: 2557 Train Loss: 0.6932063698768616 elapsed: 0.0017352104187011719
INFO:model.base_model:Epoch Step: 2558 Train Loss: 0.6932046413421631 elapsed: 0.0017201900482177734
INFO:model.base_model:Epoch Step: 2559 Train Loss: 0.6932027339935303 elapsed: 0.0016689300537109375
INFO:model.base_model:Epoch Step: 2560 Train Loss: 0.693200945854187 elapsed: 0.0017828941345214844
INFO:model.base_model:Epoch Step: 2561 Train Loss: 0.6931992769241333 elapsed: 0.00403904914855957
INFO:model.base_model:Epoch Step: 2562 Train Loss: 0.69319748878479 elapsed: 0.008594036102294922
INFO:model.base_model:Epoch Step: 2563 Train Loss: 0.6931958198547363 elapsed: 0.0035920143127441406
INFO:model.base_model:Epoch Step: 2564 Train Loss: 0.6931940913200378 elapsed: 0.0019299983978271

INFO:model.base_model:Epoch Step: 2637 Train Loss: 0.6931570768356323 elapsed: 0.0018398761749267578
INFO:model.base_model:Epoch Step: 2638 Train Loss: 0.6931580305099487 elapsed: 0.0018050670623779297
INFO:model.base_model:Epoch Step: 2639 Train Loss: 0.6931589841842651 elapsed: 0.0017521381378173828
INFO:model.base_model:Epoch Step: 2640 Train Loss: 0.6931599378585815 elapsed: 0.001641988754272461
INFO:model.base_model:Epoch Step: 2641 Train Loss: 0.6931610107421875 elapsed: 0.0016331672668457031
INFO:model.base_model:Epoch Step: 2642 Train Loss: 0.6931620836257935 elapsed: 0.0015001296997070312
INFO:model.base_model:Epoch Step: 2643 Train Loss: 0.6931631565093994 elapsed: 0.0014960765838623047
INFO:model.base_model:Epoch Step: 2644 Train Loss: 0.6931643486022949 elapsed: 0.0015361309051513672
INFO:model.base_model:Epoch Step: 2645 Train Loss: 0.6931655406951904 elapsed: 0.0017001628875732422
INFO:model.base_model:Epoch Step: 2646 Train Loss: 0.6931667327880859 elapsed: 0.00157690048

INFO:model.base_model:Epoch Step: 2719 Train Loss: 0.6933202743530273 elapsed: 0.0017542839050292969
INFO:model.base_model:Epoch Step: 2720 Train Loss: 0.6933224201202393 elapsed: 0.0016620159149169922
INFO:model.base_model:Epoch Step: 2721 Train Loss: 0.693324625492096 elapsed: 0.0017690658569335938
INFO:model.base_model:Epoch Step: 2722 Train Loss: 0.6933267712593079 elapsed: 0.001626729965209961
INFO:model.base_model:Epoch Step: 2723 Train Loss: 0.693328857421875 elapsed: 0.001970052719116211
INFO:model.base_model:Epoch Step: 2724 Train Loss: 0.6933310031890869 elapsed: 0.001728057861328125
INFO:model.base_model:Epoch Step: 2725 Train Loss: 0.6933330297470093 elapsed: 0.0018908977508544922
INFO:model.base_model:Epoch Step: 2726 Train Loss: 0.6933350563049316 elapsed: 0.0017750263214111328
INFO:model.base_model:Epoch Step: 2727 Train Loss: 0.693337082862854 elapsed: 0.001789093017578125
INFO:model.base_model:Epoch Step: 2728 Train Loss: 0.6933389902114868 elapsed: 0.00162720680236816

INFO:model.base_model:Epoch Step: 2801 Train Loss: 0.6933289766311646 elapsed: 0.0017170906066894531
INFO:model.base_model:Epoch Step: 2802 Train Loss: 0.6933264136314392 elapsed: 0.0016369819641113281
INFO:model.base_model:Epoch Step: 2803 Train Loss: 0.6933238506317139 elapsed: 0.0016407966613769531
INFO:model.base_model:Epoch Step: 2804 Train Loss: 0.6933212280273438 elapsed: 0.0016200542449951172
INFO:model.base_model:Epoch Step: 2805 Train Loss: 0.6933184862136841 elapsed: 0.0020117759704589844
INFO:model.base_model:Epoch Step: 2806 Train Loss: 0.6933157444000244 elapsed: 0.001644134521484375
INFO:model.base_model:Epoch Step: 2807 Train Loss: 0.6933130025863647 elapsed: 0.0019159317016601562
INFO:model.base_model:Epoch Step: 2808 Train Loss: 0.6933102011680603 elapsed: 0.0017659664154052734
INFO:model.base_model:Epoch Step: 2809 Train Loss: 0.6933072805404663 elapsed: 0.0015950202941894531
INFO:model.base_model:Epoch Step: 2810 Train Loss: 0.6933044195175171 elapsed: 0.00157403945

INFO:model.base_model:Epoch Step: 2883 Train Loss: 0.6931523084640503 elapsed: 0.0016911029815673828
INFO:model.base_model:Epoch Step: 2884 Train Loss: 0.6931536793708801 elapsed: 0.0015871524810791016
INFO:model.base_model:Epoch Step: 2885 Train Loss: 0.6931551694869995 elapsed: 0.0016179084777832031
INFO:model.base_model:Epoch Step: 2886 Train Loss: 0.6931567192077637 elapsed: 0.0016498565673828125
INFO:model.base_model:Epoch Step: 2887 Train Loss: 0.6931584477424622 elapsed: 0.002180814743041992
INFO:model.base_model:Epoch Step: 2888 Train Loss: 0.6931604146957397 elapsed: 0.0016770362854003906
INFO:model.base_model:Epoch Step: 2889 Train Loss: 0.6931624412536621 elapsed: 0.0016639232635498047
INFO:model.base_model:Epoch Step: 2890 Train Loss: 0.6931647062301636 elapsed: 0.0015652179718017578
INFO:model.base_model:Epoch Step: 2891 Train Loss: 0.6931670308113098 elapsed: 0.0015799999237060547
INFO:model.base_model:Epoch Step: 2892 Train Loss: 0.6931695938110352 elapsed: 0.00160598754

INFO:model.base_model:Epoch Step: 2965 Train Loss: 0.6935534477233887 elapsed: 0.002095937728881836
INFO:model.base_model:Epoch Step: 2966 Train Loss: 0.693555474281311 elapsed: 0.0022699832916259766
INFO:model.base_model:Epoch Step: 2967 Train Loss: 0.6935571432113647 elapsed: 0.0023658275604248047
INFO:model.base_model:Epoch Step: 2968 Train Loss: 0.6935586929321289 elapsed: 0.0022449493408203125
INFO:model.base_model:Epoch Step: 2969 Train Loss: 0.6935597658157349 elapsed: 0.0023009777069091797
INFO:model.base_model:Epoch Step: 2970 Train Loss: 0.6935606598854065 elapsed: 0.0023369789123535156
INFO:model.base_model:Epoch Step: 2971 Train Loss: 0.6935611963272095 elapsed: 0.0020470619201660156
INFO:model.base_model:Epoch Step: 2972 Train Loss: 0.6935614943504333 elapsed: 0.00494074821472168
INFO:model.base_model:Epoch Step: 2973 Train Loss: 0.6935615539550781 elapsed: 0.0022246837615966797
INFO:model.base_model:Epoch Step: 2974 Train Loss: 0.6935611963272095 elapsed: 0.00177121162414

INFO:model.base_model:Epoch Step: 3047 Train Loss: 0.6931568384170532 elapsed: 0.0019299983978271484
INFO:model.base_model:Epoch Step: 3048 Train Loss: 0.6931546926498413 elapsed: 0.001962900161743164
INFO:model.base_model:Epoch Step: 3049 Train Loss: 0.6931527853012085 elapsed: 0.0017859935760498047
INFO:model.base_model:Epoch Step: 3050 Train Loss: 0.6931512355804443 elapsed: 0.001909017562866211
INFO:model.base_model:Epoch Step: 3051 Train Loss: 0.6931498050689697 elapsed: 0.0016241073608398438
INFO:model.base_model:Epoch Step: 3052 Train Loss: 0.6931488513946533 elapsed: 0.0015900135040283203
INFO:model.base_model:Epoch Step: 3053 Train Loss: 0.6931480169296265 elapsed: 0.0015659332275390625
INFO:model.base_model:Epoch Step: 3054 Train Loss: 0.6931474804878235 elapsed: 0.0018889904022216797
INFO:model.base_model:Epoch Step: 3055 Train Loss: 0.6931471824645996 elapsed: 0.001547098159790039
INFO:model.base_model:Epoch Step: 3056 Train Loss: 0.6931471824645996 elapsed: 0.0016927719116

INFO:model.base_model:Epoch Step: 3129 Train Loss: 0.6935487985610962 elapsed: 0.0017228126525878906
INFO:model.base_model:Epoch Step: 3130 Train Loss: 0.6935544013977051 elapsed: 0.002012014389038086
INFO:model.base_model:Epoch Step: 3131 Train Loss: 0.6935598850250244 elapsed: 0.001984834671020508
INFO:model.base_model:Epoch Step: 3132 Train Loss: 0.6935651302337646 elapsed: 0.0018720626831054688
INFO:model.base_model:Epoch Step: 3133 Train Loss: 0.6935703754425049 elapsed: 0.001786947250366211
INFO:model.base_model:Epoch Step: 3134 Train Loss: 0.6935753226280212 elapsed: 0.0017998218536376953
INFO:model.base_model:Epoch Step: 3135 Train Loss: 0.693580150604248 elapsed: 0.0020399093627929688
INFO:model.base_model:Epoch Step: 3136 Train Loss: 0.6935849189758301 elapsed: 0.0019979476928710938
INFO:model.base_model:Epoch Step: 3137 Train Loss: 0.693589448928833 elapsed: 0.0018639564514160156
INFO:model.base_model:Epoch Step: 3138 Train Loss: 0.6935938596725464 elapsed: 0.001922130584716

INFO:model.base_model:Epoch Step: 3211 Train Loss: 0.6934354305267334 elapsed: 0.0017731189727783203
INFO:model.base_model:Epoch Step: 3212 Train Loss: 0.6934287548065186 elapsed: 0.0023331642150878906
INFO:model.base_model:Epoch Step: 3213 Train Loss: 0.6934220790863037 elapsed: 0.002064228057861328
INFO:model.base_model:Epoch Step: 3214 Train Loss: 0.6934154033660889 elapsed: 0.001961946487426758
INFO:model.base_model:Epoch Step: 3215 Train Loss: 0.693408727645874 elapsed: 0.0019030570983886719
INFO:model.base_model:Epoch Step: 3216 Train Loss: 0.6934020519256592 elapsed: 0.0018770694732666016
INFO:model.base_model:Epoch Step: 3217 Train Loss: 0.6933953762054443 elapsed: 0.0018029212951660156
INFO:model.base_model:Epoch Step: 3218 Train Loss: 0.6933887004852295 elapsed: 0.0016751289367675781
INFO:model.base_model:Epoch Step: 3219 Train Loss: 0.6933820247650146 elapsed: 0.0021979808807373047
INFO:model.base_model:Epoch Step: 3220 Train Loss: 0.6933754682540894 elapsed: 0.0019710063934

INFO:model.base_model:Epoch Step: 3293 Train Loss: 0.6931658387184143 elapsed: 0.002299070358276367
INFO:model.base_model:Epoch Step: 3294 Train Loss: 0.6931682825088501 elapsed: 0.002025127410888672
INFO:model.base_model:Epoch Step: 3295 Train Loss: 0.6931709051132202 elapsed: 0.002401113510131836
INFO:model.base_model:Epoch Step: 3296 Train Loss: 0.6931736469268799 elapsed: 0.0019452571868896484
INFO:model.base_model:Epoch Step: 3297 Train Loss: 0.6931765079498291 elapsed: 0.0023021697998046875
INFO:model.base_model:Epoch Step: 3298 Train Loss: 0.6931794881820679 elapsed: 0.002368927001953125
INFO:model.base_model:Epoch Step: 3299 Train Loss: 0.693182647228241 elapsed: 0.0019888877868652344
INFO:model.base_model:Epoch Step: 3300 Train Loss: 0.6931859254837036 elapsed: 0.002020120620727539
INFO:model.base_model:Epoch Step: 3301 Train Loss: 0.6931893825531006 elapsed: 0.0020051002502441406
INFO:model.base_model:Epoch Step: 3302 Train Loss: 0.6931928396224976 elapsed: 0.0025541782379150

INFO:model.base_model:Epoch Step: 3375 Train Loss: 0.6935677528381348 elapsed: 0.002019166946411133
INFO:model.base_model:Epoch Step: 3376 Train Loss: 0.693570613861084 elapsed: 0.0017228126525878906
INFO:model.base_model:Epoch Step: 3377 Train Loss: 0.6935731768608093 elapsed: 0.0017359256744384766
INFO:model.base_model:Epoch Step: 3378 Train Loss: 0.6935756206512451 elapsed: 0.0021011829376220703
INFO:model.base_model:Epoch Step: 3379 Train Loss: 0.6935779452323914 elapsed: 0.0019290447235107422
INFO:model.base_model:Epoch Step: 3380 Train Loss: 0.6935799717903137 elapsed: 0.002270936965942383
INFO:model.base_model:Epoch Step: 3381 Train Loss: 0.6935819387435913 elapsed: 0.0019290447235107422
INFO:model.base_model:Epoch Step: 3382 Train Loss: 0.693583607673645 elapsed: 0.001950979232788086
INFO:model.base_model:Epoch Step: 3383 Train Loss: 0.693585216999054 elapsed: 0.0017447471618652344
INFO:model.base_model:Epoch Step: 3384 Train Loss: 0.6935865879058838 elapsed: 0.0018382072448730

INFO:model.base_model:Epoch Step: 3457 Train Loss: 0.6932857632637024 elapsed: 0.0017359256744384766
INFO:model.base_model:Epoch Step: 3458 Train Loss: 0.6932797431945801 elapsed: 0.0016798973083496094
INFO:model.base_model:Epoch Step: 3459 Train Loss: 0.6932739019393921 elapsed: 0.0018239021301269531
INFO:model.base_model:Epoch Step: 3460 Train Loss: 0.6932680606842041 elapsed: 0.0021860599517822266
INFO:model.base_model:Epoch Step: 3461 Train Loss: 0.6932623982429504 elapsed: 0.0017027854919433594
INFO:model.base_model:Epoch Step: 3462 Train Loss: 0.6932567358016968 elapsed: 0.0023021697998046875
INFO:model.base_model:Epoch Step: 3463 Train Loss: 0.6932511329650879 elapsed: 0.001669168472290039
INFO:model.base_model:Epoch Step: 3464 Train Loss: 0.6932457685470581 elapsed: 0.0020627975463867188
INFO:model.base_model:Epoch Step: 3465 Train Loss: 0.6932404041290283 elapsed: 0.0018999576568603516
INFO:model.base_model:Epoch Step: 3466 Train Loss: 0.6932352781295776 elapsed: 0.00218105316

INFO:model.base_model:Epoch Step: 3539 Train Loss: 0.6932917833328247 elapsed: 0.0016682147979736328
INFO:model.base_model:Epoch Step: 3540 Train Loss: 0.6932981014251709 elapsed: 0.001909017562866211
INFO:model.base_model:Epoch Step: 3541 Train Loss: 0.6933046579360962 elapsed: 0.0015840530395507812
INFO:model.base_model:Epoch Step: 3542 Train Loss: 0.6933112144470215 elapsed: 0.0016260147094726562
INFO:model.base_model:Epoch Step: 3543 Train Loss: 0.6933178305625916 elapsed: 0.0015480518341064453
INFO:model.base_model:Epoch Step: 3544 Train Loss: 0.6933245658874512 elapsed: 0.0016548633575439453
INFO:model.base_model:Epoch Step: 3545 Train Loss: 0.6933313012123108 elapsed: 0.0017726421356201172
INFO:model.base_model:Epoch Step: 3546 Train Loss: 0.69333815574646 elapsed: 0.0022268295288085938
INFO:model.base_model:Epoch Step: 3547 Train Loss: 0.6933450102806091 elapsed: 0.0018439292907714844
INFO:model.base_model:Epoch Step: 3548 Train Loss: 0.6933519840240479 elapsed: 0.0019471645355

INFO:model.base_model:Epoch Step: 3621 Train Loss: 0.6936368942260742 elapsed: 0.0017626285552978516
INFO:model.base_model:Epoch Step: 3622 Train Loss: 0.6936346292495728 elapsed: 0.0016090869903564453
INFO:model.base_model:Epoch Step: 3623 Train Loss: 0.6936321258544922 elapsed: 0.0016751289367675781
INFO:model.base_model:Epoch Step: 3624 Train Loss: 0.6936295032501221 elapsed: 0.0017228126525878906
INFO:model.base_model:Epoch Step: 3625 Train Loss: 0.6936267018318176 elapsed: 0.001711130142211914
INFO:model.base_model:Epoch Step: 3626 Train Loss: 0.6936236619949341 elapsed: 0.0015859603881835938
INFO:model.base_model:Epoch Step: 3627 Train Loss: 0.6936205625534058 elapsed: 0.0017642974853515625
INFO:model.base_model:Epoch Step: 3628 Train Loss: 0.6936172842979431 elapsed: 0.0016741752624511719
INFO:model.base_model:Epoch Step: 3629 Train Loss: 0.6936138272285461 elapsed: 0.0015971660614013672
INFO:model.base_model:Epoch Step: 3630 Train Loss: 0.6936101913452148 elapsed: 0.00174117088

INFO:model.base_model:Epoch Step: 3703 Train Loss: 0.6932336091995239 elapsed: 0.002092123031616211
INFO:model.base_model:Epoch Step: 3704 Train Loss: 0.6932299733161926 elapsed: 0.0016548633575439453
INFO:model.base_model:Epoch Step: 3705 Train Loss: 0.6932263374328613 elapsed: 0.0019578933715820312
INFO:model.base_model:Epoch Step: 3706 Train Loss: 0.6932228803634644 elapsed: 0.0015239715576171875
INFO:model.base_model:Epoch Step: 3707 Train Loss: 0.6932194232940674 elapsed: 0.001750946044921875
INFO:model.base_model:Epoch Step: 3708 Train Loss: 0.6932160258293152 elapsed: 0.0015718936920166016
INFO:model.base_model:Epoch Step: 3709 Train Loss: 0.6932127475738525 elapsed: 0.0018668174743652344
INFO:model.base_model:Epoch Step: 3710 Train Loss: 0.6932095885276794 elapsed: 0.0015499591827392578
INFO:model.base_model:Epoch Step: 3711 Train Loss: 0.6932064890861511 elapsed: 0.0017421245574951172
INFO:model.base_model:Epoch Step: 3712 Train Loss: 0.6932034492492676 elapsed: 0.001787900924

INFO:model.base_model:Epoch Step: 3785 Train Loss: 0.6931723952293396 elapsed: 0.001528024673461914
INFO:model.base_model:Epoch Step: 3786 Train Loss: 0.6931736469268799 elapsed: 0.0014688968658447266
INFO:model.base_model:Epoch Step: 3787 Train Loss: 0.6931748390197754 elapsed: 0.0017790794372558594
INFO:model.base_model:Epoch Step: 3788 Train Loss: 0.6931761503219604 elapsed: 0.0014519691467285156
INFO:model.base_model:Epoch Step: 3789 Train Loss: 0.6931774020195007 elapsed: 0.0015399456024169922
INFO:model.base_model:Epoch Step: 3790 Train Loss: 0.6931787729263306 elapsed: 0.0016269683837890625
INFO:model.base_model:Epoch Step: 3791 Train Loss: 0.6931799650192261 elapsed: 0.0017368793487548828
INFO:model.base_model:Epoch Step: 3792 Train Loss: 0.6931812763214111 elapsed: 0.0016052722930908203
INFO:model.base_model:Epoch Step: 3793 Train Loss: 0.6931825280189514 elapsed: 0.0014290809631347656
INFO:model.base_model:Epoch Step: 3794 Train Loss: 0.6931837797164917 elapsed: 0.00143218040

INFO:model.base_model:Epoch Step: 3867 Train Loss: 0.6931591033935547 elapsed: 0.0016951560974121094
INFO:model.base_model:Epoch Step: 3868 Train Loss: 0.6931577324867249 elapsed: 0.0018680095672607422
INFO:model.base_model:Epoch Step: 3869 Train Loss: 0.693156361579895 elapsed: 0.0016357898712158203
INFO:model.base_model:Epoch Step: 3870 Train Loss: 0.69315505027771 elapsed: 0.0016162395477294922
INFO:model.base_model:Epoch Step: 3871 Train Loss: 0.6931538581848145 elapsed: 0.0017130374908447266
INFO:model.base_model:Epoch Step: 3872 Train Loss: 0.6931527853012085 elapsed: 0.0016922950744628906
INFO:model.base_model:Epoch Step: 3873 Train Loss: 0.6931517124176025 elapsed: 0.0015370845794677734
INFO:model.base_model:Epoch Step: 3874 Train Loss: 0.6931507587432861 elapsed: 0.0015718936920166016
INFO:model.base_model:Epoch Step: 3875 Train Loss: 0.6931498646736145 elapsed: 0.0016503334045410156
INFO:model.base_model:Epoch Step: 3876 Train Loss: 0.6931490898132324 elapsed: 0.0015869140625

INFO:model.base_model:Epoch Step: 3949 Train Loss: 0.6934024095535278 elapsed: 0.0018398761749267578
INFO:model.base_model:Epoch Step: 3950 Train Loss: 0.6934019923210144 elapsed: 0.0017859935760498047
INFO:model.base_model:Epoch Step: 3951 Train Loss: 0.6934011578559875 elapsed: 0.0041429996490478516
INFO:model.base_model:Epoch Step: 3952 Train Loss: 0.6933998465538025 elapsed: 0.0023479461669921875
INFO:model.base_model:Epoch Step: 3953 Train Loss: 0.693398118019104 elapsed: 0.002568960189819336
INFO:model.base_model:Epoch Step: 3954 Train Loss: 0.6933958530426025 elapsed: 0.0018661022186279297
INFO:model.base_model:Epoch Step: 3955 Train Loss: 0.6933932304382324 elapsed: 0.001558065414428711
INFO:model.base_model:Epoch Step: 3956 Train Loss: 0.6933901309967041 elapsed: 0.0018198490142822266
INFO:model.base_model:Epoch Step: 3957 Train Loss: 0.6933865547180176 elapsed: 0.0015842914581298828
INFO:model.base_model:Epoch Step: 3958 Train Loss: 0.6933825612068176 elapsed: 0.0016529560089

INFO:model.base_model:Epoch Step: 4031 Train Loss: 0.6936018466949463 elapsed: 0.0016980171203613281
INFO:model.base_model:Epoch Step: 4032 Train Loss: 0.6936193704605103 elapsed: 0.0015528202056884766
INFO:model.base_model:Epoch Step: 4033 Train Loss: 0.6936363577842712 elapsed: 0.001728057861328125
INFO:model.base_model:Epoch Step: 4034 Train Loss: 0.6936526894569397 elapsed: 0.0017027854919433594
INFO:model.base_model:Epoch Step: 4035 Train Loss: 0.6936682462692261 elapsed: 0.0016832351684570312
INFO:model.base_model:Epoch Step: 4036 Train Loss: 0.6936830282211304 elapsed: 0.0016520023345947266
INFO:model.base_model:Epoch Step: 4037 Train Loss: 0.6936967968940735 elapsed: 0.0016019344329833984
INFO:model.base_model:Epoch Step: 4038 Train Loss: 0.6937096118927002 elapsed: 0.0017580986022949219
INFO:model.base_model:Epoch Step: 4039 Train Loss: 0.693721354007721 elapsed: 0.0016129016876220703
INFO:model.base_model:Epoch Step: 4040 Train Loss: 0.693731963634491 elapsed: 0.0015940666198

INFO:model.base_model:Epoch Step: 4113 Train Loss: 0.6939991116523743 elapsed: 0.0027170181274414062
INFO:model.base_model:Epoch Step: 4114 Train Loss: 0.6940531134605408 elapsed: 0.002001047134399414
INFO:model.base_model:Epoch Step: 4115 Train Loss: 0.6941068172454834 elapsed: 0.0020818710327148438
INFO:model.base_model:Epoch Step: 4116 Train Loss: 0.694159984588623 elapsed: 0.0018310546875
INFO:model.base_model:Epoch Step: 4117 Train Loss: 0.6942123174667358 elapsed: 0.0017321109771728516
INFO:model.base_model:Epoch Step: 4118 Train Loss: 0.6942634582519531 elapsed: 0.0015711784362792969
INFO:model.base_model:Epoch Step: 4119 Train Loss: 0.6943132877349854 elapsed: 0.0015778541564941406
INFO:model.base_model:Epoch Step: 4120 Train Loss: 0.6943612694740295 elapsed: 0.0019769668579101562
INFO:model.base_model:Epoch Step: 4121 Train Loss: 0.6944072246551514 elapsed: 0.0018911361694335938
INFO:model.base_model:Epoch Step: 4122 Train Loss: 0.6944509744644165 elapsed: 0.001873016357421875

INFO:model.base_model:Epoch Step: 4195 Train Loss: 0.6945161819458008 elapsed: 0.0016798973083496094
INFO:model.base_model:Epoch Step: 4196 Train Loss: 0.6946150064468384 elapsed: 0.0017781257629394531
INFO:model.base_model:Epoch Step: 4197 Train Loss: 0.694714367389679 elapsed: 0.0018579959869384766
INFO:model.base_model:Epoch Step: 4198 Train Loss: 0.6948140859603882 elapsed: 0.0016870498657226562
INFO:model.base_model:Epoch Step: 4199 Train Loss: 0.6949135065078735 elapsed: 0.0016858577728271484
INFO:model.base_model:Epoch Step: 4200 Train Loss: 0.6950123310089111 elapsed: 0.001748800277709961
INFO:model.base_model:Epoch Step: 4201 Train Loss: 0.6951102614402771 elapsed: 0.001528024673461914
INFO:model.base_model:Epoch Step: 4202 Train Loss: 0.6952067613601685 elapsed: 0.0015420913696289062
INFO:model.base_model:Epoch Step: 4203 Train Loss: 0.6953014135360718 elapsed: 0.0018029212951660156
INFO:model.base_model:Epoch Step: 4204 Train Loss: 0.6953939199447632 elapsed: 0.0016441345214

INFO:model.base_model:Epoch Step: 4277 Train Loss: 0.69322669506073 elapsed: 0.0016400814056396484
INFO:model.base_model:Epoch Step: 4278 Train Loss: 0.6932566165924072 elapsed: 0.0018718242645263672
INFO:model.base_model:Epoch Step: 4279 Train Loss: 0.6932910680770874 elapsed: 0.0021889209747314453
INFO:model.base_model:Epoch Step: 4280 Train Loss: 0.693329930305481 elapsed: 0.00201416015625
INFO:model.base_model:Epoch Step: 4281 Train Loss: 0.6933729648590088 elapsed: 0.0018301010131835938
INFO:model.base_model:Epoch Step: 4282 Train Loss: 0.6934200525283813 elapsed: 0.0017011165618896484
INFO:model.base_model:Epoch Step: 4283 Train Loss: 0.6934710144996643 elapsed: 0.0015530586242675781
INFO:model.base_model:Epoch Step: 4284 Train Loss: 0.6935257911682129 elapsed: 0.0015110969543457031
INFO:model.base_model:Epoch Step: 4285 Train Loss: 0.6935839653015137 elapsed: 0.0020470619201660156
INFO:model.base_model:Epoch Step: 4286 Train Loss: 0.6936455965042114 elapsed: 0.002028942108154297

INFO:model.base_model:Epoch Step: 4359 Train Loss: 0.6946903467178345 elapsed: 0.0017850399017333984
INFO:model.base_model:Epoch Step: 4360 Train Loss: 0.6946170330047607 elapsed: 0.002062082290649414
INFO:model.base_model:Epoch Step: 4361 Train Loss: 0.6945443749427795 elapsed: 0.0018200874328613281
INFO:model.base_model:Epoch Step: 4362 Train Loss: 0.69447261095047 elapsed: 0.002073049545288086
INFO:model.base_model:Epoch Step: 4363 Train Loss: 0.6944018602371216 elapsed: 0.0017490386962890625
INFO:model.base_model:Epoch Step: 4364 Train Loss: 0.6943321824073792 elapsed: 0.001895904541015625
INFO:model.base_model:Epoch Step: 4365 Train Loss: 0.6942638754844666 elapsed: 0.0015230178833007812
INFO:model.base_model:Epoch Step: 4366 Train Loss: 0.6941969394683838 elapsed: 0.0016639232635498047
INFO:model.base_model:Epoch Step: 4367 Train Loss: 0.69413161277771 elapsed: 0.002089977264404297
INFO:model.base_model:Epoch Step: 4368 Train Loss: 0.6940678358078003 elapsed: 0.001957893371582031

INFO:model.base_model:Epoch Step: 4441 Train Loss: 0.6941430568695068 elapsed: 0.0020759105682373047
INFO:model.base_model:Epoch Step: 4442 Train Loss: 0.6941720247268677 elapsed: 0.002000093460083008
INFO:model.base_model:Epoch Step: 4443 Train Loss: 0.6942005157470703 elapsed: 0.001886129379272461
INFO:model.base_model:Epoch Step: 4444 Train Loss: 0.6942286491394043 elapsed: 0.0017747879028320312
INFO:model.base_model:Epoch Step: 4445 Train Loss: 0.6942563056945801 elapsed: 0.0017359256744384766
INFO:model.base_model:Epoch Step: 4446 Train Loss: 0.6942834258079529 elapsed: 0.00162506103515625
INFO:model.base_model:Epoch Step: 4447 Train Loss: 0.6943099498748779 elapsed: 0.0024149417877197266
INFO:model.base_model:Epoch Step: 4448 Train Loss: 0.6943359375 elapsed: 0.0022699832916259766
INFO:model.base_model:Epoch Step: 4449 Train Loss: 0.6943612694740295 elapsed: 0.0023970603942871094
INFO:model.base_model:Epoch Step: 4450 Train Loss: 0.6943859457969666 elapsed: 0.0019001960754394531


INFO:model.base_model:Epoch Step: 4523 Train Loss: 0.6938920021057129 elapsed: 0.0016129016876220703
INFO:model.base_model:Epoch Step: 4524 Train Loss: 0.6938660144805908 elapsed: 0.0016748905181884766
INFO:model.base_model:Epoch Step: 4525 Train Loss: 0.6938402056694031 elapsed: 0.0017879009246826172
INFO:model.base_model:Epoch Step: 4526 Train Loss: 0.6938145160675049 elapsed: 0.0018711090087890625
INFO:model.base_model:Epoch Step: 4527 Train Loss: 0.693789005279541 elapsed: 0.001961946487426758
INFO:model.base_model:Epoch Step: 4528 Train Loss: 0.6937637329101562 elapsed: 0.0021278858184814453
INFO:model.base_model:Epoch Step: 4529 Train Loss: 0.6937387585639954 elapsed: 0.0018308162689208984
INFO:model.base_model:Epoch Step: 4530 Train Loss: 0.6937140226364136 elapsed: 0.001847982406616211
INFO:model.base_model:Epoch Step: 4531 Train Loss: 0.6936895847320557 elapsed: 0.0018010139465332031
INFO:model.base_model:Epoch Step: 4532 Train Loss: 0.6936654448509216 elapsed: 0.0016047954559

INFO:model.base_model:Epoch Step: 4605 Train Loss: 0.6934719085693359 elapsed: 0.019688129425048828
INFO:model.base_model:Epoch Step: 4606 Train Loss: 0.6934890747070312 elapsed: 0.0015685558319091797
INFO:model.base_model:Epoch Step: 4607 Train Loss: 0.6935064792633057 elapsed: 0.0025789737701416016
INFO:model.base_model:Epoch Step: 4608 Train Loss: 0.6935241222381592 elapsed: 0.0017600059509277344
INFO:model.base_model:Epoch Step: 4609 Train Loss: 0.6935420036315918 elapsed: 0.0017650127410888672
INFO:model.base_model:Epoch Step: 4610 Train Loss: 0.693560004234314 elapsed: 0.0015671253204345703
INFO:model.base_model:Epoch Step: 4611 Train Loss: 0.6935781240463257 elapsed: 0.001708984375
INFO:model.base_model:Epoch Step: 4612 Train Loss: 0.693596363067627 elapsed: 0.0017151832580566406
INFO:model.base_model:Epoch Step: 4613 Train Loss: 0.6936147809028625 elapsed: 0.0016827583312988281
INFO:model.base_model:Epoch Step: 4614 Train Loss: 0.6936331987380981 elapsed: 0.0017189979553222656


INFO:model.base_model:Epoch Step: 4687 Train Loss: 0.6937565803527832 elapsed: 0.0016629695892333984
INFO:model.base_model:Epoch Step: 4688 Train Loss: 0.6937336921691895 elapsed: 0.0015330314636230469
INFO:model.base_model:Epoch Step: 4689 Train Loss: 0.693710446357727 elapsed: 0.0018308162689208984
INFO:model.base_model:Epoch Step: 4690 Train Loss: 0.6936869621276855 elapsed: 0.0016477108001708984
INFO:model.base_model:Epoch Step: 4691 Train Loss: 0.6936633586883545 elapsed: 0.0016698837280273438
INFO:model.base_model:Epoch Step: 4692 Train Loss: 0.6936395168304443 elapsed: 0.001528024673461914
INFO:model.base_model:Epoch Step: 4693 Train Loss: 0.6936155557632446 elapsed: 0.0015478134155273438
INFO:model.base_model:Epoch Step: 4694 Train Loss: 0.6935915946960449 elapsed: 0.0018460750579833984
INFO:model.base_model:Epoch Step: 4695 Train Loss: 0.6935676336288452 elapsed: 0.0015578269958496094
INFO:model.base_model:Epoch Step: 4696 Train Loss: 0.6935437917709351 elapsed: 0.001540899276

INFO:model.base_model:Epoch Step: 4769 Train Loss: 0.6952013969421387 elapsed: 0.0015168190002441406
INFO:model.base_model:Epoch Step: 4770 Train Loss: 0.6952596306800842 elapsed: 0.0018339157104492188
INFO:model.base_model:Epoch Step: 4771 Train Loss: 0.6953158378601074 elapsed: 0.0018002986907958984
INFO:model.base_model:Epoch Step: 4772 Train Loss: 0.6953697204589844 elapsed: 0.0016589164733886719
INFO:model.base_model:Epoch Step: 4773 Train Loss: 0.6954210996627808 elapsed: 0.0014879703521728516
INFO:model.base_model:Epoch Step: 4774 Train Loss: 0.6954697370529175 elapsed: 0.0015611648559570312
INFO:model.base_model:Epoch Step: 4775 Train Loss: 0.6955153942108154 elapsed: 0.0016160011291503906
INFO:model.base_model:Epoch Step: 4776 Train Loss: 0.6955578327178955 elapsed: 0.0015747547149658203
INFO:model.base_model:Epoch Step: 4777 Train Loss: 0.6955970525741577 elapsed: 0.0016198158264160156
INFO:model.base_model:Epoch Step: 4778 Train Loss: 0.6956326961517334 elapsed: 0.0016419887

INFO:model.base_model:Epoch Step: 4851 Train Loss: 0.6932984590530396 elapsed: 0.0018148422241210938
INFO:model.base_model:Epoch Step: 4852 Train Loss: 0.6933286190032959 elapsed: 0.0023462772369384766
INFO:model.base_model:Epoch Step: 4853 Train Loss: 0.6933610439300537 elapsed: 0.0018880367279052734
INFO:model.base_model:Epoch Step: 4854 Train Loss: 0.6933956146240234 elapsed: 0.0019371509552001953
INFO:model.base_model:Epoch Step: 4855 Train Loss: 0.6934323310852051 elapsed: 0.00179290771484375
INFO:model.base_model:Epoch Step: 4856 Train Loss: 0.6934710741043091 elapsed: 0.0018010139465332031
INFO:model.base_model:Epoch Step: 4857 Train Loss: 0.6935116052627563 elapsed: 0.0016660690307617188
INFO:model.base_model:Epoch Step: 4858 Train Loss: 0.6935539245605469 elapsed: 0.0017092227935791016
INFO:model.base_model:Epoch Step: 4859 Train Loss: 0.6935979127883911 elapsed: 0.0017898082733154297
INFO:model.base_model:Epoch Step: 4860 Train Loss: 0.6936434507369995 elapsed: 0.001776695251

INFO:model.base_model:Epoch Step: 4933 Train Loss: 0.6962486505508423 elapsed: 0.0016772747039794922
INFO:model.base_model:Epoch Step: 4934 Train Loss: 0.696243166923523 elapsed: 0.0018780231475830078
INFO:model.base_model:Epoch Step: 4935 Train Loss: 0.6962366104125977 elapsed: 0.0017049312591552734
INFO:model.base_model:Epoch Step: 4936 Train Loss: 0.6962287425994873 elapsed: 0.0017421245574951172
INFO:model.base_model:Epoch Step: 4937 Train Loss: 0.696219801902771 elapsed: 0.005733013153076172
INFO:model.base_model:Epoch Step: 4938 Train Loss: 0.6962097883224487 elapsed: 0.007179975509643555
INFO:model.base_model:Epoch Step: 4939 Train Loss: 0.6961986422538757 elapsed: 0.002791881561279297
INFO:model.base_model:Epoch Step: 4940 Train Loss: 0.6961864233016968 elapsed: 0.0021996498107910156
INFO:model.base_model:Epoch Step: 4941 Train Loss: 0.6961731910705566 elapsed: 0.001918792724609375
INFO:model.base_model:Epoch Step: 4942 Train Loss: 0.6961588859558105 elapsed: 0.0021171569824218

INFO:model.base_model:Epoch Step: 5015 Train Loss: 0.6940287351608276 elapsed: 0.0016300678253173828
INFO:model.base_model:Epoch Step: 5016 Train Loss: 0.6940029859542847 elapsed: 0.0016200542449951172
INFO:model.base_model:Epoch Step: 5017 Train Loss: 0.6939775943756104 elapsed: 0.001718759536743164
INFO:model.base_model:Epoch Step: 5018 Train Loss: 0.6939525604248047 elapsed: 0.001477956771850586
INFO:model.base_model:Epoch Step: 5019 Train Loss: 0.6939278841018677 elapsed: 0.0016498565673828125
INFO:model.base_model:Epoch Step: 5020 Train Loss: 0.6939036846160889 elapsed: 0.0016789436340332031
INFO:model.base_model:Epoch Step: 5021 Train Loss: 0.6938798427581787 elapsed: 0.0016019344329833984
INFO:model.base_model:Epoch Step: 5022 Train Loss: 0.6938563585281372 elapsed: 0.001516103744506836
INFO:model.base_model:Epoch Step: 5023 Train Loss: 0.6938333511352539 elapsed: 0.0016429424285888672
INFO:model.base_model:Epoch Step: 5024 Train Loss: 0.6938107013702393 elapsed: 0.0017552375793

INFO:model.base_model:Epoch Step: 5097 Train Loss: 0.6931540966033936 elapsed: 0.0016667842864990234
INFO:model.base_model:Epoch Step: 5098 Train Loss: 0.6931557655334473 elapsed: 0.0018410682678222656
INFO:model.base_model:Epoch Step: 5099 Train Loss: 0.6931577920913696 elapsed: 0.00162506103515625
INFO:model.base_model:Epoch Step: 5100 Train Loss: 0.6931599974632263 elapsed: 0.0015971660614013672
INFO:model.base_model:Epoch Step: 5101 Train Loss: 0.6931623220443726 elapsed: 0.0015606880187988281
INFO:model.base_model:Epoch Step: 5102 Train Loss: 0.6931648254394531 elapsed: 0.0016231536865234375
INFO:model.base_model:Epoch Step: 5103 Train Loss: 0.693167507648468 elapsed: 0.0014510154724121094
INFO:model.base_model:Epoch Step: 5104 Train Loss: 0.693170428276062 elapsed: 0.001725912094116211
INFO:model.base_model:Epoch Step: 5105 Train Loss: 0.6931734085083008 elapsed: 0.0017080307006835938
INFO:model.base_model:Epoch Step: 5106 Train Loss: 0.6931765079498291 elapsed: 0.001543998718261

INFO:model.base_model:Epoch Step: 5179 Train Loss: 0.6935034990310669 elapsed: 0.0020837783813476562
INFO:model.base_model:Epoch Step: 5180 Train Loss: 0.6935049891471863 elapsed: 0.0019299983978271484
INFO:model.base_model:Epoch Step: 5181 Train Loss: 0.6935063600540161 elapsed: 0.0020411014556884766
INFO:model.base_model:Epoch Step: 5182 Train Loss: 0.6935076713562012 elapsed: 0.0018527507781982422
INFO:model.base_model:Epoch Step: 5183 Train Loss: 0.6935086846351624 elapsed: 0.0015759468078613281
INFO:model.base_model:Epoch Step: 5184 Train Loss: 0.693509578704834 elapsed: 0.002410888671875
INFO:model.base_model:Epoch Step: 5185 Train Loss: 0.6935102939605713 elapsed: 0.0019609928131103516
INFO:model.base_model:Epoch Step: 5186 Train Loss: 0.693510890007019 elapsed: 0.0021491050720214844
INFO:model.base_model:Epoch Step: 5187 Train Loss: 0.6935113072395325 elapsed: 0.0020449161529541016
INFO:model.base_model:Epoch Step: 5188 Train Loss: 0.6935114860534668 elapsed: 0.0019316673278808

INFO:model.base_model:Epoch Step: 5261 Train Loss: 0.6932163238525391 elapsed: 0.0016179084777832031
INFO:model.base_model:Epoch Step: 5262 Train Loss: 0.6932119131088257 elapsed: 0.001641988754272461
INFO:model.base_model:Epoch Step: 5263 Train Loss: 0.6932075023651123 elapsed: 0.0017740726470947266
INFO:model.base_model:Epoch Step: 5264 Train Loss: 0.6932032704353333 elapsed: 0.0014719963073730469
INFO:model.base_model:Epoch Step: 5265 Train Loss: 0.6931992769241333 elapsed: 0.001367807388305664
INFO:model.base_model:Epoch Step: 5266 Train Loss: 0.6931952238082886 elapsed: 0.001722097396850586
INFO:model.base_model:Epoch Step: 5267 Train Loss: 0.6931913495063782 elapsed: 0.0017490386962890625
INFO:model.base_model:Epoch Step: 5268 Train Loss: 0.6931876540184021 elapsed: 0.0016391277313232422
INFO:model.base_model:Epoch Step: 5269 Train Loss: 0.6931840181350708 elapsed: 0.0017547607421875
INFO:model.base_model:Epoch Step: 5270 Train Loss: 0.6931806802749634 elapsed: 0.0019128322601318

INFO:model.base_model:Epoch Step: 5343 Train Loss: 0.6934397220611572 elapsed: 0.0023870468139648438
INFO:model.base_model:Epoch Step: 5344 Train Loss: 0.6934496164321899 elapsed: 0.001980304718017578
INFO:model.base_model:Epoch Step: 5345 Train Loss: 0.6934595108032227 elapsed: 0.0023360252380371094
INFO:model.base_model:Epoch Step: 5346 Train Loss: 0.6934695243835449 elapsed: 0.0019001960754394531
INFO:model.base_model:Epoch Step: 5347 Train Loss: 0.6934795379638672 elapsed: 0.0018558502197265625
INFO:model.base_model:Epoch Step: 5348 Train Loss: 0.693489670753479 elapsed: 0.0018830299377441406
INFO:model.base_model:Epoch Step: 5349 Train Loss: 0.6934999227523804 elapsed: 0.0017769336700439453
INFO:model.base_model:Epoch Step: 5350 Train Loss: 0.693510115146637 elapsed: 0.0019559860229492188
INFO:model.base_model:Epoch Step: 5351 Train Loss: 0.6935203671455383 elapsed: 0.0022428035736083984
INFO:model.base_model:Epoch Step: 5352 Train Loss: 0.6935306787490845 elapsed: 0.0028407573699

INFO:model.base_model:Epoch Step: 5425 Train Loss: 0.6938782930374146 elapsed: 0.0018110275268554688
INFO:model.base_model:Epoch Step: 5426 Train Loss: 0.6938722133636475 elapsed: 0.0016720294952392578
INFO:model.base_model:Epoch Step: 5427 Train Loss: 0.6938658952713013 elapsed: 0.0023789405822753906
INFO:model.base_model:Epoch Step: 5428 Train Loss: 0.6938591003417969 elapsed: 0.00167083740234375
INFO:model.base_model:Epoch Step: 5429 Train Loss: 0.6938521265983582 elapsed: 0.00870203971862793
INFO:model.base_model:Epoch Step: 5430 Train Loss: 0.6938447952270508 elapsed: 0.0022249221801757812
INFO:model.base_model:Epoch Step: 5431 Train Loss: 0.6938371658325195 elapsed: 0.0019450187683105469
INFO:model.base_model:Epoch Step: 5432 Train Loss: 0.6938291788101196 elapsed: 0.0017819404602050781
INFO:model.base_model:Epoch Step: 5433 Train Loss: 0.6938209533691406 elapsed: 0.001909017562866211
INFO:model.base_model:Epoch Step: 5434 Train Loss: 0.6938124895095825 elapsed: 0.004138946533203

INFO:model.base_model:Epoch Step: 5507 Train Loss: 0.6931474208831787 elapsed: 0.0019490718841552734
INFO:model.base_model:Epoch Step: 5508 Train Loss: 0.6931479573249817 elapsed: 0.0017008781433105469
INFO:model.base_model:Epoch Step: 5509 Train Loss: 0.6931490302085876 elapsed: 0.001966238021850586
INFO:model.base_model:Epoch Step: 5510 Train Loss: 0.6931505799293518 elapsed: 0.0018162727355957031
INFO:model.base_model:Epoch Step: 5511 Train Loss: 0.6931526064872742 elapsed: 0.0020020008087158203
INFO:model.base_model:Epoch Step: 5512 Train Loss: 0.69315505027771 elapsed: 0.0017619132995605469
INFO:model.base_model:Epoch Step: 5513 Train Loss: 0.6931580305099487 elapsed: 0.0017888545989990234
INFO:model.base_model:Epoch Step: 5514 Train Loss: 0.6931614875793457 elapsed: 0.0018210411071777344
INFO:model.base_model:Epoch Step: 5515 Train Loss: 0.6931654214859009 elapsed: 0.0016582012176513672
INFO:model.base_model:Epoch Step: 5516 Train Loss: 0.6931698322296143 elapsed: 0.0017371177673

INFO:model.base_model:Epoch Step: 5589 Train Loss: 0.6943050622940063 elapsed: 0.002051115036010742
INFO:model.base_model:Epoch Step: 5590 Train Loss: 0.6943197846412659 elapsed: 0.0017299652099609375
INFO:model.base_model:Epoch Step: 5591 Train Loss: 0.6943340301513672 elapsed: 0.004634857177734375
INFO:model.base_model:Epoch Step: 5592 Train Loss: 0.6943478584289551 elapsed: 0.004678964614868164
INFO:model.base_model:Epoch Step: 5593 Train Loss: 0.6943612098693848 elapsed: 0.004029989242553711
INFO:model.base_model:Epoch Step: 5594 Train Loss: 0.6943740844726562 elapsed: 0.003045797348022461
INFO:model.base_model:Epoch Step: 5595 Train Loss: 0.6943864822387695 elapsed: 0.0042591094970703125
INFO:model.base_model:Epoch Step: 5596 Train Loss: 0.6943983435630798 elapsed: 0.002368927001953125
INFO:model.base_model:Epoch Step: 5597 Train Loss: 0.6944096088409424 elapsed: 0.0020079612731933594
INFO:model.base_model:Epoch Step: 5598 Train Loss: 0.6944203972816467 elapsed: 0.0020060539245605

INFO:model.base_model:Epoch Step: 5671 Train Loss: 0.693633496761322 elapsed: 0.0020160675048828125
INFO:model.base_model:Epoch Step: 5672 Train Loss: 0.6936119198799133 elapsed: 0.004047870635986328
INFO:model.base_model:Epoch Step: 5673 Train Loss: 0.693590521812439 elapsed: 0.0027551651000976562
INFO:model.base_model:Epoch Step: 5674 Train Loss: 0.6935694813728333 elapsed: 0.0022199153900146484
INFO:model.base_model:Epoch Step: 5675 Train Loss: 0.6935486793518066 elapsed: 0.0022552013397216797
INFO:model.base_model:Epoch Step: 5676 Train Loss: 0.6935281157493591 elapsed: 0.002391815185546875
INFO:model.base_model:Epoch Step: 5677 Train Loss: 0.693507969379425 elapsed: 0.002118825912475586
INFO:model.base_model:Epoch Step: 5678 Train Loss: 0.6934881210327148 elapsed: 0.0022830963134765625
INFO:model.base_model:Epoch Step: 5679 Train Loss: 0.6934688091278076 elapsed: 0.0017631053924560547
INFO:model.base_model:Epoch Step: 5680 Train Loss: 0.6934497356414795 elapsed: 0.0020029544830322

INFO:model.base_model:Epoch Step: 5753 Train Loss: 0.6937966346740723 elapsed: 0.0019447803497314453
INFO:model.base_model:Epoch Step: 5754 Train Loss: 0.693821907043457 elapsed: 0.0016450881958007812
INFO:model.base_model:Epoch Step: 5755 Train Loss: 0.6938473582267761 elapsed: 0.00185394287109375
INFO:model.base_model:Epoch Step: 5756 Train Loss: 0.69387286901474 elapsed: 0.0016319751739501953
INFO:model.base_model:Epoch Step: 5757 Train Loss: 0.6938984394073486 elapsed: 0.001605987548828125
INFO:model.base_model:Epoch Step: 5758 Train Loss: 0.693924069404602 elapsed: 0.00162506103515625
INFO:model.base_model:Epoch Step: 5759 Train Loss: 0.693949818611145 elapsed: 0.0017881393432617188
INFO:model.base_model:Epoch Step: 5760 Train Loss: 0.6939753890037537 elapsed: 0.0015680789947509766
INFO:model.base_model:Epoch Step: 5761 Train Loss: 0.6940009593963623 elapsed: 0.0017919540405273438
INFO:model.base_model:Epoch Step: 5762 Train Loss: 0.6940264701843262 elapsed: 0.0015969276428222656


INFO:model.base_model:Epoch Step: 5835 Train Loss: 0.6941049098968506 elapsed: 0.0016331672668457031
INFO:model.base_model:Epoch Step: 5836 Train Loss: 0.6940768957138062 elapsed: 0.0019347667694091797
INFO:model.base_model:Epoch Step: 5837 Train Loss: 0.6940486431121826 elapsed: 0.0015816688537597656
INFO:model.base_model:Epoch Step: 5838 Train Loss: 0.69402015209198 elapsed: 0.0017418861389160156
INFO:model.base_model:Epoch Step: 5839 Train Loss: 0.6939914226531982 elapsed: 0.0015549659729003906
INFO:model.base_model:Epoch Step: 5840 Train Loss: 0.6939625144004822 elapsed: 0.0016851425170898438
INFO:model.base_model:Epoch Step: 5841 Train Loss: 0.6939334869384766 elapsed: 0.0015680789947509766
INFO:model.base_model:Epoch Step: 5842 Train Loss: 0.6939042806625366 elapsed: 0.0016031265258789062
INFO:model.base_model:Epoch Step: 5843 Train Loss: 0.6938751339912415 elapsed: 0.0016009807586669922
INFO:model.base_model:Epoch Step: 5844 Train Loss: 0.6938459277153015 elapsed: 0.001670837402

INFO:model.base_model:Epoch Step: 5917 Train Loss: 0.6939631700515747 elapsed: 0.0016248226165771484
INFO:model.base_model:Epoch Step: 5918 Train Loss: 0.694005012512207 elapsed: 0.0015208721160888672
INFO:model.base_model:Epoch Step: 5919 Train Loss: 0.6940474510192871 elapsed: 0.0017728805541992188
INFO:model.base_model:Epoch Step: 5920 Train Loss: 0.6940902471542358 elapsed: 0.0017268657684326172
INFO:model.base_model:Epoch Step: 5921 Train Loss: 0.6941336393356323 elapsed: 0.0018069744110107422
INFO:model.base_model:Epoch Step: 5922 Train Loss: 0.6941773891448975 elapsed: 0.0015599727630615234
INFO:model.base_model:Epoch Step: 5923 Train Loss: 0.6942212581634521 elapsed: 0.0016491413116455078
INFO:model.base_model:Epoch Step: 5924 Train Loss: 0.6942654848098755 elapsed: 0.001560211181640625
INFO:model.base_model:Epoch Step: 5925 Train Loss: 0.6943099498748779 elapsed: 0.0015499591827392578
INFO:model.base_model:Epoch Step: 5926 Train Loss: 0.6943545341491699 elapsed: 0.001607179641

INFO:model.base_model:Epoch Step: 5999 Train Loss: 0.694492757320404 elapsed: 0.001734018325805664
INFO:model.base_model:Epoch Step: 6000 Train Loss: 0.6944486498832703 elapsed: 0.0015909671783447266
INFO:model.base_model:Epoch Step: 6001 Train Loss: 0.6944043636322021 elapsed: 0.0015871524810791016
INFO:model.base_model:Epoch Step: 6002 Train Loss: 0.6943601369857788 elapsed: 0.0016469955444335938
INFO:model.base_model:Epoch Step: 6003 Train Loss: 0.694316029548645 elapsed: 0.0016870498657226562
INFO:model.base_model:Epoch Step: 6004 Train Loss: 0.6942721009254456 elapsed: 0.0015208721160888672
INFO:model.base_model:Epoch Step: 6005 Train Loss: 0.6942282915115356 elapsed: 0.0017731189727783203
INFO:model.base_model:Epoch Step: 6006 Train Loss: 0.6941848993301392 elapsed: 0.0017130374908447266
INFO:model.base_model:Epoch Step: 6007 Train Loss: 0.6941418647766113 elapsed: 0.0015900135040283203
INFO:model.base_model:Epoch Step: 6008 Train Loss: 0.6940991878509521 elapsed: 0.0015423297882

INFO:model.base_model:Epoch Step: 6081 Train Loss: 0.6934912204742432 elapsed: 0.0016808509826660156
INFO:model.base_model:Epoch Step: 6082 Train Loss: 0.6935089230537415 elapsed: 0.0017058849334716797
INFO:model.base_model:Epoch Step: 6083 Train Loss: 0.6935266852378845 elapsed: 0.0016512870788574219
INFO:model.base_model:Epoch Step: 6084 Train Loss: 0.6935446262359619 elapsed: 0.0015916824340820312
INFO:model.base_model:Epoch Step: 6085 Train Loss: 0.6935625076293945 elapsed: 0.0017809867858886719
INFO:model.base_model:Epoch Step: 6086 Train Loss: 0.6935803890228271 elapsed: 0.0016651153564453125
INFO:model.base_model:Epoch Step: 6087 Train Loss: 0.6935982704162598 elapsed: 0.0016231536865234375
INFO:model.base_model:Epoch Step: 6088 Train Loss: 0.6936161518096924 elapsed: 0.001725912094116211
INFO:model.base_model:Epoch Step: 6089 Train Loss: 0.6936339139938354 elapsed: 0.00412297248840332
INFO:model.base_model:Epoch Step: 6090 Train Loss: 0.693651556968689 elapsed: 0.00826907157897

INFO:model.base_model:Epoch Step: 6163 Train Loss: 0.6934212446212769 elapsed: 0.0016710758209228516
INFO:model.base_model:Epoch Step: 6164 Train Loss: 0.6933995485305786 elapsed: 0.0018901824951171875
INFO:model.base_model:Epoch Step: 6165 Train Loss: 0.693378210067749 elapsed: 0.0017638206481933594
INFO:model.base_model:Epoch Step: 6166 Train Loss: 0.6933573484420776 elapsed: 0.0015888214111328125
INFO:model.base_model:Epoch Step: 6167 Train Loss: 0.6933369636535645 elapsed: 0.0015869140625
INFO:model.base_model:Epoch Step: 6168 Train Loss: 0.6933171153068542 elapsed: 0.0017390251159667969
INFO:model.base_model:Epoch Step: 6169 Train Loss: 0.6932979822158813 elapsed: 0.0016312599182128906
INFO:model.base_model:Epoch Step: 6170 Train Loss: 0.6932795643806458 elapsed: 0.001558065414428711
INFO:model.base_model:Epoch Step: 6171 Train Loss: 0.693261981010437 elapsed: 0.001444101333618164
INFO:model.base_model:Epoch Step: 6172 Train Loss: 0.6932454109191895 elapsed: 0.00162506103515625
IN

INFO:model.base_model:Epoch Step: 6245 Train Loss: 0.695926308631897 elapsed: 0.002025127410888672
INFO:model.base_model:Epoch Step: 6246 Train Loss: 0.6959156394004822 elapsed: 0.001928091049194336
INFO:model.base_model:Epoch Step: 6247 Train Loss: 0.6958991289138794 elapsed: 0.0017328262329101562
INFO:model.base_model:Epoch Step: 6248 Train Loss: 0.6958768367767334 elapsed: 0.008511066436767578
INFO:model.base_model:Epoch Step: 6249 Train Loss: 0.6958487033843994 elapsed: 0.003428936004638672
INFO:model.base_model:Epoch Step: 6250 Train Loss: 0.6958149671554565 elapsed: 0.001920938491821289
INFO:model.base_model:Epoch Step: 6251 Train Loss: 0.6957756280899048 elapsed: 0.0017237663269042969
INFO:model.base_model:Epoch Step: 6252 Train Loss: 0.6957306861877441 elapsed: 0.006695985794067383
INFO:model.base_model:Epoch Step: 6253 Train Loss: 0.6956806182861328 elapsed: 0.003415822982788086
INFO:model.base_model:Epoch Step: 6254 Train Loss: 0.6956253051757812 elapsed: 0.001749038696289062

INFO:model.base_model:Epoch Step: 6327 Train Loss: 0.6957975625991821 elapsed: 0.0018360614776611328
INFO:model.base_model:Epoch Step: 6328 Train Loss: 0.6959065794944763 elapsed: 0.0019059181213378906
INFO:model.base_model:Epoch Step: 6329 Train Loss: 0.696014940738678 elapsed: 0.004724025726318359
INFO:model.base_model:Epoch Step: 6330 Train Loss: 0.6961224675178528 elapsed: 0.0065457820892333984
INFO:model.base_model:Epoch Step: 6331 Train Loss: 0.6962289810180664 elapsed: 0.0017740726470947266
INFO:model.base_model:Epoch Step: 6332 Train Loss: 0.6963343620300293 elapsed: 0.0015540122985839844
INFO:model.base_model:Epoch Step: 6333 Train Loss: 0.6964383125305176 elapsed: 0.0015091896057128906
INFO:model.base_model:Epoch Step: 6334 Train Loss: 0.6965408325195312 elapsed: 0.0051920413970947266
INFO:model.base_model:Epoch Step: 6335 Train Loss: 0.6966416239738464 elapsed: 0.004321098327636719
INFO:model.base_model:Epoch Step: 6336 Train Loss: 0.6967405676841736 elapsed: 0.0025563240051

INFO:model.base_model:Epoch Step: 6409 Train Loss: 0.6946967840194702 elapsed: 0.002123117446899414
INFO:model.base_model:Epoch Step: 6410 Train Loss: 0.6945874691009521 elapsed: 0.0022928714752197266
INFO:model.base_model:Epoch Step: 6411 Train Loss: 0.6944801807403564 elapsed: 0.0039980411529541016
INFO:model.base_model:Epoch Step: 6412 Train Loss: 0.6943751573562622 elapsed: 0.0017328262329101562
INFO:model.base_model:Epoch Step: 6413 Train Loss: 0.6942726969718933 elapsed: 0.002156972885131836
INFO:model.base_model:Epoch Step: 6414 Train Loss: 0.6941729784011841 elapsed: 0.0024499893188476562
INFO:model.base_model:Epoch Step: 6415 Train Loss: 0.6940761804580688 elapsed: 0.002229928970336914
INFO:model.base_model:Epoch Step: 6416 Train Loss: 0.6939827799797058 elapsed: 0.002167940139770508
INFO:model.base_model:Epoch Step: 6417 Train Loss: 0.6938929557800293 elapsed: 0.007124185562133789
INFO:model.base_model:Epoch Step: 6418 Train Loss: 0.6938069462776184 elapsed: 0.002503871917724

INFO:model.base_model:Epoch Step: 6491 Train Loss: 0.700417160987854 elapsed: 0.0017418861389160156
INFO:model.base_model:Epoch Step: 6492 Train Loss: 0.7003960609436035 elapsed: 0.0015497207641601562
INFO:model.base_model:Epoch Step: 6493 Train Loss: 0.7003610134124756 elapsed: 0.002351999282836914
INFO:model.base_model:Epoch Step: 6494 Train Loss: 0.7003123164176941 elapsed: 0.001971006393432617
INFO:model.base_model:Epoch Step: 6495 Train Loss: 0.7002501487731934 elapsed: 0.0017549991607666016
INFO:model.base_model:Epoch Step: 6496 Train Loss: 0.7001748085021973 elapsed: 0.0018639564514160156
INFO:model.base_model:Epoch Step: 6497 Train Loss: 0.7000865936279297 elapsed: 0.0017893314361572266
INFO:model.base_model:Epoch Step: 6498 Train Loss: 0.6999859809875488 elapsed: 0.002354860305786133
INFO:model.base_model:Epoch Step: 6499 Train Loss: 0.6998733878135681 elapsed: 0.0018420219421386719
INFO:model.base_model:Epoch Step: 6500 Train Loss: 0.6997494697570801 elapsed: 0.00222706794738

INFO:model.base_model:Epoch Step: 6573 Train Loss: 0.6939389705657959 elapsed: 0.001664876937866211
INFO:model.base_model:Epoch Step: 6574 Train Loss: 0.6939891576766968 elapsed: 0.0017247200012207031
INFO:model.base_model:Epoch Step: 6575 Train Loss: 0.6940393447875977 elapsed: 0.00185394287109375
INFO:model.base_model:Epoch Step: 6576 Train Loss: 0.6940893530845642 elapsed: 0.0017769336700439453
INFO:model.base_model:Epoch Step: 6577 Train Loss: 0.6941390037536621 elapsed: 0.0017499923706054688
INFO:model.base_model:Epoch Step: 6578 Train Loss: 0.6941882371902466 elapsed: 0.0018308162689208984
INFO:model.base_model:Epoch Step: 6579 Train Loss: 0.6942368745803833 elapsed: 0.0015621185302734375
INFO:model.base_model:Epoch Step: 6580 Train Loss: 0.6942849159240723 elapsed: 0.0015208721160888672
INFO:model.base_model:Epoch Step: 6581 Train Loss: 0.6943321824073792 elapsed: 0.0018439292907714844
INFO:model.base_model:Epoch Step: 6582 Train Loss: 0.6943786144256592 elapsed: 0.0017571449279

INFO:model.base_model:Epoch Step: 6655 Train Loss: 0.6938076019287109 elapsed: 0.002094745635986328
INFO:model.base_model:Epoch Step: 6656 Train Loss: 0.6937658786773682 elapsed: 0.0020589828491210938
INFO:model.base_model:Epoch Step: 6657 Train Loss: 0.6937247514724731 elapsed: 0.0017879009246826172
INFO:model.base_model:Epoch Step: 6658 Train Loss: 0.6936842799186707 elapsed: 0.0020017623901367188
INFO:model.base_model:Epoch Step: 6659 Train Loss: 0.6936445236206055 elapsed: 0.0018570423126220703
INFO:model.base_model:Epoch Step: 6660 Train Loss: 0.6936056613922119 elapsed: 0.0019450187683105469
INFO:model.base_model:Epoch Step: 6661 Train Loss: 0.6935676336288452 elapsed: 0.001870870590209961
INFO:model.base_model:Epoch Step: 6662 Train Loss: 0.6935306787490845 elapsed: 0.001811981201171875
INFO:model.base_model:Epoch Step: 6663 Train Loss: 0.6934949159622192 elapsed: 0.0019121170043945312
INFO:model.base_model:Epoch Step: 6664 Train Loss: 0.69346022605896 elapsed: 0.001817226409912

INFO:model.base_model:Epoch Step: 6737 Train Loss: 0.697946310043335 elapsed: 0.0023260116577148438
INFO:model.base_model:Epoch Step: 6738 Train Loss: 0.6980667114257812 elapsed: 0.002110004425048828
INFO:model.base_model:Epoch Step: 6739 Train Loss: 0.6981842517852783 elapsed: 0.001825094223022461
INFO:model.base_model:Epoch Step: 6740 Train Loss: 0.6982986927032471 elapsed: 0.0027458667755126953
INFO:model.base_model:Epoch Step: 6741 Train Loss: 0.6984098553657532 elapsed: 0.0048639774322509766
INFO:model.base_model:Epoch Step: 6742 Train Loss: 0.6985174417495728 elapsed: 0.0024230480194091797
INFO:model.base_model:Epoch Step: 6743 Train Loss: 0.6986211538314819 elapsed: 0.0022809505462646484
INFO:model.base_model:Epoch Step: 6744 Train Loss: 0.6987205743789673 elapsed: 0.004929065704345703
INFO:model.base_model:Epoch Step: 6745 Train Loss: 0.6988157033920288 elapsed: 0.0020720958709716797
INFO:model.base_model:Epoch Step: 6746 Train Loss: 0.6989061236381531 elapsed: 0.00201106071472

INFO:model.base_model:Epoch Step: 6819 Train Loss: 0.6936010718345642 elapsed: 0.001499176025390625
INFO:model.base_model:Epoch Step: 6820 Train Loss: 0.6935391426086426 elapsed: 0.00180816650390625
INFO:model.base_model:Epoch Step: 6821 Train Loss: 0.6934818029403687 elapsed: 0.0018308162689208984
INFO:model.base_model:Epoch Step: 6822 Train Loss: 0.6934291124343872 elapsed: 0.0017652511596679688
INFO:model.base_model:Epoch Step: 6823 Train Loss: 0.6933809518814087 elapsed: 0.001608133316040039
INFO:model.base_model:Epoch Step: 6824 Train Loss: 0.6933374404907227 elapsed: 0.0019750595092773438
INFO:model.base_model:Epoch Step: 6825 Train Loss: 0.6932984590530396 elapsed: 0.0019659996032714844
INFO:model.base_model:Epoch Step: 6826 Train Loss: 0.6932640075683594 elapsed: 0.0019769668579101562
INFO:model.base_model:Epoch Step: 6827 Train Loss: 0.6932340860366821 elapsed: 0.0017461776733398438
INFO:model.base_model:Epoch Step: 6828 Train Loss: 0.6932086944580078 elapsed: 0.00184416770935

INFO:model.base_model:Epoch Step: 6901 Train Loss: 0.6977705955505371 elapsed: 0.0020291805267333984
INFO:model.base_model:Epoch Step: 6902 Train Loss: 0.6978395581245422 elapsed: 0.0022618770599365234
INFO:model.base_model:Epoch Step: 6903 Train Loss: 0.6979069709777832 elapsed: 0.005494117736816406
INFO:model.base_model:Epoch Step: 6904 Train Loss: 0.6979727149009705 elapsed: 0.004559755325317383
INFO:model.base_model:Epoch Step: 6905 Train Loss: 0.698036789894104 elapsed: 0.0021979808807373047
INFO:model.base_model:Epoch Step: 6906 Train Loss: 0.6980993151664734 elapsed: 0.0020012855529785156
INFO:model.base_model:Epoch Step: 6907 Train Loss: 0.6981600522994995 elapsed: 0.002176046371459961
INFO:model.base_model:Epoch Step: 6908 Train Loss: 0.6982189416885376 elapsed: 0.0019538402557373047
INFO:model.base_model:Epoch Step: 6909 Train Loss: 0.698276162147522 elapsed: 0.001977205276489258
INFO:model.base_model:Epoch Step: 6910 Train Loss: 0.6983314752578735 elapsed: 0.0026400089263916

INFO:model.base_model:Epoch Step: 6983 Train Loss: 0.6971591711044312 elapsed: 0.0020401477813720703
INFO:model.base_model:Epoch Step: 6984 Train Loss: 0.6970905065536499 elapsed: 0.001949310302734375
INFO:model.base_model:Epoch Step: 6985 Train Loss: 0.6970213651657104 elapsed: 0.0019218921661376953
INFO:model.base_model:Epoch Step: 6986 Train Loss: 0.6969517469406128 elapsed: 0.0020198822021484375
INFO:model.base_model:Epoch Step: 6987 Train Loss: 0.6968817114830017 elapsed: 0.0020627975463867188
INFO:model.base_model:Epoch Step: 6988 Train Loss: 0.696811318397522 elapsed: 0.0022346973419189453
INFO:model.base_model:Epoch Step: 6989 Train Loss: 0.6967406272888184 elapsed: 0.002061128616333008
INFO:model.base_model:Epoch Step: 6990 Train Loss: 0.6966696977615356 elapsed: 0.0019571781158447266
INFO:model.base_model:Epoch Step: 6991 Train Loss: 0.6965985298156738 elapsed: 0.0017991065979003906
INFO:model.base_model:Epoch Step: 6992 Train Loss: 0.6965271830558777 elapsed: 0.0021140575408

INFO:model.base_model:Epoch Step: 7065 Train Loss: 0.6931474208831787 elapsed: 0.004392862319946289
INFO:model.base_model:Epoch Step: 7066 Train Loss: 0.6931474208831787 elapsed: 0.0025908946990966797
INFO:model.base_model:Epoch Step: 7067 Train Loss: 0.6931487917900085 elapsed: 0.0018737316131591797
INFO:model.base_model:Epoch Step: 7068 Train Loss: 0.6931517124176025 elapsed: 0.0020411014556884766
INFO:model.base_model:Epoch Step: 7069 Train Loss: 0.6931561231613159 elapsed: 0.00197601318359375
INFO:model.base_model:Epoch Step: 7070 Train Loss: 0.6931620836257935 elapsed: 0.002360105514526367
INFO:model.base_model:Epoch Step: 7071 Train Loss: 0.6931694746017456 elapsed: 0.0017957687377929688
INFO:model.base_model:Epoch Step: 7072 Train Loss: 0.6931784152984619 elapsed: 0.0019860267639160156
INFO:model.base_model:Epoch Step: 7073 Train Loss: 0.6931887865066528 elapsed: 0.0020132064819335938
INFO:model.base_model:Epoch Step: 7074 Train Loss: 0.6932007074356079 elapsed: 0.00203680992126

INFO:model.base_model:Epoch Step: 7147 Train Loss: 0.696586549282074 elapsed: 0.0017180442810058594
INFO:model.base_model:Epoch Step: 7148 Train Loss: 0.6966357231140137 elapsed: 0.0017650127410888672
INFO:model.base_model:Epoch Step: 7149 Train Loss: 0.6966840028762817 elapsed: 0.0018248558044433594
INFO:model.base_model:Epoch Step: 7150 Train Loss: 0.6967311501502991 elapsed: 0.0019109249114990234
INFO:model.base_model:Epoch Step: 7151 Train Loss: 0.69677734375 elapsed: 0.0017740726470947266
INFO:model.base_model:Epoch Step: 7152 Train Loss: 0.6968224048614502 elapsed: 0.001961231231689453
INFO:model.base_model:Epoch Step: 7153 Train Loss: 0.6968663930892944 elapsed: 0.0018718242645263672
INFO:model.base_model:Epoch Step: 7154 Train Loss: 0.6969091892242432 elapsed: 0.0018661022186279297
INFO:model.base_model:Epoch Step: 7155 Train Loss: 0.6969508528709412 elapsed: 0.001657724380493164
INFO:model.base_model:Epoch Step: 7156 Train Loss: 0.6969913244247437 elapsed: 0.001682043075561523

INFO:model.base_model:Epoch Step: 7229 Train Loss: 0.6961909532546997 elapsed: 0.001583099365234375
INFO:model.base_model:Epoch Step: 7230 Train Loss: 0.6961449980735779 elapsed: 0.0017619132995605469
INFO:model.base_model:Epoch Step: 7231 Train Loss: 0.6960988640785217 elapsed: 0.0017309188842773438
INFO:model.base_model:Epoch Step: 7232 Train Loss: 0.6960526704788208 elapsed: 0.0018367767333984375
INFO:model.base_model:Epoch Step: 7233 Train Loss: 0.6960063576698303 elapsed: 0.0017268657684326172
INFO:model.base_model:Epoch Step: 7234 Train Loss: 0.6959599256515503 elapsed: 0.0015001296997070312
INFO:model.base_model:Epoch Step: 7235 Train Loss: 0.695913553237915 elapsed: 0.0016362667083740234
INFO:model.base_model:Epoch Step: 7236 Train Loss: 0.6958671808242798 elapsed: 0.0016026496887207031
INFO:model.base_model:Epoch Step: 7237 Train Loss: 0.695820689201355 elapsed: 0.001592874526977539
INFO:model.base_model:Epoch Step: 7238 Train Loss: 0.6957743763923645 elapsed: 0.00151515007019

INFO:model.base_model:Epoch Step: 7311 Train Loss: 0.6934746503829956 elapsed: 0.0017788410186767578
INFO:model.base_model:Epoch Step: 7312 Train Loss: 0.6934610605239868 elapsed: 0.0017440319061279297
INFO:model.base_model:Epoch Step: 7313 Train Loss: 0.6934477686882019 elapsed: 0.0019459724426269531
INFO:model.base_model:Epoch Step: 7314 Train Loss: 0.6934348344802856 elapsed: 0.0016901493072509766
INFO:model.base_model:Epoch Step: 7315 Train Loss: 0.6934223175048828 elapsed: 0.0015940666198730469
INFO:model.base_model:Epoch Step: 7316 Train Loss: 0.6934102177619934 elapsed: 0.0016379356384277344
INFO:model.base_model:Epoch Step: 7317 Train Loss: 0.6933984756469727 elapsed: 0.001710653305053711
INFO:model.base_model:Epoch Step: 7318 Train Loss: 0.6933870911598206 elapsed: 0.0016489028930664062
INFO:model.base_model:Epoch Step: 7319 Train Loss: 0.6933760643005371 elapsed: 0.0015900135040283203
INFO:model.base_model:Epoch Step: 7320 Train Loss: 0.6933653354644775 elapsed: 0.00176906585

INFO:model.base_model:Epoch Step: 7393 Train Loss: 0.6931561231613159 elapsed: 0.0019998550415039062
INFO:model.base_model:Epoch Step: 7394 Train Loss: 0.6931569576263428 elapsed: 0.00819706916809082
INFO:model.base_model:Epoch Step: 7395 Train Loss: 0.6931577920913696 elapsed: 0.0021681785583496094
INFO:model.base_model:Epoch Step: 7396 Train Loss: 0.6931586265563965 elapsed: 0.0019698143005371094
INFO:model.base_model:Epoch Step: 7397 Train Loss: 0.6931594610214233 elapsed: 0.0019450187683105469
INFO:model.base_model:Epoch Step: 7398 Train Loss: 0.693160355091095 elapsed: 0.0017840862274169922
INFO:model.base_model:Epoch Step: 7399 Train Loss: 0.6931612491607666 elapsed: 0.0020568370819091797
INFO:model.base_model:Epoch Step: 7400 Train Loss: 0.6931621432304382 elapsed: 0.0020461082458496094
INFO:model.base_model:Epoch Step: 7401 Train Loss: 0.6931630373001099 elapsed: 0.002315044403076172
INFO:model.base_model:Epoch Step: 7402 Train Loss: 0.6931639313697815 elapsed: 0.00176000595092

INFO:model.base_model:Epoch Step: 7475 Train Loss: 0.693169355392456 elapsed: 0.001664876937866211
INFO:model.base_model:Epoch Step: 7476 Train Loss: 0.6931685209274292 elapsed: 0.0017170906066894531
INFO:model.base_model:Epoch Step: 7477 Train Loss: 0.6931676864624023 elapsed: 0.0015010833740234375
INFO:model.base_model:Epoch Step: 7478 Train Loss: 0.6931667923927307 elapsed: 0.0016219615936279297
INFO:model.base_model:Epoch Step: 7479 Train Loss: 0.6931658983230591 elapsed: 0.001714944839477539
INFO:model.base_model:Epoch Step: 7480 Train Loss: 0.6931650638580322 elapsed: 0.0016109943389892578
INFO:model.base_model:Epoch Step: 7481 Train Loss: 0.6931641697883606 elapsed: 0.001796722412109375
INFO:model.base_model:Epoch Step: 7482 Train Loss: 0.693163275718689 elapsed: 0.0015988349914550781
INFO:model.base_model:Epoch Step: 7483 Train Loss: 0.6931623816490173 elapsed: 0.0015537738800048828
INFO:model.base_model:Epoch Step: 7484 Train Loss: 0.6931615471839905 elapsed: 0.001587867736816

INFO:model.base_model:Epoch Step: 7557 Train Loss: 0.6932459473609924 elapsed: 0.0017249584197998047
INFO:model.base_model:Epoch Step: 7558 Train Loss: 0.6932505965232849 elapsed: 0.0016407966613769531
INFO:model.base_model:Epoch Step: 7559 Train Loss: 0.6932552456855774 elapsed: 0.0015189647674560547
INFO:model.base_model:Epoch Step: 7560 Train Loss: 0.6932600736618042 elapsed: 0.0017099380493164062
INFO:model.base_model:Epoch Step: 7561 Train Loss: 0.6932649612426758 elapsed: 0.0016682147979736328
INFO:model.base_model:Epoch Step: 7562 Train Loss: 0.6932699680328369 elapsed: 0.001558065414428711
INFO:model.base_model:Epoch Step: 7563 Train Loss: 0.6932750940322876 elapsed: 0.001692056655883789
INFO:model.base_model:Epoch Step: 7564 Train Loss: 0.6932802200317383 elapsed: 0.0017311573028564453
INFO:model.base_model:Epoch Step: 7565 Train Loss: 0.6932855248451233 elapsed: 0.0015411376953125
INFO:model.base_model:Epoch Step: 7566 Train Loss: 0.6932908296585083 elapsed: 0.001716136932373

INFO:model.base_model:Epoch Step: 7639 Train Loss: 0.6935538053512573 elapsed: 0.0014979839324951172
INFO:model.base_model:Epoch Step: 7640 Train Loss: 0.6935502290725708 elapsed: 0.001483917236328125
INFO:model.base_model:Epoch Step: 7641 Train Loss: 0.6935463547706604 elapsed: 0.001528024673461914
INFO:model.base_model:Epoch Step: 7642 Train Loss: 0.6935421824455261 elapsed: 0.0017850399017333984
INFO:model.base_model:Epoch Step: 7643 Train Loss: 0.6935377717018127 elapsed: 0.0015811920166015625
INFO:model.base_model:Epoch Step: 7644 Train Loss: 0.6935330629348755 elapsed: 0.001634836196899414
INFO:model.base_model:Epoch Step: 7645 Train Loss: 0.6935281753540039 elapsed: 0.0019779205322265625
INFO:model.base_model:Epoch Step: 7646 Train Loss: 0.6935229301452637 elapsed: 0.0017249584197998047
INFO:model.base_model:Epoch Step: 7647 Train Loss: 0.6935173273086548 elapsed: 0.0017690658569335938
INFO:model.base_model:Epoch Step: 7648 Train Loss: 0.6935116052627563 elapsed: 0.0016970634460

INFO:model.base_model:Epoch Step: 7721 Train Loss: 0.6932582855224609 elapsed: 0.0016930103302001953
INFO:model.base_model:Epoch Step: 7722 Train Loss: 0.6932703256607056 elapsed: 0.0016551017761230469
INFO:model.base_model:Epoch Step: 7723 Train Loss: 0.6932828426361084 elapsed: 0.0016870498657226562
INFO:model.base_model:Epoch Step: 7724 Train Loss: 0.693295955657959 elapsed: 0.001644134521484375
INFO:model.base_model:Epoch Step: 7725 Train Loss: 0.6933096647262573 elapsed: 0.001522064208984375
INFO:model.base_model:Epoch Step: 7726 Train Loss: 0.6933239102363586 elapsed: 0.0017590522766113281
INFO:model.base_model:Epoch Step: 7727 Train Loss: 0.6933386921882629 elapsed: 0.0015380382537841797
INFO:model.base_model:Epoch Step: 7728 Train Loss: 0.6933540105819702 elapsed: 0.001577138900756836
INFO:model.base_model:Epoch Step: 7729 Train Loss: 0.6933698654174805 elapsed: 0.0015990734100341797
INFO:model.base_model:Epoch Step: 7730 Train Loss: 0.6933860778808594 elapsed: 0.00247883796691

INFO:model.base_model:Epoch Step: 7803 Train Loss: 0.6937848329544067 elapsed: 0.0021140575408935547
INFO:model.base_model:Epoch Step: 7804 Train Loss: 0.6937594413757324 elapsed: 0.00168609619140625
INFO:model.base_model:Epoch Step: 7805 Train Loss: 0.6937335729598999 elapsed: 0.0017962455749511719
INFO:model.base_model:Epoch Step: 7806 Train Loss: 0.6937073469161987 elapsed: 0.0015931129455566406
INFO:model.base_model:Epoch Step: 7807 Train Loss: 0.6936807036399841 elapsed: 0.0018067359924316406
INFO:model.base_model:Epoch Step: 7808 Train Loss: 0.69365394115448 elapsed: 0.0016980171203613281
INFO:model.base_model:Epoch Step: 7809 Train Loss: 0.693626880645752 elapsed: 0.0017852783203125
INFO:model.base_model:Epoch Step: 7810 Train Loss: 0.6935998201370239 elapsed: 0.0019958019256591797
INFO:model.base_model:Epoch Step: 7811 Train Loss: 0.6935727000236511 elapsed: 0.0018780231475830078
INFO:model.base_model:Epoch Step: 7812 Train Loss: 0.6935456991195679 elapsed: 0.001725912094116211

INFO:model.base_model:Epoch Step: 7885 Train Loss: 0.6951011419296265 elapsed: 0.0015399456024169922
INFO:model.base_model:Epoch Step: 7886 Train Loss: 0.695135235786438 elapsed: 0.0017268657684326172
INFO:model.base_model:Epoch Step: 7887 Train Loss: 0.6951663494110107 elapsed: 0.0016787052154541016
INFO:model.base_model:Epoch Step: 7888 Train Loss: 0.6951944828033447 elapsed: 0.0016758441925048828
INFO:model.base_model:Epoch Step: 7889 Train Loss: 0.6952193975448608 elapsed: 0.0016529560089111328
INFO:model.base_model:Epoch Step: 7890 Train Loss: 0.6952410936355591 elapsed: 0.0016481876373291016
INFO:model.base_model:Epoch Step: 7891 Train Loss: 0.6952593326568604 elapsed: 0.001501321792602539
INFO:model.base_model:Epoch Step: 7892 Train Loss: 0.6952741146087646 elapsed: 0.0018150806427001953
INFO:model.base_model:Epoch Step: 7893 Train Loss: 0.6952852010726929 elapsed: 0.0014951229095458984
INFO:model.base_model:Epoch Step: 7894 Train Loss: 0.695292592048645 elapsed: 0.0014829635620

INFO:model.base_model:Epoch Step: 7967 Train Loss: 0.6948559284210205 elapsed: 0.0015239715576171875
INFO:model.base_model:Epoch Step: 7968 Train Loss: 0.6949882507324219 elapsed: 0.0015149116516113281
INFO:model.base_model:Epoch Step: 7969 Train Loss: 0.6951230764389038 elapsed: 0.0015816688537597656
INFO:model.base_model:Epoch Step: 7970 Train Loss: 0.6952600479125977 elapsed: 0.001699209213256836
INFO:model.base_model:Epoch Step: 7971 Train Loss: 0.6953986883163452 elapsed: 0.0016028881072998047
INFO:model.base_model:Epoch Step: 7972 Train Loss: 0.6955385208129883 elapsed: 0.001750946044921875
INFO:model.base_model:Epoch Step: 7973 Train Loss: 0.6956793069839478 elapsed: 0.0017511844635009766
INFO:model.base_model:Epoch Step: 7974 Train Loss: 0.6958203315734863 elapsed: 0.0017290115356445312
INFO:model.base_model:Epoch Step: 7975 Train Loss: 0.6959612369537354 elapsed: 0.0018172264099121094
INFO:model.base_model:Epoch Step: 7976 Train Loss: 0.6961014866828918 elapsed: 0.001809835433

INFO:model.base_model:Epoch Step: 8049 Train Loss: 0.6932985186576843 elapsed: 0.0018458366394042969
INFO:model.base_model:Epoch Step: 8050 Train Loss: 0.6933705806732178 elapsed: 0.002268075942993164
INFO:model.base_model:Epoch Step: 8051 Train Loss: 0.6934566497802734 elapsed: 0.001753091812133789
INFO:model.base_model:Epoch Step: 8052 Train Loss: 0.6935567855834961 elapsed: 0.0019190311431884766
INFO:model.base_model:Epoch Step: 8053 Train Loss: 0.6936707496643066 elapsed: 0.0018723011016845703
INFO:model.base_model:Epoch Step: 8054 Train Loss: 0.6937984824180603 elapsed: 0.0016939640045166016
INFO:model.base_model:Epoch Step: 8055 Train Loss: 0.6939396262168884 elapsed: 0.0018699169158935547
INFO:model.base_model:Epoch Step: 8056 Train Loss: 0.6940939426422119 elapsed: 0.0017168521881103516
INFO:model.base_model:Epoch Step: 8057 Train Loss: 0.6942610740661621 elapsed: 0.002300262451171875
INFO:model.base_model:Epoch Step: 8058 Train Loss: 0.6944407224655151 elapsed: 0.0019178390502

INFO:model.base_model:Epoch Step: 8131 Train Loss: 0.6953714489936829 elapsed: 0.002300262451171875
INFO:model.base_model:Epoch Step: 8132 Train Loss: 0.6951161026954651 elapsed: 0.0020689964294433594
INFO:model.base_model:Epoch Step: 8133 Train Loss: 0.6948729753494263 elapsed: 0.0020127296447753906
INFO:model.base_model:Epoch Step: 8134 Train Loss: 0.6946428418159485 elapsed: 0.0029103755950927734
INFO:model.base_model:Epoch Step: 8135 Train Loss: 0.6944266557693481 elapsed: 0.002480745315551758
INFO:model.base_model:Epoch Step: 8136 Train Loss: 0.6942252516746521 elapsed: 0.002714872360229492
INFO:model.base_model:Epoch Step: 8137 Train Loss: 0.6940393447875977 elapsed: 0.002508878707885742
INFO:model.base_model:Epoch Step: 8138 Train Loss: 0.6938695907592773 elapsed: 0.0021779537200927734
INFO:model.base_model:Epoch Step: 8139 Train Loss: 0.6937165856361389 elapsed: 0.002302885055541992
INFO:model.base_model:Epoch Step: 8140 Train Loss: 0.6935808658599854 elapsed: 0.002691268920898

INFO:model.base_model:Epoch Step: 8213 Train Loss: 0.7078309059143066 elapsed: 0.0017428398132324219
INFO:model.base_model:Epoch Step: 8214 Train Loss: 0.7077533006668091 elapsed: 0.001547098159790039
INFO:model.base_model:Epoch Step: 8215 Train Loss: 0.7076584100723267 elapsed: 0.001703023910522461
INFO:model.base_model:Epoch Step: 8216 Train Loss: 0.707546591758728 elapsed: 0.0019838809967041016
INFO:model.base_model:Epoch Step: 8217 Train Loss: 0.7074182033538818 elapsed: 0.0018720626831054688
INFO:model.base_model:Epoch Step: 8218 Train Loss: 0.7072736620903015 elapsed: 0.0019009113311767578
INFO:model.base_model:Epoch Step: 8219 Train Loss: 0.70711350440979 elapsed: 0.0017306804656982422
INFO:model.base_model:Epoch Step: 8220 Train Loss: 0.7069382667541504 elapsed: 0.001857757568359375
INFO:model.base_model:Epoch Step: 8221 Train Loss: 0.706748366355896 elapsed: 0.0019690990447998047
INFO:model.base_model:Epoch Step: 8222 Train Loss: 0.7065445184707642 elapsed: 0.00239992141723632

INFO:model.base_model:Epoch Step: 8295 Train Loss: 0.6939307451248169 elapsed: 0.001955270767211914
INFO:model.base_model:Epoch Step: 8296 Train Loss: 0.6940429210662842 elapsed: 0.0017862319946289062
INFO:model.base_model:Epoch Step: 8297 Train Loss: 0.6941614151000977 elapsed: 0.0019698143005371094
INFO:model.base_model:Epoch Step: 8298 Train Loss: 0.6942858695983887 elapsed: 0.002025127410888672
INFO:model.base_model:Epoch Step: 8299 Train Loss: 0.6944159865379333 elapsed: 0.0027201175689697266
INFO:model.base_model:Epoch Step: 8300 Train Loss: 0.6945514678955078 elapsed: 0.0020639896392822266
INFO:model.base_model:Epoch Step: 8301 Train Loss: 0.6946918964385986 elapsed: 0.0017170906066894531
INFO:model.base_model:Epoch Step: 8302 Train Loss: 0.6948373317718506 elapsed: 0.0017161369323730469
INFO:model.base_model:Epoch Step: 8303 Train Loss: 0.6949871778488159 elapsed: 0.0019419193267822266
INFO:model.base_model:Epoch Step: 8304 Train Loss: 0.6951413154602051 elapsed: 0.002230167388

INFO:model.base_model:Epoch Step: 8377 Train Loss: 0.7004523277282715 elapsed: 0.0015969276428222656
INFO:model.base_model:Epoch Step: 8378 Train Loss: 0.7003213167190552 elapsed: 0.001585245132446289
INFO:model.base_model:Epoch Step: 8379 Train Loss: 0.7001860737800598 elapsed: 0.0015799999237060547
INFO:model.base_model:Epoch Step: 8380 Train Loss: 0.7000468969345093 elapsed: 0.0015778541564941406
INFO:model.base_model:Epoch Step: 8381 Train Loss: 0.6999040842056274 elapsed: 0.0016491413116455078
INFO:model.base_model:Epoch Step: 8382 Train Loss: 0.6997578144073486 elapsed: 0.0016832351684570312
INFO:model.base_model:Epoch Step: 8383 Train Loss: 0.699608325958252 elapsed: 0.0015149116516113281
INFO:model.base_model:Epoch Step: 8384 Train Loss: 0.699455976486206 elapsed: 0.0016798973083496094
INFO:model.base_model:Epoch Step: 8385 Train Loss: 0.6993008852005005 elapsed: 0.0015981197357177734
INFO:model.base_model:Epoch Step: 8386 Train Loss: 0.6991434097290039 elapsed: 0.0017318725585

INFO:model.base_model:Epoch Step: 8459 Train Loss: 0.6941831111907959 elapsed: 0.0016281604766845703
INFO:model.base_model:Epoch Step: 8460 Train Loss: 0.694283664226532 elapsed: 0.0015621185302734375
INFO:model.base_model:Epoch Step: 8461 Train Loss: 0.694388210773468 elapsed: 0.001499176025390625
INFO:model.base_model:Epoch Step: 8462 Train Loss: 0.6944966316223145 elapsed: 0.0016379356384277344
INFO:model.base_model:Epoch Step: 8463 Train Loss: 0.6946089267730713 elapsed: 0.0017306804656982422
INFO:model.base_model:Epoch Step: 8464 Train Loss: 0.6947248578071594 elapsed: 0.0019059181213378906
INFO:model.base_model:Epoch Step: 8465 Train Loss: 0.6948443651199341 elapsed: 0.0016438961029052734
INFO:model.base_model:Epoch Step: 8466 Train Loss: 0.6949673891067505 elapsed: 0.0017240047454833984
INFO:model.base_model:Epoch Step: 8467 Train Loss: 0.6950936317443848 elapsed: 0.0016021728515625
INFO:model.base_model:Epoch Step: 8468 Train Loss: 0.6952232122421265 elapsed: 0.0015928745269775

INFO:model.base_model:Epoch Step: 8541 Train Loss: 0.7038968801498413 elapsed: 0.002251863479614258
INFO:model.base_model:Epoch Step: 8542 Train Loss: 0.7038861513137817 elapsed: 0.0017180442810058594
INFO:model.base_model:Epoch Step: 8543 Train Loss: 0.7038700580596924 elapsed: 0.0018768310546875
INFO:model.base_model:Epoch Step: 8544 Train Loss: 0.7038486003875732 elapsed: 0.004286766052246094
INFO:model.base_model:Epoch Step: 8545 Train Loss: 0.7038216590881348 elapsed: 0.003977060317993164
INFO:model.base_model:Epoch Step: 8546 Train Loss: 0.7037893533706665 elapsed: 0.001995086669921875
INFO:model.base_model:Epoch Step: 8547 Train Loss: 0.7037517428398132 elapsed: 0.0021479129791259766
INFO:model.base_model:Epoch Step: 8548 Train Loss: 0.7037087678909302 elapsed: 0.0020041465759277344
INFO:model.base_model:Epoch Step: 8549 Train Loss: 0.7036604881286621 elapsed: 0.0021080970764160156
INFO:model.base_model:Epoch Step: 8550 Train Loss: 0.7036070823669434 elapsed: 0.00197005271911621

INFO:model.base_model:Epoch Step: 8623 Train Loss: 0.6937456130981445 elapsed: 0.0017428398132324219
INFO:model.base_model:Epoch Step: 8624 Train Loss: 0.6936684250831604 elapsed: 0.0017096996307373047
INFO:model.base_model:Epoch Step: 8625 Train Loss: 0.6935962438583374 elapsed: 0.001486063003540039
INFO:model.base_model:Epoch Step: 8626 Train Loss: 0.6935291290283203 elapsed: 0.0015430450439453125
INFO:model.base_model:Epoch Step: 8627 Train Loss: 0.6934672594070435 elapsed: 0.0016529560089111328
INFO:model.base_model:Epoch Step: 8628 Train Loss: 0.6934105157852173 elapsed: 0.0017130374908447266
INFO:model.base_model:Epoch Step: 8629 Train Loss: 0.6933591961860657 elapsed: 0.0015969276428222656
INFO:model.base_model:Epoch Step: 8630 Train Loss: 0.6933133006095886 elapsed: 0.001650094985961914
INFO:model.base_model:Epoch Step: 8631 Train Loss: 0.6932728290557861 elapsed: 0.0016181468963623047
INFO:model.base_model:Epoch Step: 8632 Train Loss: 0.6932379007339478 elapsed: 0.001579046249

INFO:model.base_model:Epoch Step: 8705 Train Loss: 0.7029730081558228 elapsed: 0.0015931129455566406
INFO:model.base_model:Epoch Step: 8706 Train Loss: 0.7031372785568237 elapsed: 0.0017058849334716797
INFO:model.base_model:Epoch Step: 8707 Train Loss: 0.7032966017723083 elapsed: 0.0016319751739501953
INFO:model.base_model:Epoch Step: 8708 Train Loss: 0.7034507393836975 elapsed: 0.0019059181213378906
INFO:model.base_model:Epoch Step: 8709 Train Loss: 0.7035993337631226 elapsed: 0.0016620159149169922
INFO:model.base_model:Epoch Step: 8710 Train Loss: 0.7037423849105835 elapsed: 0.0016949176788330078
INFO:model.base_model:Epoch Step: 8711 Train Loss: 0.7038793563842773 elapsed: 0.0015287399291992188
INFO:model.base_model:Epoch Step: 8712 Train Loss: 0.7040101289749146 elapsed: 0.0017819404602050781
INFO:model.base_model:Epoch Step: 8713 Train Loss: 0.7041347026824951 elapsed: 0.0017457008361816406
INFO:model.base_model:Epoch Step: 8714 Train Loss: 0.7042526602745056 elapsed: 0.0016229152

INFO:model.base_model:Epoch Step: 8787 Train Loss: 0.6965254545211792 elapsed: 0.001825094223022461
INFO:model.base_model:Epoch Step: 8788 Train Loss: 0.6963603496551514 elapsed: 0.001981973648071289
INFO:model.base_model:Epoch Step: 8789 Train Loss: 0.6961988210678101 elapsed: 0.0016667842864990234
INFO:model.base_model:Epoch Step: 8790 Train Loss: 0.6960408687591553 elapsed: 0.0019898414611816406
INFO:model.base_model:Epoch Step: 8791 Train Loss: 0.6958867311477661 elapsed: 0.001767873764038086
INFO:model.base_model:Epoch Step: 8792 Train Loss: 0.6957364082336426 elapsed: 0.0017600059509277344
INFO:model.base_model:Epoch Step: 8793 Train Loss: 0.6955900192260742 elapsed: 0.0017118453979492188
INFO:model.base_model:Epoch Step: 8794 Train Loss: 0.6954476833343506 elapsed: 0.001766204833984375
INFO:model.base_model:Epoch Step: 8795 Train Loss: 0.6953094005584717 elapsed: 0.0017290115356445312
INFO:model.base_model:Epoch Step: 8796 Train Loss: 0.6951752305030823 elapsed: 0.00174307823181

INFO:model.base_model:Epoch Step: 8869 Train Loss: 0.6952316761016846 elapsed: 0.001631021499633789
INFO:model.base_model:Epoch Step: 8870 Train Loss: 0.6953121423721313 elapsed: 0.0015819072723388672
INFO:model.base_model:Epoch Step: 8871 Train Loss: 0.6953930854797363 elapsed: 0.0016508102416992188
INFO:model.base_model:Epoch Step: 8872 Train Loss: 0.6954742670059204 elapsed: 0.0016210079193115234
INFO:model.base_model:Epoch Step: 8873 Train Loss: 0.6955558061599731 elapsed: 0.0018618106842041016
INFO:model.base_model:Epoch Step: 8874 Train Loss: 0.695637583732605 elapsed: 0.0017430782318115234
INFO:model.base_model:Epoch Step: 8875 Train Loss: 0.6957195997238159 elapsed: 0.0017299652099609375
INFO:model.base_model:Epoch Step: 8876 Train Loss: 0.6958016157150269 elapsed: 0.0016031265258789062
INFO:model.base_model:Epoch Step: 8877 Train Loss: 0.6958837509155273 elapsed: 0.0016050338745117188
INFO:model.base_model:Epoch Step: 8878 Train Loss: 0.6959659457206726 elapsed: 0.001791954040

INFO:model.base_model:Epoch Step: 8951 Train Loss: 0.6992263793945312 elapsed: 0.0020360946655273438
INFO:model.base_model:Epoch Step: 8952 Train Loss: 0.6992124319076538 elapsed: 0.002137899398803711
INFO:model.base_model:Epoch Step: 8953 Train Loss: 0.6991968154907227 elapsed: 0.0016698837280273438
INFO:model.base_model:Epoch Step: 8954 Train Loss: 0.6991795897483826 elapsed: 0.005132198333740234
INFO:model.base_model:Epoch Step: 8955 Train Loss: 0.699160635471344 elapsed: 0.001795053482055664
INFO:model.base_model:Epoch Step: 8956 Train Loss: 0.6991400718688965 elapsed: 0.0018210411071777344
INFO:model.base_model:Epoch Step: 8957 Train Loss: 0.69911789894104 elapsed: 0.0018601417541503906
INFO:model.base_model:Epoch Step: 8958 Train Loss: 0.6990941166877747 elapsed: 0.0018229484558105469
INFO:model.base_model:Epoch Step: 8959 Train Loss: 0.6990687251091003 elapsed: 0.002125263214111328
INFO:model.base_model:Epoch Step: 8960 Train Loss: 0.6990418434143066 elapsed: 0.00186300277709960

INFO:model.base_model:Epoch Step: 9033 Train Loss: 0.6947951316833496 elapsed: 0.0016942024230957031
INFO:model.base_model:Epoch Step: 9034 Train Loss: 0.6947358846664429 elapsed: 0.0019328594207763672
INFO:model.base_model:Epoch Step: 9035 Train Loss: 0.694677472114563 elapsed: 0.0018379688262939453
INFO:model.base_model:Epoch Step: 9036 Train Loss: 0.6946197748184204 elapsed: 0.0017750263214111328
INFO:model.base_model:Epoch Step: 9037 Train Loss: 0.6945628523826599 elapsed: 0.002171039581298828
INFO:model.base_model:Epoch Step: 9038 Train Loss: 0.6945067644119263 elapsed: 0.001638174057006836
INFO:model.base_model:Epoch Step: 9039 Train Loss: 0.6944515705108643 elapsed: 0.001847982406616211
INFO:model.base_model:Epoch Step: 9040 Train Loss: 0.6943972706794739 elapsed: 0.0017669200897216797
INFO:model.base_model:Epoch Step: 9041 Train Loss: 0.6943438053131104 elapsed: 0.001589059829711914
INFO:model.base_model:Epoch Step: 9042 Train Loss: 0.694291353225708 elapsed: 0.0015571117401123

INFO:model.base_model:Epoch Step: 9115 Train Loss: 0.6938440203666687 elapsed: 0.0017428398132324219
INFO:model.base_model:Epoch Step: 9116 Train Loss: 0.6938871741294861 elapsed: 0.0026311874389648438
INFO:model.base_model:Epoch Step: 9117 Train Loss: 0.6939315795898438 elapsed: 0.002424001693725586
INFO:model.base_model:Epoch Step: 9118 Train Loss: 0.6939769983291626 elapsed: 0.0017850399017333984
INFO:model.base_model:Epoch Step: 9119 Train Loss: 0.694023609161377 elapsed: 0.0019152164459228516
INFO:model.base_model:Epoch Step: 9120 Train Loss: 0.6940712332725525 elapsed: 0.0016560554504394531
INFO:model.base_model:Epoch Step: 9121 Train Loss: 0.694119930267334 elapsed: 0.0016341209411621094
INFO:model.base_model:Epoch Step: 9122 Train Loss: 0.6941697597503662 elapsed: 0.0017609596252441406
INFO:model.base_model:Epoch Step: 9123 Train Loss: 0.6942206025123596 elapsed: 0.0016498565673828125
INFO:model.base_model:Epoch Step: 9124 Train Loss: 0.6942723989486694 elapsed: 0.0018892288208

INFO:model.base_model:Epoch Step: 9197 Train Loss: 0.6987718343734741 elapsed: 0.0017099380493164062
INFO:model.base_model:Epoch Step: 9198 Train Loss: 0.6988121271133423 elapsed: 0.0022346973419189453
INFO:model.base_model:Epoch Step: 9199 Train Loss: 0.6988511085510254 elapsed: 0.0019550323486328125
INFO:model.base_model:Epoch Step: 9200 Train Loss: 0.6988886594772339 elapsed: 0.0023469924926757812
INFO:model.base_model:Epoch Step: 9201 Train Loss: 0.6989248991012573 elapsed: 0.0020661354064941406
INFO:model.base_model:Epoch Step: 9202 Train Loss: 0.6989597082138062 elapsed: 0.0019538402557373047
INFO:model.base_model:Epoch Step: 9203 Train Loss: 0.6989930272102356 elapsed: 0.001811981201171875
INFO:model.base_model:Epoch Step: 9204 Train Loss: 0.6990247964859009 elapsed: 0.0020411014556884766
INFO:model.base_model:Epoch Step: 9205 Train Loss: 0.6990551948547363 elapsed: 0.0018668174743652344
INFO:model.base_model:Epoch Step: 9206 Train Loss: 0.6990840435028076 elapsed: 0.00186705589

INFO:model.base_model:Epoch Step: 9279 Train Loss: 0.697272539138794 elapsed: 0.0018198490142822266
INFO:model.base_model:Epoch Step: 9280 Train Loss: 0.6972106695175171 elapsed: 0.0025331974029541016
INFO:model.base_model:Epoch Step: 9281 Train Loss: 0.6971483826637268 elapsed: 0.002279043197631836
INFO:model.base_model:Epoch Step: 9282 Train Loss: 0.697085976600647 elapsed: 0.002095937728881836
INFO:model.base_model:Epoch Step: 9283 Train Loss: 0.6970232725143433 elapsed: 0.0022361278533935547
INFO:model.base_model:Epoch Step: 9284 Train Loss: 0.6969603300094604 elapsed: 0.004884958267211914
INFO:model.base_model:Epoch Step: 9285 Train Loss: 0.6968972682952881 elapsed: 0.0029649734497070312
INFO:model.base_model:Epoch Step: 9286 Train Loss: 0.6968340873718262 elapsed: 0.001878976821899414
INFO:model.base_model:Epoch Step: 9287 Train Loss: 0.6967707276344299 elapsed: 0.0017218589782714844
INFO:model.base_model:Epoch Step: 9288 Train Loss: 0.6967073082923889 elapsed: 0.0043830871582031

INFO:model.base_model:Epoch Step: 9361 Train Loss: 0.6933295130729675 elapsed: 0.0022580623626708984
INFO:model.base_model:Epoch Step: 9362 Train Loss: 0.6933121681213379 elapsed: 0.0021538734436035156
INFO:model.base_model:Epoch Step: 9363 Train Loss: 0.6932957172393799 elapsed: 0.0023190975189208984
INFO:model.base_model:Epoch Step: 9364 Train Loss: 0.6932802200317383 elapsed: 0.002067089080810547
INFO:model.base_model:Epoch Step: 9365 Train Loss: 0.6932654976844788 elapsed: 0.002079010009765625
INFO:model.base_model:Epoch Step: 9366 Train Loss: 0.6932516098022461 elapsed: 0.002048015594482422
INFO:model.base_model:Epoch Step: 9367 Train Loss: 0.6932387351989746 elapsed: 0.0022521018981933594
INFO:model.base_model:Epoch Step: 9368 Train Loss: 0.69322669506073 elapsed: 0.0021610260009765625
INFO:model.base_model:Epoch Step: 9369 Train Loss: 0.6932154893875122 elapsed: 0.0020089149475097656
INFO:model.base_model:Epoch Step: 9370 Train Loss: 0.6932052373886108 elapsed: 0.001784086227416

INFO:model.base_model:Epoch Step: 9443 Train Loss: 0.6943069696426392 elapsed: 0.0017919540405273438
INFO:model.base_model:Epoch Step: 9444 Train Loss: 0.6943389177322388 elapsed: 0.0016639232635498047
INFO:model.base_model:Epoch Step: 9445 Train Loss: 0.6943710446357727 elapsed: 0.0016758441925048828
INFO:model.base_model:Epoch Step: 9446 Train Loss: 0.694403350353241 elapsed: 0.0017478466033935547
INFO:model.base_model:Epoch Step: 9447 Train Loss: 0.6944358348846436 elapsed: 0.001851797103881836
INFO:model.base_model:Epoch Step: 9448 Train Loss: 0.6944684982299805 elapsed: 0.0015869140625
INFO:model.base_model:Epoch Step: 9449 Train Loss: 0.6945013403892517 elapsed: 0.0015780925750732422
INFO:model.base_model:Epoch Step: 9450 Train Loss: 0.6945342421531677 elapsed: 0.0016601085662841797
INFO:model.base_model:Epoch Step: 9451 Train Loss: 0.6945673227310181 elapsed: 0.0015952587127685547
INFO:model.base_model:Epoch Step: 9452 Train Loss: 0.6946004629135132 elapsed: 0.001683950424194336

INFO:model.base_model:Epoch Step: 9525 Train Loss: 0.696603000164032 elapsed: 0.0017518997192382812
INFO:model.base_model:Epoch Step: 9526 Train Loss: 0.6966158151626587 elapsed: 0.0016829967498779297
INFO:model.base_model:Epoch Step: 9527 Train Loss: 0.6966280937194824 elapsed: 0.0014410018920898438
INFO:model.base_model:Epoch Step: 9528 Train Loss: 0.6966397762298584 elapsed: 0.0015881061553955078
INFO:model.base_model:Epoch Step: 9529 Train Loss: 0.6966509222984314 elapsed: 0.0016126632690429688
INFO:model.base_model:Epoch Step: 9530 Train Loss: 0.6966614127159119 elapsed: 0.001847982406616211
INFO:model.base_model:Epoch Step: 9531 Train Loss: 0.6966713666915894 elapsed: 0.0017201900482177734
INFO:model.base_model:Epoch Step: 9532 Train Loss: 0.6966806650161743 elapsed: 0.00162506103515625
INFO:model.base_model:Epoch Step: 9533 Train Loss: 0.6966893076896667 elapsed: 0.0017359256744384766
INFO:model.base_model:Epoch Step: 9534 Train Loss: 0.6966973543167114 elapsed: 0.00179934501647

INFO:model.base_model:Epoch Step: 9607 Train Loss: 0.6956931352615356 elapsed: 0.0017130374908447266
INFO:model.base_model:Epoch Step: 9608 Train Loss: 0.6956621408462524 elapsed: 0.0018198490142822266
INFO:model.base_model:Epoch Step: 9609 Train Loss: 0.6956309080123901 elapsed: 0.0017099380493164062
INFO:model.base_model:Epoch Step: 9610 Train Loss: 0.695599377155304 elapsed: 0.0016109943389892578
INFO:model.base_model:Epoch Step: 9611 Train Loss: 0.6955676674842834 elapsed: 0.0016429424285888672
INFO:model.base_model:Epoch Step: 9612 Train Loss: 0.6955357193946838 elapsed: 0.0016489028930664062
INFO:model.base_model:Epoch Step: 9613 Train Loss: 0.6955035924911499 elapsed: 0.001806020736694336
INFO:model.base_model:Epoch Step: 9614 Train Loss: 0.6954712867736816 elapsed: 0.0016760826110839844
INFO:model.base_model:Epoch Step: 9615 Train Loss: 0.6954387426376343 elapsed: 0.0018818378448486328
INFO:model.base_model:Epoch Step: 9616 Train Loss: 0.6954060792922974 elapsed: 0.001486778259

INFO:model.base_model:Epoch Step: 9689 Train Loss: 0.6933338642120361 elapsed: 0.0018148422241210938
INFO:model.base_model:Epoch Step: 9690 Train Loss: 0.6933192014694214 elapsed: 0.0018799304962158203
INFO:model.base_model:Epoch Step: 9691 Train Loss: 0.6933050155639648 elapsed: 0.0018410682678222656
INFO:model.base_model:Epoch Step: 9692 Train Loss: 0.693291425704956 elapsed: 0.0017957687377929688
INFO:model.base_model:Epoch Step: 9693 Train Loss: 0.6932784914970398 elapsed: 0.0015969276428222656
INFO:model.base_model:Epoch Step: 9694 Train Loss: 0.6932660341262817 elapsed: 0.001703023910522461
INFO:model.base_model:Epoch Step: 9695 Train Loss: 0.6932542324066162 elapsed: 0.001569986343383789
INFO:model.base_model:Epoch Step: 9696 Train Loss: 0.6932430267333984 elapsed: 0.0026102066040039062
INFO:model.base_model:Epoch Step: 9697 Train Loss: 0.6932324171066284 elapsed: 0.0016760826110839844
INFO:model.base_model:Epoch Step: 9698 Train Loss: 0.6932224035263062 elapsed: 0.0015289783477

INFO:model.base_model:Epoch Step: 9771 Train Loss: 0.6942089796066284 elapsed: 0.0017321109771728516
INFO:model.base_model:Epoch Step: 9772 Train Loss: 0.6942442655563354 elapsed: 0.0019309520721435547
INFO:model.base_model:Epoch Step: 9773 Train Loss: 0.6942800283432007 elapsed: 0.0018239021301269531
INFO:model.base_model:Epoch Step: 9774 Train Loss: 0.6943161487579346 elapsed: 0.0017910003662109375
INFO:model.base_model:Epoch Step: 9775 Train Loss: 0.6943528652191162 elapsed: 0.0017042160034179688
INFO:model.base_model:Epoch Step: 9776 Train Loss: 0.6943899393081665 elapsed: 0.0016551017761230469
INFO:model.base_model:Epoch Step: 9777 Train Loss: 0.694427490234375 elapsed: 0.001766204833984375
INFO:model.base_model:Epoch Step: 9778 Train Loss: 0.6944653987884521 elapsed: 0.0017867088317871094
INFO:model.base_model:Epoch Step: 9779 Train Loss: 0.6945037841796875 elapsed: 0.0018038749694824219
INFO:model.base_model:Epoch Step: 9780 Train Loss: 0.6945425271987915 elapsed: 0.001955032348

INFO:model.base_model:Epoch Step: 9853 Train Loss: 0.6976787447929382 elapsed: 0.0017688274383544922
INFO:model.base_model:Epoch Step: 9854 Train Loss: 0.69771409034729 elapsed: 0.0015380382537841797
INFO:model.base_model:Epoch Step: 9855 Train Loss: 0.6977488994598389 elapsed: 0.0014231204986572266
INFO:model.base_model:Epoch Step: 9856 Train Loss: 0.6977831125259399 elapsed: 0.0018908977508544922
INFO:model.base_model:Epoch Step: 9857 Train Loss: 0.697816789150238 elapsed: 0.001847982406616211
INFO:model.base_model:Epoch Step: 9858 Train Loss: 0.6978499293327332 elapsed: 0.0017158985137939453
INFO:model.base_model:Epoch Step: 9859 Train Loss: 0.6978824734687805 elapsed: 0.0018429756164550781
INFO:model.base_model:Epoch Step: 9860 Train Loss: 0.6979143619537354 elapsed: 0.0016551017761230469
INFO:model.base_model:Epoch Step: 9861 Train Loss: 0.6979458332061768 elapsed: 0.0015707015991210938
INFO:model.base_model:Epoch Step: 9862 Train Loss: 0.6979765892028809 elapsed: 0.00155282020568

INFO:model.base_model:Epoch Step: 9935 Train Loss: 0.6982314586639404 elapsed: 0.0017781257629394531
INFO:model.base_model:Epoch Step: 9936 Train Loss: 0.6982080936431885 elapsed: 0.001682281494140625
INFO:model.base_model:Epoch Step: 9937 Train Loss: 0.6981841325759888 elapsed: 0.0016388893127441406
INFO:model.base_model:Epoch Step: 9938 Train Loss: 0.6981595754623413 elapsed: 0.0017178058624267578
INFO:model.base_model:Epoch Step: 9939 Train Loss: 0.6981345415115356 elapsed: 0.001628875732421875
INFO:model.base_model:Epoch Step: 9940 Train Loss: 0.6981088519096375 elapsed: 0.0016109943389892578
INFO:model.base_model:Epoch Step: 9941 Train Loss: 0.6980826258659363 elapsed: 0.0016629695892333984
INFO:model.base_model:Epoch Step: 9942 Train Loss: 0.6980559229850769 elapsed: 0.0017118453979492188
INFO:model.base_model:Epoch Step: 9943 Train Loss: 0.6980286836624146 elapsed: 0.0016193389892578125
INFO:model.base_model:Epoch Step: 9944 Train Loss: 0.6980009078979492 elapsed: 0.001734018325

In [None]:

# Train for `epochs` epochs. After every epoch, run validation and checkpoint the
# model whenever its validation AUC ties or beats the best AUC logged so far
# (ties are saved on purpose, so the most recent best-scoring epoch wins).
for epoch in range(epochs):
    # **************** Train ****************
    train_result_info = model.train_on_epoch(epoch, train_loader, model, loss_func, optimizer)
    # The returned loss tensor carries a grad_fn; storing it as-is in train_log would
    # keep this epoch's autograd graph alive for the rest of the session. Detach
    # tensor values before logging/storing them; non-tensor values pass through untouched.
    train_result_info = {
        key: value.detach() if torch.is_tensor(value) else value
        for key, value in train_result_info.items()
    }
    train_log[f'epoch_train_{epoch}'] = train_result_info
    logger.debug(f"epoch {epoch} train finished. train_result_info:{train_result_info}")

    # ************** validation *********************
    val_result = model.validation_on_epoch(epoch, model, val_loader, val_metrics)
    val_key = f'epoch_val_{epoch}'

    # Best AUC among the *other* logged epochs, read before this epoch is recorded.
    # The same-key entry is skipped so a re-run of this cell (which overwrites that
    # entry) is compared against exactly the same set of epochs as before.
    best_other_auc = max(
        (result['auc'] for key, result in val_log.items() if key != val_key),
        default=None,
    )
    val_log[val_key] = val_result
    logger.info(f"epoch {epoch} val result: {val_result}")

    # Save on the first logged epoch, or when this epoch matches/improves the best AUC.
    if best_other_auc is None or val_result['auc'] >= best_other_auc:
        best_model = save_model(config_name, model.copy(), epoch)


INFO:model.base_model:Epoch Step: 0 Train Loss: 78.20327758789062 elapsed: 0.8567650318145752
DEBUG:base_model:epoch 0 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(78.2033, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:0, validation elapsed_time:0.08505606651306152
INFO:base_model:epoch 0 val result: {'loss': tensor(0.6742), 'auc': 0.4935604476051035}


save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0000.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0000.pth


INFO:model.base_model:Epoch Step: 1 Train Loss: 71.86639404296875 elapsed: 0.6606976985931396
DEBUG:base_model:epoch 1 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(71.8664, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1, validation elapsed_time:0.08582878112792969
INFO:base_model:epoch 1 val result: {'loss': tensor(0.6067), 'auc': 0.5}


save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0001.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0001.pth


INFO:model.base_model:Epoch Step: 2 Train Loss: 67.47532653808594 elapsed: 0.7791690826416016
DEBUG:base_model:epoch 2 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(67.4753, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2, validation elapsed_time:0.2296457290649414
INFO:base_model:epoch 2 val result: {'loss': tensor(0.5874), 'auc': 0.5}


save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0002.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0002.pth


INFO:model.base_model:Epoch Step: 3 Train Loss: 70.15928649902344 elapsed: 0.8057787418365479
DEBUG:base_model:epoch 3 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(70.1593, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3, validation elapsed_time:0.09131097793579102
INFO:base_model:epoch 3 val result: {'loss': tensor(0.5892), 'auc': 0.5}


save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0003.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0003.pth


INFO:model.base_model:Epoch Step: 4 Train Loss: 67.29615020751953 elapsed: 0.7332003116607666
DEBUG:base_model:epoch 4 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(67.2962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4, validation elapsed_time:0.08645319938659668
INFO:base_model:epoch 4 val result: {'loss': tensor(0.6023), 'auc': 0.5064395523948965}


save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0004.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0004.pth


INFO:model.base_model:Epoch Step: 5 Train Loss: 69.33292388916016 elapsed: 0.7345247268676758
DEBUG:base_model:epoch 5 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(69.3329, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5, validation elapsed_time:0.08733916282653809
INFO:base_model:epoch 5 val result: {'loss': tensor(0.6118), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6 Train Loss: 67.25287628173828 elapsed: 0.7277266979217529
DEBUG:base_model:epoch 6 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(67.2529, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6, validation elapsed_time:0.1540219783782959
INFO:base_model:epoch 6 val result: {'loss': tensor(0.5799), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7 Train Loss: 66.82549285888672 elapsed: 0.7538151741027832
DEBUG:base_model:epoch 7 train finished. train_result_info:{'total_

INFO:model.base_model:cur_epoch:24, validation elapsed_time:0.08392620086669922
INFO:base_model:epoch 24 val result: {'loss': tensor(0.5811), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 25 Train Loss: 66.70577239990234 elapsed: 0.6706039905548096
DEBUG:base_model:epoch 25 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(66.7058, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:25, validation elapsed_time:0.08544778823852539
INFO:base_model:epoch 25 val result: {'loss': tensor(0.5910), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 26 Train Loss: 67.32447052001953 elapsed: 0.7528092861175537
DEBUG:base_model:epoch 26 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(67.3245, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:26, validation elapsed_time:0.16180992126464844
INFO:base_model:epoch 26 val result: {'loss': tensor(0.5886), 'auc': 0.5

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0041.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0041.pth


INFO:model.base_model:Epoch Step: 42 Train Loss: 67.13821411132812 elapsed: 0.6555068492889404
DEBUG:base_model:epoch 42 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(67.1382, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:42, validation elapsed_time:0.1541759967803955
INFO:base_model:epoch 42 val result: {'loss': tensor(0.5796), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 43 Train Loss: 67.22042083740234 elapsed: 0.7800130844116211
DEBUG:base_model:epoch 43 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(67.2204, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:43, validation elapsed_time:0.09423089027404785
INFO:base_model:epoch 43 val result: {'loss': tensor(0.5996), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 44 Train Loss: 66.86862182617188 elapsed: 0.655332088470459
DEBUG:base_model:epoch 44 train finished. train_result_info

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0046.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0046.pth


INFO:model.base_model:Epoch Step: 47 Train Loss: 67.59391021728516 elapsed: 1.0103821754455566
DEBUG:base_model:epoch 47 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(67.5939, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:47, validation elapsed_time:0.19698715209960938
INFO:base_model:epoch 47 val result: {'loss': tensor(0.5937), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 48 Train Loss: 66.65339660644531 elapsed: 0.5953679084777832
DEBUG:base_model:epoch 48 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(66.6534, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:48, validation elapsed_time:0.10861897468566895
INFO:base_model:epoch 48 val result: {'loss': tensor(0.5797), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 49 Train Loss: 67.50040435791016 elapsed: 0.6204032897949219
DEBUG:base_model:epoch 49 t

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0052.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0052.pth


INFO:model.base_model:Epoch Step: 53 Train Loss: 67.17709350585938 elapsed: 0.6243209838867188
DEBUG:base_model:epoch 53 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(67.1771, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:53, validation elapsed_time:0.2845120429992676
INFO:base_model:epoch 53 val result: {'loss': tensor(0.5849), 'auc': 0.5064395523948965}


save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0053.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0053.pth


INFO:model.base_model:Epoch Step: 54 Train Loss: 66.99456787109375 elapsed: 0.7782618999481201
DEBUG:base_model:epoch 54 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(66.9946, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:54, validation elapsed_time:0.12302803993225098
INFO:base_model:epoch 54 val result: {'loss': tensor(0.5826), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 55 Train Loss: 67.74158477783203 elapsed: 0.616034984588623
DEBUG:base_model:epoch 55 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(67.7416, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:55, validation elapsed_time:0.0958108901977539
INFO:base_model:epoch 55 val result: {'loss': tensor(0.6021), 'auc': 0.5064395523948965}


save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0055.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0055.pth


INFO:model.base_model:Epoch Step: 56 Train Loss: 67.0916519165039 elapsed: 0.671457052230835
DEBUG:base_model:epoch 56 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(67.0917, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:56, validation elapsed_time:0.08218097686767578
INFO:base_model:epoch 56 val result: {'loss': tensor(0.5786), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 57 Train Loss: 67.37477111816406 elapsed: 0.9366710186004639
DEBUG:base_model:epoch 57 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(67.3748, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:57, validation elapsed_time:0.09090018272399902
INFO:base_model:epoch 57 val result: {'loss': tensor(0.5876), 'auc': 0.5064395523948965}


save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0057.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0057.pth


INFO:model.base_model:Epoch Step: 58 Train Loss: 67.29767608642578 elapsed: 0.6757738590240479
DEBUG:base_model:epoch 58 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(67.2977, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:58, validation elapsed_time:0.10399127006530762
INFO:base_model:epoch 58 val result: {'loss': tensor(0.5819), 'auc': 0.5064395523948965}


save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0058.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0058.pth


INFO:model.base_model:Epoch Step: 59 Train Loss: 67.35686492919922 elapsed: 0.6282210350036621
DEBUG:base_model:epoch 59 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(67.3569, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:59, validation elapsed_time:0.0995779037475586
INFO:base_model:epoch 59 val result: {'loss': tensor(0.5969), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 60 Train Loss: 66.80592346191406 elapsed: 0.8170828819274902
DEBUG:base_model:epoch 60 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(66.8059, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:60, validation elapsed_time:0.09492206573486328
INFO:base_model:epoch 60 val result: {'loss': tensor(0.5808), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 61 Train Loss: 68.06983947753906 elapsed: 0.7014241218566895
DEBUG:base_model:epoch 61 train finished. t

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0065.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0065.pth


INFO:model.base_model:Epoch Step: 66 Train Loss: 66.95878601074219 elapsed: 0.7092061042785645
DEBUG:base_model:epoch 66 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(66.9588, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:66, validation elapsed_time:0.0979149341583252
INFO:base_model:epoch 66 val result: {'loss': tensor(0.6002), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 67 Train Loss: 67.31070709228516 elapsed: 0.9109511375427246
DEBUG:base_model:epoch 67 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(67.3107, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:67, validation elapsed_time:0.12611699104309082
INFO:base_model:epoch 67 val result: {'loss': tensor(0.5784), 'auc': 0.5064395523948965}


save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0067.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0067.pth


INFO:model.base_model:Epoch Step: 68 Train Loss: 67.93157958984375 elapsed: 0.6596062183380127
DEBUG:base_model:epoch 68 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(67.9316, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:68, validation elapsed_time:0.08536219596862793
INFO:base_model:epoch 68 val result: {'loss': tensor(0.5825), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 69 Train Loss: 67.31100463867188 elapsed: 0.6538419723510742
DEBUG:base_model:epoch 69 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(67.3110, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:69, validation elapsed_time:0.16307711601257324
INFO:base_model:epoch 69 val result: {'loss': tensor(0.6078), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 70 Train Loss: 68.33735656738281 elapsed: 0.8616058826446533
DEBUG:base_model:epoch 70 train finished. 

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0078.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0078.pth


INFO:model.base_model:Epoch Step: 79 Train Loss: 67.84319305419922 elapsed: 1.02512788772583
DEBUG:base_model:epoch 79 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(67.8432, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:79, validation elapsed_time:0.10666012763977051
INFO:base_model:epoch 79 val result: {'loss': tensor(0.6167), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 80 Train Loss: 68.29589080810547 elapsed: 0.8569738864898682
DEBUG:base_model:epoch 80 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(68.2959, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:80, validation elapsed_time:0.08891487121582031
INFO:base_model:epoch 80 val result: {'loss': tensor(0.6117), 'auc': 0.5064395523948965}


save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0080.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0080.pth


INFO:model.base_model:Epoch Step: 81 Train Loss: 68.93241119384766 elapsed: 0.6226117610931396
DEBUG:base_model:epoch 81 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(68.9324, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:81, validation elapsed_time:0.08635115623474121
INFO:base_model:epoch 81 val result: {'loss': tensor(0.5957), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 82 Train Loss: 69.3417739868164 elapsed: 0.6871738433837891
DEBUG:base_model:epoch 82 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(69.3418, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:82, validation elapsed_time:0.16550111770629883
INFO:base_model:epoch 82 val result: {'loss': tensor(0.5943), 'auc': 0.5064395523948965}


save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0082.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0082.pth


INFO:model.base_model:Epoch Step: 83 Train Loss: 67.14940643310547 elapsed: 0.8618109226226807
DEBUG:base_model:epoch 83 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(67.1494, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:83, validation elapsed_time:0.1052999496459961
INFO:base_model:epoch 83 val result: {'loss': tensor(0.5820), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 84 Train Loss: 67.37820434570312 elapsed: 0.6819870471954346
DEBUG:base_model:epoch 84 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(67.3782, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:84, validation elapsed_time:0.10804200172424316
INFO:base_model:epoch 84 val result: {'loss': tensor(0.5784), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 85 Train Loss: 66.9190902709961 elapsed: 0.6196169853210449
DEBUG:base_model:epoch 85 train finished. tr

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0086.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0086.pth


INFO:model.base_model:Epoch Step: 87 Train Loss: 67.11455535888672 elapsed: 0.7735731601715088
DEBUG:base_model:epoch 87 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(67.1146, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:87, validation elapsed_time:0.08379578590393066
INFO:base_model:epoch 87 val result: {'loss': tensor(0.5905), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 88 Train Loss: 68.64495086669922 elapsed: 0.6643388271331787
DEBUG:base_model:epoch 88 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(68.6450, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:88, validation elapsed_time:0.0838477611541748
INFO:base_model:epoch 88 val result: {'loss': tensor(0.5808), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 89 Train Loss: 66.93873596191406 elapsed: 0.7881858348846436
DEBUG:base_model:epoch 89 train finished. t

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0094.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0094.pth


INFO:model.base_model:Epoch Step: 95 Train Loss: 67.09371948242188 elapsed: 0.6100211143493652
DEBUG:base_model:epoch 95 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(67.0937, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:95, validation elapsed_time:0.1014249324798584
INFO:base_model:epoch 95 val result: {'loss': tensor(0.5787), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 96 Train Loss: 67.13793182373047 elapsed: 0.6671898365020752
DEBUG:base_model:epoch 96 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(67.1379, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:96, validation elapsed_time:0.12227702140808105
INFO:base_model:epoch 96 val result: {'loss': tensor(0.5808), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 97 Train Loss: 66.85160064697266 elapsed: 0.8735737800598145
DEBUG:base_model:epoch 97 train finished. train_result_inf

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0097.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0097.pth


INFO:model.base_model:Epoch Step: 98 Train Loss: 67.95380401611328 elapsed: 0.7050449848175049
DEBUG:base_model:epoch 98 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(67.9538, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:98, validation elapsed_time:0.1170511245727539
INFO:base_model:epoch 98 val result: {'loss': tensor(0.6026), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 99 Train Loss: 67.15217590332031 elapsed: 0.6546239852905273
DEBUG:base_model:epoch 99 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(67.1522, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:99, validation elapsed_time:0.09325528144836426
INFO:base_model:epoch 99 val result: {'loss': tensor(0.5822), 'auc': 0.5064395523948965}


save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0099.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0099.pth


INFO:model.base_model:Epoch Step: 100 Train Loss: 68.3191909790039 elapsed: 0.9537110328674316
DEBUG:base_model:epoch 100 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(68.3192, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:100, validation elapsed_time:0.10383892059326172
INFO:base_model:epoch 100 val result: {'loss': tensor(0.5861), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 101 Train Loss: 66.93643951416016 elapsed: 0.7505209445953369
DEBUG:base_model:epoch 101 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(66.9364, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:101, validation elapsed_time:0.08670783042907715
INFO:base_model:epoch 101 val result: {'loss': tensor(0.5936), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 102 Train Loss: 68.93954467773438 elapsed: 0.8116929531097412
DEBUG:base_model:epoch 102 train f

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0117.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0117.pth


INFO:model.base_model:Epoch Step: 118 Train Loss: 66.9881820678711 elapsed: 0.7732768058776855
DEBUG:base_model:epoch 118 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(66.9882, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:118, validation elapsed_time:0.2073049545288086
INFO:base_model:epoch 118 val result: {'loss': tensor(0.5838), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 119 Train Loss: 68.98905944824219 elapsed: 0.9261102676391602
DEBUG:base_model:epoch 119 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(68.9891, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:119, validation elapsed_time:0.12484407424926758
INFO:base_model:epoch 119 val result: {'loss': tensor(0.6188), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 120 Train Loss: 67.36318969726562 elapsed: 0.82489013671875
DEBUG:base_model:epoch 120 train fini

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0124.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0124.pth


INFO:model.base_model:Epoch Step: 125 Train Loss: 67.06795501708984 elapsed: 0.6204757690429688
DEBUG:base_model:epoch 125 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(67.0680, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:125, validation elapsed_time:0.12459897994995117
INFO:base_model:epoch 125 val result: {'loss': tensor(0.5784), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 126 Train Loss: 68.18175506591797 elapsed: 0.7745771408081055
DEBUG:base_model:epoch 126 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(68.1818, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:126, validation elapsed_time:0.1302471160888672
INFO:base_model:epoch 126 val result: {'loss': tensor(0.6301), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 127 Train Loss: 68.0427017211914 elapsed: 0.8221349716186523
DEBUG:base_model:epo

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0128.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0128.pth


INFO:model.base_model:Epoch Step: 129 Train Loss: 67.544677734375 elapsed: 0.8033781051635742
DEBUG:base_model:epoch 129 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(67.5447, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:129, validation elapsed_time:0.08335614204406738
INFO:base_model:epoch 129 val result: {'loss': tensor(0.5967), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 130 Train Loss: 69.89324188232422 elapsed: 0.8729519844055176
DEBUG:base_model:epoch 130 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(69.8932, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:130, validation elapsed_time:0.11732077598571777
INFO:base_model:epoch 130 val result: {'loss': tensor(0.6043), 'auc': 0.5064395523948965}


save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0130.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0130.pth


INFO:model.base_model:Epoch Step: 131 Train Loss: 66.91085052490234 elapsed: 0.7214269638061523
DEBUG:base_model:epoch 131 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(66.9109, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:131, validation elapsed_time:0.08089280128479004
INFO:base_model:epoch 131 val result: {'loss': tensor(0.5797), 'auc': 0.5064395523948965}


save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0131.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0131.pth


INFO:model.base_model:Epoch Step: 132 Train Loss: 67.01445770263672 elapsed: 0.8244931697845459
DEBUG:base_model:epoch 132 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(67.0145, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:132, validation elapsed_time:0.08430910110473633
INFO:base_model:epoch 132 val result: {'loss': tensor(0.5791), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 133 Train Loss: 68.19276428222656 elapsed: 0.7632851600646973
DEBUG:base_model:epoch 133 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(68.1928, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:133, validation elapsed_time:0.12528204917907715
INFO:base_model:epoch 133 val result: {'loss': tensor(0.6106), 'auc': 0.5064395523948965}


save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0133.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0133.pth


INFO:model.base_model:Epoch Step: 134 Train Loss: 68.40728759765625 elapsed: 0.779649019241333
DEBUG:base_model:epoch 134 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(68.4073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:134, validation elapsed_time:0.09653019905090332
INFO:base_model:epoch 134 val result: {'loss': tensor(0.6147), 'auc': 0.5064395523948965}


save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0134.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0134.pth


INFO:model.base_model:Epoch Step: 135 Train Loss: 69.54612731933594 elapsed: 0.6546280384063721
DEBUG:base_model:epoch 135 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(69.5461, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:135, validation elapsed_time:0.0841062068939209
INFO:base_model:epoch 135 val result: {'loss': tensor(0.5953), 'auc': 0.5064395523948965}


save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0135.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0135.pth


INFO:model.base_model:Epoch Step: 136 Train Loss: 71.57785034179688 elapsed: 0.7980449199676514
DEBUG:base_model:epoch 136 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(71.5779, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:136, validation elapsed_time:0.1731119155883789
INFO:base_model:epoch 136 val result: {'loss': tensor(0.5805), 'auc': 0.5064395523948965}


save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0136.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0136.pth


INFO:model.base_model:Epoch Step: 137 Train Loss: 71.96640014648438 elapsed: 0.8173849582672119
DEBUG:base_model:epoch 137 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(71.9664, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:137, validation elapsed_time:0.08688902854919434
INFO:base_model:epoch 137 val result: {'loss': tensor(0.6491), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 138 Train Loss: 73.12384796142578 elapsed: 0.7137281894683838
DEBUG:base_model:epoch 138 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(73.1238, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:138, validation elapsed_time:0.09861183166503906
INFO:base_model:epoch 138 val result: {'loss': tensor(0.5874), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 139 Train Loss: 67.23297119140625 elapsed: 0.6940689086914062
DEBUG:base_model:epoch 139 train finished. train

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0140.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0140.pth


INFO:model.base_model:Epoch Step: 141 Train Loss: 68.09359741210938 elapsed: 1.0280938148498535
DEBUG:base_model:epoch 141 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(68.0936, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:141, validation elapsed_time:0.2636070251464844
INFO:base_model:epoch 141 val result: {'loss': tensor(0.6129), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 142 Train Loss: 70.99644470214844 elapsed: 1.2177338600158691
DEBUG:base_model:epoch 142 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(70.9964, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:142, validation elapsed_time:0.1179189682006836
INFO:base_model:epoch 142 val result: {'loss': tensor(0.5793), 'auc': 0.5064395523948965}


save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0142.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0142.pth


INFO:model.base_model:Epoch Step: 143 Train Loss: 70.0193099975586 elapsed: 0.826807975769043
DEBUG:base_model:epoch 143 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(70.0193, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:143, validation elapsed_time:0.12419295310974121
INFO:base_model:epoch 143 val result: {'loss': tensor(0.6669), 'auc': 0.5064395523948965}


save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0143.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0143.pth


INFO:model.base_model:Epoch Step: 144 Train Loss: 69.83441925048828 elapsed: 1.0857570171356201
DEBUG:base_model:epoch 144 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(69.8344, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:144, validation elapsed_time:0.15253400802612305
INFO:base_model:epoch 144 val result: {'loss': tensor(0.5830), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 145 Train Loss: 71.98833465576172 elapsed: 0.7935628890991211
DEBUG:base_model:epoch 145 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(71.9883, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:145, validation elapsed_time:0.0942692756652832
INFO:base_model:epoch 145 val result: {'loss': tensor(0.6243), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 146 Train Loss: 69.10672760009766 elapsed: 0.7293660640716553
DEBUG:base_model:ep

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0157.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0157.pth


INFO:model.base_model:Epoch Step: 158 Train Loss: 67.10987854003906 elapsed: 0.728424072265625
DEBUG:base_model:epoch 158 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(67.1099, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:158, validation elapsed_time:0.08430314064025879
INFO:base_model:epoch 158 val result: {'loss': tensor(0.5830), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 159 Train Loss: 70.05913543701172 elapsed: 0.6747348308563232
DEBUG:base_model:epoch 159 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(70.0591, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:159, validation elapsed_time:0.08084583282470703
INFO:base_model:epoch 159 val result: {'loss': tensor(0.5944), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 160 Train Loss: 68.56758880615234 elapsed: 0.7040970325469971
DEBUG:base_model:epoch 160 train f

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0162.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0162.pth


INFO:model.base_model:Epoch Step: 163 Train Loss: 69.04446411132812 elapsed: 0.9739871025085449
DEBUG:base_model:epoch 163 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(69.0445, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:163, validation elapsed_time:0.4200429916381836
INFO:base_model:epoch 163 val result: {'loss': tensor(0.6645), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 164 Train Loss: 73.45941925048828 elapsed: 0.9003591537475586
DEBUG:base_model:epoch 164 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(73.4594, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:164, validation elapsed_time:0.12555885314941406
INFO:base_model:epoch 164 val result: {'loss': tensor(0.5992), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 165 Train Loss: 67.39486694335938 elapsed: 0.6656951904296875
DEBUG:base_model:ep

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0166.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0166.pth


INFO:model.base_model:Epoch Step: 167 Train Loss: 69.31828308105469 elapsed: 0.7822599411010742
DEBUG:base_model:epoch 167 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(69.3183, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:167, validation elapsed_time:0.09245896339416504
INFO:base_model:epoch 167 val result: {'loss': tensor(0.5786), 'auc': 0.5064395523948965}


save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0167.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0167.pth


INFO:model.base_model:Epoch Step: 168 Train Loss: 69.49591064453125 elapsed: 0.6635277271270752
DEBUG:base_model:epoch 168 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(69.4959, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:168, validation elapsed_time:0.10904097557067871
INFO:base_model:epoch 168 val result: {'loss': tensor(0.6705), 'auc': 0.49396629176094764}
INFO:model.base_model:Epoch Step: 169 Train Loss: 72.34622192382812 elapsed: 0.6300151348114014
DEBUG:base_model:epoch 169 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(72.3462, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:169, validation elapsed_time:0.30696702003479004
INFO:base_model:epoch 169 val result: {'loss': tensor(0.5933), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 170 Train Loss: 68.15838623046875 elapsed: 0.9258360862731934
DEBUG:base_model:

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0174.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0174.pth


INFO:model.base_model:Epoch Step: 175 Train Loss: 67.9027099609375 elapsed: 0.7753200531005859
DEBUG:base_model:epoch 175 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(67.9027, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:175, validation elapsed_time:0.08389592170715332
INFO:base_model:epoch 175 val result: {'loss': tensor(0.5820), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 176 Train Loss: 67.63140869140625 elapsed: 0.920386791229248
DEBUG:base_model:epoch 176 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(67.6314, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:176, validation elapsed_time:0.10787105560302734
INFO:base_model:epoch 176 val result: {'loss': tensor(0.5993), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 177 Train Loss: 70.92849731445312 elapsed: 0.8004553318023682
DEBUG:base_model:epoch 177 train finished. train_r

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0181.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0181.pth


INFO:model.base_model:Epoch Step: 182 Train Loss: 69.97274017333984 elapsed: 0.8822650909423828
DEBUG:base_model:epoch 182 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(69.9727, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:182, validation elapsed_time:0.1180276870727539
INFO:base_model:epoch 182 val result: {'loss': tensor(0.5788), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 183 Train Loss: 68.28636169433594 elapsed: 0.8227548599243164
DEBUG:base_model:epoch 183 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(68.2864, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:183, validation elapsed_time:0.1439502239227295
INFO:base_model:epoch 183 val result: {'loss': tensor(0.6428), 'auc': 0.5064395523948965}


save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0183.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0183.pth


INFO:model.base_model:Epoch Step: 184 Train Loss: 71.35791015625 elapsed: 0.7134759426116943
DEBUG:base_model:epoch 184 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(71.3579, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:184, validation elapsed_time:0.08763313293457031
INFO:base_model:epoch 184 val result: {'loss': tensor(0.5970), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 185 Train Loss: 68.00035095214844 elapsed: 0.6805880069732666
DEBUG:base_model:epoch 185 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(68.0004, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:185, validation elapsed_time:0.11455202102661133
INFO:base_model:epoch 185 val result: {'loss': tensor(0.6020), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 186 Train Loss: 71.71823120117188 elapsed: 0.7795031070709229
DEBUG:base_model:epoc

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0187.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0187.pth


INFO:model.base_model:Epoch Step: 188 Train Loss: 69.88734436035156 elapsed: 0.6944599151611328
DEBUG:base_model:epoch 188 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(69.8873, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:188, validation elapsed_time:0.08509397506713867
INFO:base_model:epoch 188 val result: {'loss': tensor(0.6355), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 189 Train Loss: 68.3430404663086 elapsed: 0.6199350357055664
DEBUG:base_model:epoch 189 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(68.3430, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:189, validation elapsed_time:0.080596923828125
INFO:base_model:epoch 189 val result: {'loss': tensor(0.5809), 'auc': 0.5064395523948965}


save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0189.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0189.pth


INFO:model.base_model:Epoch Step: 190 Train Loss: 70.7576675415039 elapsed: 0.6716790199279785
DEBUG:base_model:epoch 190 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(70.7577, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:190, validation elapsed_time:0.08269286155700684
INFO:base_model:epoch 190 val result: {'loss': tensor(0.6259), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 191 Train Loss: 72.08904266357422 elapsed: 0.8810601234436035
DEBUG:base_model:epoch 191 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(72.0890, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:191, validation elapsed_time:0.08801794052124023
INFO:base_model:epoch 191 val result: {'loss': tensor(0.5917), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 192 Train Loss: 67.19976806640625 elapsed: 0.6640830039978027
DEBUG:base_model:epoch 192 train finished. train_

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0194.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0194.pth


INFO:model.base_model:Epoch Step: 195 Train Loss: 70.72602844238281 elapsed: 0.6585981845855713
DEBUG:base_model:epoch 195 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(70.7260, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:195, validation elapsed_time:0.10601496696472168
INFO:base_model:epoch 195 val result: {'loss': tensor(0.6241), 'auc': 0.49355842729744626}
INFO:model.base_model:Epoch Step: 196 Train Loss: 69.42586517333984 elapsed: 0.6422519683837891
DEBUG:base_model:epoch 196 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(69.4259, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:196, validation elapsed_time:0.08272814750671387
INFO:base_model:epoch 196 val result: {'loss': tensor(0.5837), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 197 Train Loss: 71.24441528320312 elapsed: 0.8283917903900146
DEBUG:base_model:epoch 197 train

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0198.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0198.pth


INFO:model.base_model:Epoch Step: 199 Train Loss: 70.46334075927734 elapsed: 0.6414499282836914
DEBUG:base_model:epoch 199 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(70.4633, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:199, validation elapsed_time:0.08327603340148926
INFO:base_model:epoch 199 val result: {'loss': tensor(0.6379), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 200 Train Loss: 74.23696899414062 elapsed: 0.6561951637268066
DEBUG:base_model:epoch 200 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(74.2370, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:200, validation elapsed_time:0.26392316818237305
INFO:base_model:epoch 200 val result: {'loss': tensor(0.6024), 'auc': 0.5064395523948965}


save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0200.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0200.pth


INFO:model.base_model:Epoch Step: 201 Train Loss: 67.64873504638672 elapsed: 0.7585561275482178
DEBUG:base_model:epoch 201 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(67.6487, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:201, validation elapsed_time:0.11518311500549316
INFO:base_model:epoch 201 val result: {'loss': tensor(0.5845), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 202 Train Loss: 67.69314575195312 elapsed: 0.626929759979248
DEBUG:base_model:epoch 202 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(67.6931, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:202, validation elapsed_time:0.09632396697998047
INFO:base_model:epoch 202 val result: {'loss': tensor(0.6040), 'auc': 0.49959415584415584}
INFO:model.base_model:Epoch Step: 203 Train Loss: 67.21007537841797 elapsed: 0.7124300003051758
DEBUG:base_model:e

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0203.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0203.pth


INFO:model.base_model:Epoch Step: 204 Train Loss: 71.05870819091797 elapsed: 0.8075339794158936
DEBUG:base_model:epoch 204 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(71.0587, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:204, validation elapsed_time:0.09236669540405273
INFO:base_model:epoch 204 val result: {'loss': tensor(0.6078), 'auc': 0.5064395523948965}


save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0204.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0204.pth


INFO:model.base_model:Epoch Step: 205 Train Loss: 68.47584533691406 elapsed: 0.6566300392150879
DEBUG:base_model:epoch 205 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(68.4758, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:205, validation elapsed_time:0.10077190399169922
INFO:base_model:epoch 205 val result: {'loss': tensor(0.6331), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 206 Train Loss: 72.27263641357422 elapsed: 0.6387567520141602
DEBUG:base_model:epoch 206 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(72.2726, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:206, validation elapsed_time:0.10759925842285156
INFO:base_model:epoch 206 val result: {'loss': tensor(0.5900), 'auc': 0.5064395523948965}


save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0206.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0206.pth


INFO:model.base_model:Epoch Step: 207 Train Loss: 71.08765411376953 elapsed: 0.9335989952087402
DEBUG:base_model:epoch 207 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(71.0877, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:207, validation elapsed_time:0.09906506538391113
INFO:base_model:epoch 207 val result: {'loss': tensor(0.6644), 'auc': 0.5064395523948965}


save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0207.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0207.pth


INFO:model.base_model:Epoch Step: 208 Train Loss: 72.78170013427734 elapsed: 0.6901559829711914
DEBUG:base_model:epoch 208 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(72.7817, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:208, validation elapsed_time:0.08607792854309082
INFO:base_model:epoch 208 val result: {'loss': tensor(0.5805), 'auc': 0.5064395523948965}


save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0208.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0208.pth


INFO:model.base_model:Epoch Step: 209 Train Loss: 73.42473602294922 elapsed: 0.6631653308868408
DEBUG:base_model:epoch 209 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(73.4247, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:209, validation elapsed_time:0.08452200889587402
INFO:base_model:epoch 209 val result: {'loss': tensor(0.7042), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 210 Train Loss: 70.6119384765625 elapsed: 0.8254921436309814
DEBUG:base_model:epoch 210 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(70.6119, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:210, validation elapsed_time:0.0914301872253418
INFO:base_model:epoch 210 val result: {'loss': tensor(0.5843), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 211 Train Loss: 73.79339599609375 elapsed: 0.7944686412811279
DEBUG:base_model:epo

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0211.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0211.pth


INFO:model.base_model:Epoch Step: 212 Train Loss: 70.98712158203125 elapsed: 0.7020018100738525
DEBUG:base_model:epoch 212 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(70.9871, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:212, validation elapsed_time:0.0932607650756836
INFO:base_model:epoch 212 val result: {'loss': tensor(0.5897), 'auc': 0.5064395523948965}


save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0212.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0212.pth


INFO:model.base_model:Epoch Step: 213 Train Loss: 75.20136260986328 elapsed: 0.770190954208374
DEBUG:base_model:epoch 213 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(75.2014, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:213, validation elapsed_time:0.3071112632751465
INFO:base_model:epoch 213 val result: {'loss': tensor(0.6933), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 214 Train Loss: 68.96036529541016 elapsed: 0.9237778186798096
DEBUG:base_model:epoch 214 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(68.9604, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:214, validation elapsed_time:0.13204717636108398
INFO:base_model:epoch 214 val result: {'loss': tensor(0.5834), 'auc': 0.5064395523948965}


save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0214.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0214.pth


INFO:model.base_model:Epoch Step: 215 Train Loss: 69.72122192382812 elapsed: 0.9512579441070557
DEBUG:base_model:epoch 215 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(69.7212, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:215, validation elapsed_time:0.18204712867736816
INFO:base_model:epoch 215 val result: {'loss': tensor(0.5946), 'auc': 0.5064395523948965}


save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0215.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0215.pth


INFO:model.base_model:Epoch Step: 216 Train Loss: 68.07295227050781 elapsed: 0.7632231712341309
DEBUG:base_model:epoch 216 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(68.0730, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:216, validation elapsed_time:0.27208924293518066
INFO:base_model:epoch 216 val result: {'loss': tensor(0.6515), 'auc': 0.5064395523948965}


save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0216.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0216.pth


INFO:model.base_model:Epoch Step: 217 Train Loss: 74.47003173828125 elapsed: 0.8621251583099365
DEBUG:base_model:epoch 217 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(74.4700, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:217, validation elapsed_time:0.12810397148132324
INFO:base_model:epoch 217 val result: {'loss': tensor(0.5850), 'auc': 0.5064395523948965}


save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0217.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0217.pth


INFO:model.base_model:Epoch Step: 218 Train Loss: 74.3445053100586 elapsed: 0.6567800045013428
DEBUG:base_model:epoch 218 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(74.3445, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:218, validation elapsed_time:0.1580829620361328
INFO:base_model:epoch 218 val result: {'loss': tensor(0.7021), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 219 Train Loss: 77.25385284423828 elapsed: 0.8306839466094971
DEBUG:base_model:epoch 219 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(77.2539, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:219, validation elapsed_time:0.08709096908569336
INFO:base_model:epoch 219 val result: {'loss': tensor(0.5813), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 220 Train Loss: 73.40179443359375 elapsed: 0.8109829425811768
DEBUG:base_model:epo

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0221.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0221.pth


INFO:model.base_model:Epoch Step: 222 Train Loss: 69.50172424316406 elapsed: 0.8740761280059814
DEBUG:base_model:epoch 222 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(69.5017, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:222, validation elapsed_time:0.17534613609313965
INFO:base_model:epoch 222 val result: {'loss': tensor(0.6120), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 223 Train Loss: 69.77326202392578 elapsed: 0.8290150165557861
DEBUG:base_model:epoch 223 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(69.7733, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:223, validation elapsed_time:0.10469579696655273
INFO:base_model:epoch 223 val result: {'loss': tensor(0.5792), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 224 Train Loss: 69.7138442993164 elapsed: 0.6796472072601318
DEBUG:base_model:ep

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0227.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0227.pth


INFO:model.base_model:Epoch Step: 228 Train Loss: 75.62621307373047 elapsed: 0.6704161167144775
DEBUG:base_model:epoch 228 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(75.6262, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:228, validation elapsed_time:0.08205485343933105
INFO:base_model:epoch 228 val result: {'loss': tensor(0.5966), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 229 Train Loss: 67.4880599975586 elapsed: 0.9751811027526855
DEBUG:base_model:epoch 229 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(67.4881, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:229, validation elapsed_time:0.11841988563537598
INFO:base_model:epoch 229 val result: {'loss': tensor(0.5962), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 230 Train Loss: 70.29959869384766 elapsed: 0.8020851612091064
DEBUG:base_model:epoch 230 train f

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0230.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0230.pth


INFO:model.base_model:Epoch Step: 231 Train Loss: 69.25257873535156 elapsed: 0.6651651859283447
DEBUG:base_model:epoch 231 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(69.2526, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:231, validation elapsed_time:0.10156011581420898
INFO:base_model:epoch 231 val result: {'loss': tensor(0.5858), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 232 Train Loss: 67.53651428222656 elapsed: 0.7609710693359375
DEBUG:base_model:epoch 232 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(67.5365, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:232, validation elapsed_time:0.12667179107666016
INFO:base_model:epoch 232 val result: {'loss': tensor(0.6029), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 233 Train Loss: 73.65039825439453 elapsed: 0.9698171615600586
DEBUG:base_model:e

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0236.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0236.pth


INFO:model.base_model:Epoch Step: 237 Train Loss: 70.72770690917969 elapsed: 0.7931416034698486
DEBUG:base_model:epoch 237 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(70.7277, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:237, validation elapsed_time:0.12321305274963379
INFO:base_model:epoch 237 val result: {'loss': tensor(0.5898), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 238 Train Loss: 67.4725570678711 elapsed: 0.7212650775909424
DEBUG:base_model:epoch 238 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(67.4726, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:238, validation elapsed_time:0.10303735733032227
INFO:base_model:epoch 238 val result: {'loss': tensor(0.5952), 'auc': 0.5064395523948965}


save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0238.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0238.pth


INFO:model.base_model:Epoch Step: 239 Train Loss: 72.10765075683594 elapsed: 0.6525959968566895
DEBUG:base_model:epoch 239 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(72.1077, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:239, validation elapsed_time:0.08294296264648438
INFO:base_model:epoch 239 val result: {'loss': tensor(0.6273), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 240 Train Loss: 71.97554779052734 elapsed: 0.769848108291626
DEBUG:base_model:epoch 240 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(71.9755, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:240, validation elapsed_time:0.08869409561157227
INFO:base_model:epoch 240 val result: {'loss': tensor(0.5924), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 241 Train Loss: 67.20438385009766 elapsed: 0.7879700660705566
DEBUG:base_model:ep

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0244.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0244.pth


INFO:model.base_model:Epoch Step: 245 Train Loss: 71.61552429199219 elapsed: 0.6050748825073242
DEBUG:base_model:epoch 245 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(71.6155, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:245, validation elapsed_time:0.0949561595916748
INFO:base_model:epoch 245 val result: {'loss': tensor(0.6094), 'auc': 0.5060337082390522}
INFO:model.base_model:Epoch Step: 246 Train Loss: 68.109130859375 elapsed: 0.699167013168335
DEBUG:base_model:epoch 246 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(68.1091, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:246, validation elapsed_time:0.19816994667053223
INFO:base_model:epoch 246 val result: {'loss': tensor(0.5826), 'auc': 0.5064395523948965}


save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0246.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0246.pth


INFO:model.base_model:Epoch Step: 247 Train Loss: 69.7298812866211 elapsed: 0.8363161087036133
DEBUG:base_model:epoch 247 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(69.7299, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:247, validation elapsed_time:0.12505197525024414
INFO:base_model:epoch 247 val result: {'loss': tensor(0.6600), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 248 Train Loss: 72.68434143066406 elapsed: 0.6647400856018066
DEBUG:base_model:epoch 248 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(72.6843, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:248, validation elapsed_time:0.09348082542419434
INFO:base_model:epoch 248 val result: {'loss': tensor(0.6158), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 249 Train Loss: 66.96649169921875 elapsed: 0.6541728973388672
DEBUG:base_model:ep

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0254.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0254.pth


INFO:model.base_model:Epoch Step: 255 Train Loss: 70.57919311523438 elapsed: 0.8209068775177002
DEBUG:base_model:epoch 255 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(70.5792, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:255, validation elapsed_time:0.08154416084289551
INFO:base_model:epoch 255 val result: {'loss': tensor(0.6264), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 256 Train Loss: 72.88224029541016 elapsed: 0.87288498878479
DEBUG:base_model:epoch 256 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(72.8822, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:256, validation elapsed_time:0.17693686485290527
INFO:base_model:epoch 256 val result: {'loss': tensor(0.5985), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 257 Train Loss: 67.19233703613281 elapsed: 0.8090920448303223
DEBUG:base_model:epoch 257 train fi

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0259.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0259.pth


INFO:model.base_model:Epoch Step: 260 Train Loss: 68.00614929199219 elapsed: 0.7745330333709717
DEBUG:base_model:epoch 260 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(68.0061, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:260, validation elapsed_time:0.08083891868591309
INFO:base_model:epoch 260 val result: {'loss': tensor(0.6048), 'auc': 0.49959415584415584}
INFO:model.base_model:Epoch Step: 261 Train Loss: 71.38445281982422 elapsed: 0.7219147682189941
DEBUG:base_model:epoch 261 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(71.3845, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:261, validation elapsed_time:0.2751789093017578
INFO:base_model:epoch 261 val result: {'loss': tensor(0.5981), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 262 Train Loss: 67.46247100830078 elapsed: 0.8654179573059082
DEBUG:base_model:epoch 262 train 

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0263.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0263.pth


INFO:model.base_model:Epoch Step: 264 Train Loss: 69.92731475830078 elapsed: 0.6182887554168701
DEBUG:base_model:epoch 264 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(69.9273, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:264, validation elapsed_time:0.2015540599822998
INFO:base_model:epoch 264 val result: {'loss': tensor(0.5795), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 265 Train Loss: 68.79998779296875 elapsed: 0.7905142307281494
DEBUG:base_model:epoch 265 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(68.8000, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:265, validation elapsed_time:0.08731794357299805
INFO:base_model:epoch 265 val result: {'loss': tensor(0.6149), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 266 Train Loss: 73.19254302978516 elapsed: 0.7784061431884766
DEBUG:base_model:epoch 266 train f

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0269.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0269.pth


INFO:model.base_model:Epoch Step: 270 Train Loss: 67.62227630615234 elapsed: 0.6529548168182373
DEBUG:base_model:epoch 270 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(67.6223, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:270, validation elapsed_time:0.09867382049560547
INFO:base_model:epoch 270 val result: {'loss': tensor(0.5793), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 271 Train Loss: 68.15287017822266 elapsed: 0.6506679058074951
DEBUG:base_model:epoch 271 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(68.1529, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:271, validation elapsed_time:0.0828249454498291
INFO:base_model:epoch 271 val result: {'loss': tensor(0.6045), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 272 Train Loss: 70.20101928710938 elapsed: 0.9159259796142578
DEBUG:base_model:ep

INFO:base_model:epoch 288 val result: {'loss': tensor(0.6034), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 289 Train Loss: 68.16802215576172 elapsed: 0.9888567924499512
DEBUG:base_model:epoch 289 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(68.1680, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:289, validation elapsed_time:0.16742610931396484
INFO:base_model:epoch 289 val result: {'loss': tensor(0.6130), 'auc': 0.5004058441558441}
INFO:model.base_model:Epoch Step: 290 Train Loss: 74.92765808105469 elapsed: 0.8032321929931641
DEBUG:base_model:epoch 290 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(74.9277, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:290, validation elapsed_time:0.08623695373535156
INFO:base_model:epoch 290 val result: {'loss': tensor(0.6317), 'auc': 0.49355842729744626}
INFO:model.base_model:Epoch Step: 291 

INFO:model.base_model:cur_epoch:307, validation elapsed_time:0.0826117992401123
INFO:base_model:epoch 307 val result: {'loss': tensor(0.5891), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 308 Train Loss: 68.50261688232422 elapsed: 0.6331272125244141
DEBUG:base_model:epoch 308 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(68.5026, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:308, validation elapsed_time:0.14119911193847656
INFO:base_model:epoch 308 val result: {'loss': tensor(0.6564), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 309 Train Loss: 74.41030883789062 elapsed: 0.8684859275817871
DEBUG:base_model:epoch 309 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(74.4103, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:309, validation elapsed_time:0.08621907234191895
INFO:base_model:epoch 309 val result: {'loss': te

INFO:model.base_model:cur_epoch:326, validation elapsed_time:0.20451998710632324
INFO:base_model:epoch 326 val result: {'loss': tensor(0.6985), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 327 Train Loss: 71.5514144897461 elapsed: 0.9528679847717285
DEBUG:base_model:epoch 327 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(71.5514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:327, validation elapsed_time:0.08977103233337402
INFO:base_model:epoch 327 val result: {'loss': tensor(0.5815), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 328 Train Loss: 77.61862182617188 elapsed: 0.6217641830444336
DEBUG:base_model:epoch 328 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(77.6186, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:328, validation elapsed_time:0.08334589004516602
INFO:base_model:epoch 328 val resu

INFO:model.base_model:cur_epoch:345, validation elapsed_time:0.0845038890838623
INFO:base_model:epoch 345 val result: {'loss': tensor(0.6340), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 346 Train Loss: 68.81036376953125 elapsed: 0.870107889175415
DEBUG:base_model:epoch 346 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(68.8104, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:346, validation elapsed_time:0.1184530258178711
INFO:base_model:epoch 346 val result: {'loss': tensor(0.6019), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 347 Train Loss: 74.54777526855469 elapsed: 0.7475111484527588
DEBUG:base_model:epoch 347 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(74.5478, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:347, validation elapsed_time:0.08191275596618652
INFO:base_model:epoch 347 val result

DEBUG:base_model:epoch 364 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(67.8753, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:364, validation elapsed_time:0.12107300758361816
INFO:base_model:epoch 364 val result: {'loss': tensor(0.6155), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 365 Train Loss: 80.13233184814453 elapsed: 0.8871791362762451
DEBUG:base_model:epoch 365 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(80.1323, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:365, validation elapsed_time:0.08475613594055176
INFO:base_model:epoch 365 val result: {'loss': tensor(0.7026), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 366 Train Loss: 75.3388671875 elapsed: 0.8001110553741455
DEBUG:base_model:epoch 366 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(75.3389, grad

INFO:model.base_model:Epoch Step: 383 Train Loss: 72.56538391113281 elapsed: 0.8680469989776611
DEBUG:base_model:epoch 383 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(72.5654, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:383, validation elapsed_time:0.10179710388183594
INFO:base_model:epoch 383 val result: {'loss': tensor(0.6899), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 384 Train Loss: 81.29627990722656 elapsed: 0.7019610404968262
DEBUG:base_model:epoch 384 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(81.2963, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:384, validation elapsed_time:0.08677816390991211
INFO:base_model:epoch 384 val result: {'loss': tensor(0.6290), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 385 Train Loss: 69.48085021972656 elapsed: 0.6715700626373291
DEBUG:base_model:e

INFO:base_model:epoch 401 val result: {'loss': tensor(0.5935), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 402 Train Loss: 75.01486206054688 elapsed: 0.9805800914764404
DEBUG:base_model:epoch 402 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(75.0149, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:402, validation elapsed_time:0.14038610458374023
INFO:base_model:epoch 402 val result: {'loss': tensor(0.6605), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 403 Train Loss: 74.39923095703125 elapsed: 0.7225441932678223
DEBUG:base_model:epoch 403 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(74.3992, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:403, validation elapsed_time:0.0854802131652832
INFO:base_model:epoch 403 val result: {'loss': tensor(0.5909), 'auc': 0.4935604476051035}
INFO:model.base_model:Epo

INFO:model.base_model:cur_epoch:420, validation elapsed_time:0.12177300453186035
INFO:base_model:epoch 420 val result: {'loss': tensor(0.7303), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 421 Train Loss: 70.82557678222656 elapsed: 0.5936682224273682
DEBUG:base_model:epoch 421 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(70.8256, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:421, validation elapsed_time:0.08313822746276855
INFO:base_model:epoch 421 val result: {'loss': tensor(0.5786), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 422 Train Loss: 70.62355041503906 elapsed: 0.7471780776977539
DEBUG:base_model:epoch 422 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(70.6236, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:422, validation elapsed_time:0.22873210906982422
INFO:base_model:epoch 422 val res

DEBUG:base_model:epoch 439 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(76.5533, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:439, validation elapsed_time:0.09193706512451172
INFO:base_model:epoch 439 val result: {'loss': tensor(0.5879), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 440 Train Loss: 67.67415618896484 elapsed: 0.8077230453491211
DEBUG:base_model:epoch 440 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(67.6742, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:440, validation elapsed_time:0.1518089771270752
INFO:base_model:epoch 440 val result: {'loss': tensor(0.6007), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 441 Train Loss: 69.34566497802734 elapsed: 0.6645858287811279
DEBUG:base_model:epoch 441 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(69.3457, g

INFO:model.base_model:Epoch Step: 458 Train Loss: 72.34747314453125 elapsed: 0.6209700107574463
DEBUG:base_model:epoch 458 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(72.3475, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:458, validation elapsed_time:0.08492493629455566
INFO:base_model:epoch 458 val result: {'loss': tensor(0.6984), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 459 Train Loss: 85.13944244384766 elapsed: 0.7402429580688477
DEBUG:base_model:epoch 459 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(85.1394, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:459, validation elapsed_time:0.18170499801635742
INFO:base_model:epoch 459 val result: {'loss': tensor(0.6826), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 460 Train Loss: 72.05608367919922 elapsed: 0.745079755783081
DEBUG:base_model:ep

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0469.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0469.pth


INFO:model.base_model:Epoch Step: 470 Train Loss: 73.77059936523438 elapsed: 0.9342269897460938
DEBUG:base_model:epoch 470 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(73.7706, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:470, validation elapsed_time:0.11151790618896484
INFO:base_model:epoch 470 val result: {'loss': tensor(0.6157), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 471 Train Loss: 86.40657043457031 elapsed: 0.7137198448181152
DEBUG:base_model:epoch 471 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(86.4066, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:471, validation elapsed_time:0.08323287963867188
INFO:base_model:epoch 471 val result: {'loss': tensor(0.7930), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 472 Train Loss: 84.59691619873047 elapsed: 0.6732518672943115
DEBUG:base_model:e

INFO:model.base_model:cur_epoch:488, validation elapsed_time:0.08373808860778809
INFO:base_model:epoch 488 val result: {'loss': tensor(0.8236), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 489 Train Loss: 80.93401336669922 elapsed: 0.7040841579437256
DEBUG:base_model:epoch 489 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(80.9340, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:489, validation elapsed_time:0.08378911018371582
INFO:base_model:epoch 489 val result: {'loss': tensor(0.5789), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 490 Train Loss: 77.75581359863281 elapsed: 0.7701740264892578
DEBUG:base_model:epoch 490 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(77.7558, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:490, validation elapsed_time:0.10054183006286621
INFO:base_model:epoch 490 val result: {'loss': t

INFO:model.base_model:cur_epoch:507, validation elapsed_time:0.11760497093200684
INFO:base_model:epoch 507 val result: {'loss': tensor(0.5943), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 508 Train Loss: 122.15776824951172 elapsed: 0.6503129005432129
DEBUG:base_model:epoch 508 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(122.1578, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:508, validation elapsed_time:0.0765540599822998
INFO:base_model:epoch 508 val result: {'loss': tensor(0.6226), 'auc': 0.49959415584415584}
INFO:model.base_model:Epoch Step: 509 Train Loss: 114.5247573852539 elapsed: 0.7462420463562012
DEBUG:base_model:epoch 509 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(114.5248, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:509, validation elapsed_time:0.2731649875640869
INFO:base_model:epoch 509 val r

DEBUG:base_model:epoch 526 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(90.6997, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:526, validation elapsed_time:0.0829157829284668
INFO:base_model:epoch 526 val result: {'loss': tensor(0.6291), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 527 Train Loss: 71.45289611816406 elapsed: 0.7039697170257568
DEBUG:base_model:epoch 527 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(71.4529, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:527, validation elapsed_time:0.09316396713256836
INFO:base_model:epoch 527 val result: {'loss': tensor(0.7982), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 528 Train Loss: 98.71012878417969 elapsed: 0.8115692138671875
DEBUG:base_model:epoch 528 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': te

INFO:model.base_model:Epoch Step: 545 Train Loss: 71.27125549316406 elapsed: 0.7012338638305664
DEBUG:base_model:epoch 545 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(71.2713, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:545, validation elapsed_time:0.08368611335754395
INFO:base_model:epoch 545 val result: {'loss': tensor(0.7775), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 546 Train Loss: 94.38481903076172 elapsed: 0.6626298427581787
DEBUG:base_model:epoch 546 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(94.3848, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:546, validation elapsed_time:0.08607888221740723
INFO:base_model:epoch 546 val result: {'loss': tensor(0.7531), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 547 Train Loss: 70.60114288330078 elapsed: 0.7504968643188477
DEBUG:base_model:e

INFO:model.base_model:cur_epoch:563, validation elapsed_time:0.11698794364929199
INFO:base_model:epoch 563 val result: {'loss': tensor(0.8175), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 564 Train Loss: 94.26990509033203 elapsed: 0.6207809448242188
DEBUG:base_model:epoch 564 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(94.2699, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:564, validation elapsed_time:0.0825350284576416
INFO:base_model:epoch 564 val result: {'loss': tensor(0.7247), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 565 Train Loss: 74.05439758300781 elapsed: 0.8583779335021973
DEBUG:base_model:epoch 565 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(74.0544, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:565, validation elapsed_time:0.1175847053527832
INFO:base_model:epoch 565 val result: {'loss': ten

INFO:model.base_model:cur_epoch:582, validation elapsed_time:0.11840224266052246
INFO:base_model:epoch 582 val result: {'loss': tensor(0.6393), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 583 Train Loss: 70.15245056152344 elapsed: 0.6953539848327637
DEBUG:base_model:epoch 583 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(70.1525, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:583, validation elapsed_time:0.08584904670715332
INFO:base_model:epoch 583 val result: {'loss': tensor(0.6510), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 584 Train Loss: 82.26183319091797 elapsed: 0.6296770572662354
DEBUG:base_model:epoch 584 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(82.2618, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:584, validation elapsed_time:0.26924586296081543
INFO:base_model:epoch 584 val res

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0585.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0585.pth


INFO:model.base_model:Epoch Step: 586 Train Loss: 81.04976654052734 elapsed: 0.7790000438690186
DEBUG:base_model:epoch 586 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(81.0498, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:586, validation elapsed_time:0.10605096817016602
INFO:base_model:epoch 586 val result: {'loss': tensor(0.9522), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 587 Train Loss: 86.5360336303711 elapsed: 1.3177752494812012
DEBUG:base_model:epoch 587 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(86.5360, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:587, validation elapsed_time:0.17467617988586426
INFO:base_model:epoch 587 val result: {'loss': tensor(0.5793), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 588 Train Loss: 79.55024719238281 elapsed: 0.9425499439239502
DEBUG:base_model:ep

INFO:base_model:epoch 604 val result: {'loss': tensor(0.7385), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 605 Train Loss: 74.53680419921875 elapsed: 0.7709000110626221
DEBUG:base_model:epoch 605 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(74.5368, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:605, validation elapsed_time:0.15795207023620605
INFO:base_model:epoch 605 val result: {'loss': tensor(0.5785), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 606 Train Loss: 71.7317886352539 elapsed: 0.7614622116088867
DEBUG:base_model:epoch 606 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(71.7318, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:606, validation elapsed_time:0.12101197242736816
INFO:base_model:epoch 606 val result: {'loss': tensor(0.7225), 'auc': 0.49959415584415584}
INFO:model.base_model:Ep

INFO:model.base_model:cur_epoch:623, validation elapsed_time:0.13231897354125977
INFO:base_model:epoch 623 val result: {'loss': tensor(0.7886), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 624 Train Loss: 78.93136596679688 elapsed: 0.9169659614562988
DEBUG:base_model:epoch 624 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(78.9314, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:624, validation elapsed_time:0.08721494674682617
INFO:base_model:epoch 624 val result: {'loss': tensor(0.5858), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 625 Train Loss: 92.9593276977539 elapsed: 0.6279220581054688
DEBUG:base_model:epoch 625 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(92.9593, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:625, validation elapsed_time:0.21997284889221191
INFO:base_model:epoch 625 val resu

INFO:model.base_model:cur_epoch:642, validation elapsed_time:0.08082818984985352
INFO:base_model:epoch 642 val result: {'loss': tensor(0.6306), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 643 Train Loss: 89.15772247314453 elapsed: 0.8841018676757812
DEBUG:base_model:epoch 643 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(89.1577, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:643, validation elapsed_time:0.10227823257446289
INFO:base_model:epoch 643 val result: {'loss': tensor(0.8484), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 644 Train Loss: 94.58213806152344 elapsed: 0.7774488925933838
DEBUG:base_model:epoch 644 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(94.5821, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:644, validation elapsed_time:0.07943320274353027
INFO:base_model:epoch 644 val res

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0650.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0650.pth


INFO:model.base_model:Epoch Step: 651 Train Loss: 100.2469482421875 elapsed: 0.6879501342773438
DEBUG:base_model:epoch 651 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(100.2469, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:651, validation elapsed_time:0.0736839771270752
INFO:base_model:epoch 651 val result: {'loss': tensor(0.9389), 'auc': 0.49355842729744626}
INFO:model.base_model:Epoch Step: 652 Train Loss: 79.1160659790039 elapsed: 0.7143981456756592
DEBUG:base_model:epoch 652 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(79.1161, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:652, validation elapsed_time:0.2525780200958252
INFO:base_model:epoch 652 val result: {'loss': tensor(0.5796), 'auc': 0.5004058441558441}
INFO:model.base_model:Epoch Step: 653 Train Loss: 76.58941650390625 elapsed: 0.7329459190368652
DEBUG:base_model:ep

INFO:base_model:epoch 669 val result: {'loss': tensor(0.6582), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 670 Train Loss: 70.52873992919922 elapsed: 0.7920360565185547
DEBUG:base_model:epoch 670 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(70.5287, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:670, validation elapsed_time:0.12848734855651855
INFO:base_model:epoch 670 val result: {'loss': tensor(0.6853), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 671 Train Loss: 96.766357421875 elapsed: 0.7804441452026367
DEBUG:base_model:epoch 671 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(96.7664, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:671, validation elapsed_time:0.0856788158416748
INFO:base_model:epoch 671 val result: {'loss': tensor(0.8260), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0682.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0682.pth


INFO:model.base_model:Epoch Step: 683 Train Loss: 71.20153045654297 elapsed: 0.8701450824737549
DEBUG:base_model:epoch 683 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(71.2015, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:683, validation elapsed_time:0.14648175239562988
INFO:base_model:epoch 683 val result: {'loss': tensor(0.7038), 'auc': 0.49355842729744626}
INFO:model.base_model:Epoch Step: 684 Train Loss: 97.2718505859375 elapsed: 0.7661309242248535
DEBUG:base_model:epoch 684 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(97.2719, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:684, validation elapsed_time:0.08041191101074219
INFO:base_model:epoch 684 val result: {'loss': tensor(0.8797), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 685 Train Loss: 96.99723052978516 elapsed: 0.5780251026153564
DEBUG:base_model:e

INFO:base_model:epoch 701 val result: {'loss': tensor(0.6231), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 702 Train Loss: 97.2654037475586 elapsed: 0.6934988498687744
DEBUG:base_model:epoch 702 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(97.2654, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:702, validation elapsed_time:0.09529685974121094
INFO:base_model:epoch 702 val result: {'loss': tensor(0.9406), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 703 Train Loss: 96.80650329589844 elapsed: 0.9035859107971191
DEBUG:base_model:epoch 703 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(96.8065, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:703, validation elapsed_time:0.13539695739746094
INFO:base_model:epoch 703 val result: {'loss': tensor(0.6137), 'auc': 0.4935604476051035}
INFO:model.base_model:Epo

INFO:model.base_model:cur_epoch:720, validation elapsed_time:0.17765307426452637
INFO:base_model:epoch 720 val result: {'loss': tensor(0.7725), 'auc': 0.5004058441558441}
INFO:model.base_model:Epoch Step: 721 Train Loss: 103.90087127685547 elapsed: 0.7846379280090332
DEBUG:base_model:epoch 721 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(103.9009, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:721, validation elapsed_time:0.09923291206359863
INFO:base_model:epoch 721 val result: {'loss': tensor(0.8939), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 722 Train Loss: 91.2206802368164 elapsed: 0.6928880214691162
DEBUG:base_model:epoch 722 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(91.2207, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:722, validation elapsed_time:0.11846780776977539
INFO:base_model:epoch 722 val re

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0734.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0734.pth


INFO:model.base_model:Epoch Step: 735 Train Loss: 75.04559326171875 elapsed: 0.6933019161224365
DEBUG:base_model:epoch 735 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(75.0456, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:735, validation elapsed_time:0.11137700080871582
INFO:base_model:epoch 735 val result: {'loss': tensor(0.6954), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 736 Train Loss: 97.48087310791016 elapsed: 0.6144998073577881
DEBUG:base_model:epoch 736 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(97.4809, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:736, validation elapsed_time:0.09025716781616211
INFO:base_model:epoch 736 val result: {'loss': tensor(0.8705), 'auc': 0.5004058441558441}
INFO:model.base_model:Epoch Step: 737 Train Loss: 92.72463989257812 elapsed: 0.807703971862793
DEBUG:base_model:epoch 737 train f

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0743.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0743.pth


INFO:model.base_model:Epoch Step: 744 Train Loss: 114.39568328857422 elapsed: 0.8178808689117432
DEBUG:base_model:epoch 744 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(114.3957, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:744, validation elapsed_time:0.13881921768188477
INFO:base_model:epoch 744 val result: {'loss': tensor(0.8143), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 745 Train Loss: 75.26532745361328 elapsed: 0.664315938949585
DEBUG:base_model:epoch 745 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(75.2653, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:745, validation elapsed_time:0.08382987976074219
INFO:base_model:epoch 745 val result: {'loss': tensor(0.7659), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 746 Train Loss: 109.9017562866211 elapsed: 0.6835241317749023
DEBUG:base_model:

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0758.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0758.pth


INFO:model.base_model:Epoch Step: 759 Train Loss: 77.4111328125 elapsed: 0.6472299098968506
DEBUG:base_model:epoch 759 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(77.4111, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:759, validation elapsed_time:0.09757423400878906
INFO:base_model:epoch 759 val result: {'loss': tensor(0.6328), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 760 Train Loss: 98.18878936767578 elapsed: 0.7926120758056641
DEBUG:base_model:epoch 760 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(98.1888, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:760, validation elapsed_time:0.08813714981079102
INFO:base_model:epoch 760 val result: {'loss': tensor(0.9804), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 761 Train Loss: 108.22669219970703 elapsed: 0.8554511070251465
DEBUG:base_model:epoc

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0763.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0763.pth


INFO:model.base_model:Epoch Step: 764 Train Loss: 77.59828186035156 elapsed: 0.7121119499206543
DEBUG:base_model:epoch 764 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(77.5983, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:764, validation elapsed_time:0.10981297492980957
INFO:base_model:epoch 764 val result: {'loss': tensor(0.6431), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 765 Train Loss: 92.26802825927734 elapsed: 0.6628470420837402
DEBUG:base_model:epoch 765 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(92.2680, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:765, validation elapsed_time:0.08024001121520996
INFO:base_model:epoch 765 val result: {'loss': tensor(0.8636), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 766 Train Loss: 96.84725952148438 elapsed: 0.6554732322692871
DEBUG:base_model:epoch 766 train 

INFO:base_model:epoch 782 val result: {'loss': tensor(1.1857), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 783 Train Loss: 121.02637481689453 elapsed: 0.7375679016113281
DEBUG:base_model:epoch 783 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(121.0264, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:783, validation elapsed_time:0.08145022392272949
INFO:base_model:epoch 783 val result: {'loss': tensor(0.7003), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 784 Train Loss: 77.24681854248047 elapsed: 0.8159999847412109
DEBUG:base_model:epoch 784 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(77.2468, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:784, validation elapsed_time:0.10274577140808105
INFO:base_model:epoch 784 val result: {'loss': tensor(0.8201), 'auc': 0.5064395523948965}
INFO:model.base_model:

INFO:model.base_model:cur_epoch:801, validation elapsed_time:0.0825650691986084
INFO:base_model:epoch 801 val result: {'loss': tensor(0.7920), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 802 Train Loss: 78.03962707519531 elapsed: 0.8974149227142334
DEBUG:base_model:epoch 802 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(78.0396, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:802, validation elapsed_time:0.1370248794555664
INFO:base_model:epoch 802 val result: {'loss': tensor(0.6000), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 803 Train Loss: 97.67660522460938 elapsed: 0.7110037803649902
DEBUG:base_model:epoch 803 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(97.6766, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:803, validation elapsed_time:0.08077478408813477
INFO:base_model:epoch 803 val result: {'loss': ten

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0818.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0818.pth


INFO:model.base_model:Epoch Step: 819 Train Loss: 100.74516296386719 elapsed: 0.7776849269866943
DEBUG:base_model:epoch 819 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(100.7452, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:819, validation elapsed_time:0.07830190658569336
INFO:base_model:epoch 819 val result: {'loss': tensor(1.1644), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 820 Train Loss: 139.84329223632812 elapsed: 0.7359848022460938
DEBUG:base_model:epoch 820 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(139.8433, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:820, validation elapsed_time:0.08526206016540527
INFO:base_model:epoch 820 val result: {'loss': tensor(0.9803), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 821 Train Loss: 86.63076782226562 elapsed: 0.78102707862854
DEBUG:base_model:epoch 821 train finished. tra

INFO:base_model:epoch 837 val result: {'loss': tensor(0.7992), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 838 Train Loss: 120.25624084472656 elapsed: 0.7836470603942871
DEBUG:base_model:epoch 838 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(120.2562, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:838, validation elapsed_time:0.08846688270568848
INFO:base_model:epoch 838 val result: {'loss': tensor(1.0726), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 839 Train Loss: 102.1380615234375 elapsed: 0.7083289623260498
DEBUG:base_model:epoch 839 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(102.1381, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:839, validation elapsed_time:0.10045599937438965
INFO:base_model:epoch 839 val result: {'loss': tensor(0.5815), 'auc': 0.49959415584415584}
INFO:model.base_model:Epoch Step: 8

INFO:model.base_model:cur_epoch:856, validation elapsed_time:0.08297109603881836
INFO:base_model:epoch 856 val result: {'loss': tensor(1.1274), 'auc': 0.5064415727025536}


save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0856.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0856.pth


INFO:model.base_model:Epoch Step: 857 Train Loss: 90.35833740234375 elapsed: 0.631911039352417
DEBUG:base_model:epoch 857 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(90.3583, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:857, validation elapsed_time:0.1318340301513672
INFO:base_model:epoch 857 val result: {'loss': tensor(0.5831), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 858 Train Loss: 76.4499282836914 elapsed: 0.8667078018188477
DEBUG:base_model:epoch 858 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(76.4499, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:858, validation elapsed_time:0.09162306785583496
INFO:base_model:epoch 858 val result: {'loss': tensor(0.7698), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 859 Train Loss: 100.8622055053711 elapsed: 0.8467299938201904
DEBUG:base_model:epoc

INFO:model.base_model:cur_epoch:875, validation elapsed_time:0.09241914749145508
INFO:base_model:epoch 875 val result: {'loss': tensor(0.8640), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 876 Train Loss: 79.69341278076172 elapsed: 0.6880528926849365
DEBUG:base_model:epoch 876 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(79.6934, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:876, validation elapsed_time:0.10367107391357422
INFO:base_model:epoch 876 val result: {'loss': tensor(0.7360), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 877 Train Loss: 123.14627838134766 elapsed: 0.6278939247131348
DEBUG:base_model:epoch 877 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(123.1463, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:877, validation elapsed_time:0.16195297241210938
INFO:base_model:epoch 877 val r

INFO:model.base_model:cur_epoch:894, validation elapsed_time:0.08273792266845703
INFO:base_model:epoch 894 val result: {'loss': tensor(0.7530), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 895 Train Loss: 76.67710876464844 elapsed: 0.9201779365539551
DEBUG:base_model:epoch 895 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(76.6771, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:895, validation elapsed_time:0.1650707721710205
INFO:base_model:epoch 895 val result: {'loss': tensor(0.9065), 'auc': 0.5064375320872393}
INFO:model.base_model:Epoch Step: 896 Train Loss: 112.78231048583984 elapsed: 0.7186717987060547
DEBUG:base_model:epoch 896 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(112.7823, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:896, validation elapsed_time:0.08070588111877441
INFO:base_model:epoch 896 val re

INFO:model.base_model:cur_epoch:913, validation elapsed_time:0.18492722511291504
INFO:base_model:epoch 913 val result: {'loss': tensor(1.2241), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 914 Train Loss: 100.92315673828125 elapsed: 0.7645561695098877
DEBUG:base_model:epoch 914 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(100.9232, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:914, validation elapsed_time:0.09175300598144531
INFO:base_model:epoch 914 val result: {'loss': tensor(0.6517), 'auc': 0.5064375320872393}
INFO:model.base_model:Epoch Step: 915 Train Loss: 147.79803466796875 elapsed: 0.7681078910827637
DEBUG:base_model:epoch 915 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(147.7980, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:915, validation elapsed_time:0.15675592422485352
INFO:base_model:epoch 915 val result: {'loss

INFO:model.base_model:cur_epoch:932, validation elapsed_time:0.1428670883178711
INFO:base_model:epoch 932 val result: {'loss': tensor(1.1272), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 933 Train Loss: 97.31617736816406 elapsed: 0.6478619575500488
DEBUG:base_model:epoch 933 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(97.3162, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:933, validation elapsed_time:0.08180117607116699
INFO:base_model:epoch 933 val result: {'loss': tensor(0.5871), 'auc': 0.5064375320872393}
INFO:model.base_model:Epoch Step: 934 Train Loss: 130.4908905029297 elapsed: 0.6270129680633545
DEBUG:base_model:epoch 934 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(130.4909, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:934, validation elapsed_time:0.2627289295196533
INFO:base_model:epoch 934 val resu

INFO:model.base_model:cur_epoch:951, validation elapsed_time:0.13455796241760254
INFO:base_model:epoch 951 val result: {'loss': tensor(0.6724), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 952 Train Loss: 81.32421112060547 elapsed: 0.8249020576477051
DEBUG:base_model:epoch 952 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(81.3242, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:952, validation elapsed_time:0.08162403106689453
INFO:base_model:epoch 952 val result: {'loss': tensor(0.9142), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 953 Train Loss: 120.44365692138672 elapsed: 0.7479748725891113
DEBUG:base_model:epoch 953 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(120.4437, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:953, validation elapsed_time:0.13773274421691895
INFO:base_model:epoch 953 val result: {'loss':

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0954.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0954.pth


INFO:model.base_model:Epoch Step: 955 Train Loss: 138.15684509277344 elapsed: 0.6331479549407959
DEBUG:base_model:epoch 955 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(138.1568, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:955, validation elapsed_time:0.20435094833374023
INFO:base_model:epoch 955 val result: {'loss': tensor(1.4322), 'auc': 0.49356246791276065}
INFO:model.base_model:Epoch Step: 956 Train Loss: 92.6501693725586 elapsed: 0.7747929096221924
DEBUG:base_model:epoch 956 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(92.6502, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:956, validation elapsed_time:0.09384894371032715
INFO:base_model:epoch 956 val result: {'loss': tensor(0.8914), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 957 Train Loss: 145.0597686767578 elapsed: 0.6975948810577393
DEBUG:base_model

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0960.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0960.pth


INFO:model.base_model:Epoch Step: 961 Train Loss: 107.56255340576172 elapsed: 0.6722419261932373
DEBUG:base_model:epoch 961 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(107.5626, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:961, validation elapsed_time:0.07996010780334473
INFO:base_model:epoch 961 val result: {'loss': tensor(0.6248), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 962 Train Loss: 113.8153305053711 elapsed: 0.7431530952453613
DEBUG:base_model:epoch 962 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(113.8153, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:962, validation elapsed_time:0.08005213737487793
INFO:base_model:epoch 962 val result: {'loss': tensor(1.2521), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 963 Train Loss: 163.5128631591797 elapsed: 0.8033928871154785
DEBUG:base_model:epoch 963 tra

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0966.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0966.pth


INFO:model.base_model:Epoch Step: 967 Train Loss: 98.89384460449219 elapsed: 0.66180419921875
DEBUG:base_model:epoch 967 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(98.8938, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:967, validation elapsed_time:0.10503292083740234
INFO:base_model:epoch 967 val result: {'loss': tensor(0.6209), 'auc': 0.49959415584415584}
INFO:model.base_model:Epoch Step: 968 Train Loss: 106.15814971923828 elapsed: 0.6527609825134277
DEBUG:base_model:epoch 968 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(106.1581, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:968, validation elapsed_time:0.08237504959106445
INFO:base_model:epoch 968 val result: {'loss': tensor(1.1086), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 969 Train Loss: 134.53018188476562 elapsed: 0.8459367752075195
DEBUG:base_model:epoch 969 trai

INFO:base_model:epoch 985 val result: {'loss': tensor(1.5714), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 986 Train Loss: 166.753662109375 elapsed: 0.9116373062133789
DEBUG:base_model:epoch 986 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(166.7537, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:986, validation elapsed_time:0.11324310302734375
INFO:base_model:epoch 986 val result: {'loss': tensor(1.0945), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 987 Train Loss: 89.63580322265625 elapsed: 0.916705846786499
DEBUG:base_model:epoch 987 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(89.6358, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:987, validation elapsed_time:0.12019491195678711
INFO:base_model:epoch 987 val result: {'loss': tensor(0.6480), 'auc': 0.49355842729744626}
INFO:model.base_model:Epoch Step: 988 T

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0989.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_0989.pth


INFO:model.base_model:Epoch Step: 990 Train Loss: 88.82500457763672 elapsed: 0.8185231685638428
DEBUG:base_model:epoch 990 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(88.8250, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:990, validation elapsed_time:0.09346294403076172
INFO:base_model:epoch 990 val result: {'loss': tensor(1.0086), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 991 Train Loss: 138.44717407226562 elapsed: 0.6863851547241211
DEBUG:base_model:epoch 991 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(138.4472, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:991, validation elapsed_time:0.12112188339233398
INFO:base_model:epoch 991 val result: {'loss': tensor(1.2155), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 992 Train Loss: 129.6813507080078 elapsed: 0.6597118377685547
DEBUG:base_model:epoch 992 trai

INFO:model.base_model:cur_epoch:1008, validation elapsed_time:0.09456038475036621
INFO:base_model:epoch 1008 val result: {'loss': tensor(0.7193), 'auc': 0.49959415584415584}
INFO:model.base_model:Epoch Step: 1009 Train Loss: 99.95228576660156 elapsed: 0.786372184753418
DEBUG:base_model:epoch 1009 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(99.9523, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1009, validation elapsed_time:0.08360004425048828
INFO:base_model:epoch 1009 val result: {'loss': tensor(0.9268), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1010 Train Loss: 109.0233383178711 elapsed: 0.7782900333404541
DEBUG:base_model:epoch 1010 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(109.0233, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1010, validation elapsed_time:0.12491703033447266
INFO:base_model:epoch 1010 val result:

DEBUG:base_model:epoch 1027 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(125.5375, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1027, validation elapsed_time:0.1325969696044922
INFO:base_model:epoch 1027 val result: {'loss': tensor(0.8235), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 1028 Train Loss: 76.62604522705078 elapsed: 0.752528190612793
DEBUG:base_model:epoch 1028 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(76.6260, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1028, validation elapsed_time:0.1150808334350586
INFO:base_model:epoch 1028 val result: {'loss': tensor(0.7482), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 1029 Train Loss: 130.12493896484375 elapsed: 0.6897909641265869
DEBUG:base_model:epoch 1029 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_t

INFO:base_model:epoch 1045 val result: {'loss': tensor(0.6950), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 1046 Train Loss: 105.155029296875 elapsed: 0.8554749488830566
DEBUG:base_model:epoch 1046 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(105.1550, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1046, validation elapsed_time:0.14674091339111328
INFO:base_model:epoch 1046 val result: {'loss': tensor(1.3897), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1047 Train Loss: 186.72268676757812 elapsed: 0.7660410404205322
DEBUG:base_model:epoch 1047 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(186.7227, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1047, validation elapsed_time:0.07799983024597168
INFO:base_model:epoch 1047 val result: {'loss': tensor(1.4977), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1048 Trai

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1050.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1050.pth


INFO:model.base_model:Epoch Step: 1051 Train Loss: 100.60693359375 elapsed: 0.644960880279541
DEBUG:base_model:epoch 1051 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(100.6069, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1051, validation elapsed_time:0.0808100700378418
INFO:base_model:epoch 1051 val result: {'loss': tensor(1.2660), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1052 Train Loss: 178.5496368408203 elapsed: 0.6656310558319092
DEBUG:base_model:epoch 1052 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(178.5496, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1052, validation elapsed_time:0.26129770278930664
INFO:base_model:epoch 1052 val result: {'loss': tensor(1.5847), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1053 Train Loss: 162.45916748046875 elapsed: 0.7375180721282959
DEBUG:base_model:epoch 1053 train finish

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1056.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1056.pth


INFO:model.base_model:Epoch Step: 1057 Train Loss: 92.52287292480469 elapsed: 0.7607858180999756
DEBUG:base_model:epoch 1057 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(92.5229, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1057, validation elapsed_time:0.11533498764038086
INFO:base_model:epoch 1057 val result: {'loss': tensor(1.0650), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1058 Train Loss: 149.24452209472656 elapsed: 0.6180140972137451
DEBUG:base_model:epoch 1058 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(149.2445, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1058, validation elapsed_time:0.0999898910522461
INFO:base_model:epoch 1058 val result: {'loss': tensor(1.3326), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1059 Train Loss: 142.63333129882812 elapsed: 0.8418419361114502
DEBUG:base_model:epoch 1059 train fin

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1065.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1065.pth


INFO:model.base_model:Epoch Step: 1066 Train Loss: 133.3353271484375 elapsed: 0.9377310276031494
DEBUG:base_model:epoch 1066 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(133.3353, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1066, validation elapsed_time:0.13667726516723633
INFO:base_model:epoch 1066 val result: {'loss': tensor(1.6178), 'auc': 0.49355842729744626}
INFO:model.base_model:Epoch Step: 1067 Train Loss: 122.98407745361328 elapsed: 0.777184009552002
DEBUG:base_model:epoch 1067 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(122.9841, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1067, validation elapsed_time:0.10171103477478027
INFO:base_model:epoch 1067 val result: {'loss': tensor(0.6061), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 1068 Train Loss: 86.59113311767578 elapsed: 0.6900861263275146
DEBUG:

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1072.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1072.pth


INFO:model.base_model:Epoch Step: 1073 Train Loss: 94.98165130615234 elapsed: 0.834190845489502
DEBUG:base_model:epoch 1073 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(94.9817, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1073, validation elapsed_time:0.0906989574432373
INFO:base_model:epoch 1073 val result: {'loss': tensor(0.6432), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 1074 Train Loss: 84.25242614746094 elapsed: 0.8155062198638916
DEBUG:base_model:epoch 1074 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(84.2524, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1074, validation elapsed_time:0.15548968315124512
INFO:base_model:epoch 1074 val result: {'loss': tensor(1.0044), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1075 Train Loss: 133.93356323242188 elapsed: 0.6428329944610596
DEBUG:base_model:epoch 10

INFO:model.base_model:cur_epoch:1091, validation elapsed_time:0.08209681510925293
INFO:base_model:epoch 1091 val result: {'loss': tensor(1.0588), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1092 Train Loss: 107.2251968383789 elapsed: 0.8071997165679932
DEBUG:base_model:epoch 1092 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(107.2252, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1092, validation elapsed_time:0.10770821571350098
INFO:base_model:epoch 1092 val result: {'loss': tensor(0.7049), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1093 Train Loss: 74.90728759765625 elapsed: 0.7723948955535889
DEBUG:base_model:epoch 1093 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(74.9073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1093, validation elapsed_time:0.08036494255065918
INFO:base_model:epoch 1093 val result: {'loss': tenso

DEBUG:base_model:epoch 1110 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(76.0661, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1110, validation elapsed_time:0.08076810836791992
INFO:base_model:epoch 1110 val result: {'loss': tensor(0.8533), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 1111 Train Loss: 125.9493408203125 elapsed: 0.675818920135498
DEBUG:base_model:epoch 1111 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(125.9493, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1111, validation elapsed_time:0.18267607688903809
INFO:base_model:epoch 1111 val result: {'loss': tensor(0.9452), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 1112 Train Loss: 75.89327239990234 elapsed: 0.8412230014801025
DEBUG:base_model:epoch 1112 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1126.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1126.pth


INFO:model.base_model:Epoch Step: 1127 Train Loss: 83.83658599853516 elapsed: 0.6470520496368408
DEBUG:base_model:epoch 1127 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(83.8366, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1127, validation elapsed_time:0.08225011825561523
INFO:base_model:epoch 1127 val result: {'loss': tensor(0.8126), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 1128 Train Loss: 128.05593872070312 elapsed: 0.7366180419921875
DEBUG:base_model:epoch 1128 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(128.0559, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1128, validation elapsed_time:0.1918649673461914
INFO:base_model:epoch 1128 val result: {'loss': tensor(1.2520), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 1129 Train Loss: 142.39588928222656 elapsed: 0.8059587478637695
DEBUG:b

INFO:model.base_model:cur_epoch:1145, validation elapsed_time:0.15848183631896973
INFO:base_model:epoch 1145 val result: {'loss': tensor(0.5783), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 1146 Train Loss: 108.2586669921875 elapsed: 0.7753610610961914
DEBUG:base_model:epoch 1146 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(108.2587, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1146, validation elapsed_time:0.08045482635498047
INFO:base_model:epoch 1146 val result: {'loss': tensor(1.4003), 'auc': 0.5060337082390522}
INFO:model.base_model:Epoch Step: 1147 Train Loss: 100.37211608886719 elapsed: 0.7872262001037598
DEBUG:base_model:epoch 1147 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(100.3721, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1147, validation elapsed_time:0.1423168182373047
INFO:base_model:epoch 

DEBUG:base_model:epoch 1164 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(148.6087, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1164, validation elapsed_time:0.1308581829071045
INFO:base_model:epoch 1164 val result: {'loss': tensor(1.4320), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 1165 Train Loss: 155.1522979736328 elapsed: 0.6566321849822998
DEBUG:base_model:epoch 1165 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(155.1523, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1165, validation elapsed_time:0.08935189247131348
INFO:base_model:epoch 1165 val result: {'loss': tensor(1.0003), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1166 Train Loss: 84.05792999267578 elapsed: 0.6013751029968262
DEBUG:base_model:epoch 1166 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1166.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1166.pth


INFO:model.base_model:Epoch Step: 1167 Train Loss: 133.96844482421875 elapsed: 0.7967522144317627
DEBUG:base_model:epoch 1167 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(133.9684, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1167, validation elapsed_time:0.0949399471282959
INFO:base_model:epoch 1167 val result: {'loss': tensor(1.1806), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 1168 Train Loss: 83.7825927734375 elapsed: 0.7262201309204102
DEBUG:base_model:epoch 1168 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(83.7826, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1168, validation elapsed_time:0.0787057876586914
INFO:base_model:epoch 1168 val result: {'loss': tensor(0.7795), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 1169 Train Loss: 136.88589477539062 elapsed: 0.7272000312805176
DEBUG:bas

INFO:model.base_model:cur_epoch:1185, validation elapsed_time:0.09805893898010254
INFO:base_model:epoch 1185 val result: {'loss': tensor(1.5860), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 1186 Train Loss: 142.68020629882812 elapsed: 0.7067368030548096
DEBUG:base_model:epoch 1186 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(142.6802, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1186, validation elapsed_time:0.07948994636535645
INFO:base_model:epoch 1186 val result: {'loss': tensor(0.5785), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 1187 Train Loss: 126.42938995361328 elapsed: 0.716447114944458
DEBUG:base_model:epoch 1187 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(126.4294, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1187, validation elapsed_time:0.07860970497131348
INFO:base_model:epoch

DEBUG:base_model:epoch 1204 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(136.2938, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1204, validation elapsed_time:0.127640962600708
INFO:base_model:epoch 1204 val result: {'loss': tensor(0.9845), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1205 Train Loss: 86.53437042236328 elapsed: 0.7830736637115479
DEBUG:base_model:epoch 1205 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(86.5344, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1205, validation elapsed_time:0.12247300148010254
INFO:base_model:epoch 1205 val result: {'loss': tensor(0.6239), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 1206 Train Loss: 140.09359741210938 elapsed: 0.7361938953399658
DEBUG:base_model:epoch 1206 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1220.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1220.pth


INFO:model.base_model:Epoch Step: 1221 Train Loss: 78.75609588623047 elapsed: 0.8184688091278076
DEBUG:base_model:epoch 1221 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(78.7561, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1221, validation elapsed_time:0.07980704307556152
INFO:base_model:epoch 1221 val result: {'loss': tensor(0.8726), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 1222 Train Loss: 135.58558654785156 elapsed: 0.7569398880004883
DEBUG:base_model:epoch 1222 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(135.5856, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1222, validation elapsed_time:0.19766497611999512
INFO:base_model:epoch 1222 val result: {'loss': tensor(1.2941), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1223 Train Loss: 144.83917236328125 elapsed: 0.7375612258911133
DEBUG:base_model:epoc

INFO:model.base_model:cur_epoch:1239, validation elapsed_time:0.08333992958068848
INFO:base_model:epoch 1239 val result: {'loss': tensor(1.3354), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1240 Train Loss: 129.2516632080078 elapsed: 0.8176019191741943
DEBUG:base_model:epoch 1240 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(129.2517, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1240, validation elapsed_time:0.14877700805664062
INFO:base_model:epoch 1240 val result: {'loss': tensor(0.7673), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1241 Train Loss: 73.18582153320312 elapsed: 0.8207643032073975
DEBUG:base_model:epoch 1241 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(73.1858, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1241, validation elapsed_time:0.07905197143554688
INFO:base_model:epoch 1241 val result: {'loss': tenso

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1250.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1250.pth


INFO:model.base_model:Epoch Step: 1251 Train Loss: 142.69615173339844 elapsed: 0.8053827285766602
DEBUG:base_model:epoch 1251 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(142.6962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1251, validation elapsed_time:0.12751293182373047
INFO:base_model:epoch 1251 val result: {'loss': tensor(0.8452), 'auc': 0.5064415727025536}


save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1251.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1251.pth


INFO:model.base_model:Epoch Step: 1252 Train Loss: 85.69204711914062 elapsed: 0.629767894744873
DEBUG:base_model:epoch 1252 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(85.6920, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1252, validation elapsed_time:0.07873225212097168
INFO:base_model:epoch 1252 val result: {'loss': tensor(1.0748), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1253 Train Loss: 171.11880493164062 elapsed: 0.7909049987792969
DEBUG:base_model:epoch 1253 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(171.1188, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1253, validation elapsed_time:0.22674894332885742
INFO:base_model:epoch 1253 val result: {'loss': tensor(1.6214), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1254 Train Loss: 168.3599853515625 elapsed: 0.7084989547729492
DEBUG:base_model:epoch 1254 train fini

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1255.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1255.pth


INFO:model.base_model:Epoch Step: 1256 Train Loss: 156.99876403808594 elapsed: 0.7296719551086426
DEBUG:base_model:epoch 1256 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(156.9988, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1256, validation elapsed_time:0.08079314231872559
INFO:base_model:epoch 1256 val result: {'loss': tensor(1.0700), 'auc': 0.49355842729744626}
INFO:model.base_model:Epoch Step: 1257 Train Loss: 81.03532409667969 elapsed: 0.7575831413269043
DEBUG:base_model:epoch 1257 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(81.0353, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1257, validation elapsed_time:0.12841105461120605
INFO:base_model:epoch 1257 val result: {'loss': tensor(0.8387), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1258 Train Loss: 129.7731475830078 elapsed: 0.7935748100280762
DEBUG:base_model:epoc

INFO:model.base_model:cur_epoch:1274, validation elapsed_time:0.10110878944396973
INFO:base_model:epoch 1274 val result: {'loss': tensor(0.6740), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 1275 Train Loss: 147.63479614257812 elapsed: 0.810359001159668
DEBUG:base_model:epoch 1275 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(147.6348, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1275, validation elapsed_time:0.1156013011932373
INFO:base_model:epoch 1275 val result: {'loss': tensor(1.5585), 'auc': 0.5060337082390522}
INFO:model.base_model:Epoch Step: 1276 Train Loss: 99.75296783447266 elapsed: 0.7435958385467529
DEBUG:base_model:epoch 1276 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(99.7530, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1276, validation elapsed_time:0.07900691032409668
INFO:base_model:epoch 12

DEBUG:base_model:epoch 1293 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(94.4815, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1293, validation elapsed_time:0.14541983604431152
INFO:base_model:epoch 1293 val result: {'loss': tensor(0.6727), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 1294 Train Loss: 123.54175567626953 elapsed: 0.7395148277282715
DEBUG:base_model:epoch 1294 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(123.5418, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1294, validation elapsed_time:0.08549213409423828
INFO:base_model:epoch 1294 val result: {'loss': tensor(1.3361), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1295 Train Loss: 170.88607788085938 elapsed: 0.7068150043487549
DEBUG:base_model:epoch 1295 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tens

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1298.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1298.pth


INFO:model.base_model:Epoch Step: 1299 Train Loss: 111.39083862304688 elapsed: 0.5999569892883301
DEBUG:base_model:epoch 1299 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(111.3908, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1299, validation elapsed_time:0.09403705596923828
INFO:base_model:epoch 1299 val result: {'loss': tensor(0.6598), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 1300 Train Loss: 75.82511901855469 elapsed: 0.6798498630523682
DEBUG:base_model:epoch 1300 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(75.8251, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1300, validation elapsed_time:0.19483184814453125
INFO:base_model:epoch 1300 val result: {'loss': tensor(0.8205), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 1301 Train Loss: 120.83897399902344 elapsed: 0.7421913146972656
DEBUG:

INFO:model.base_model:cur_epoch:1317, validation elapsed_time:0.07640719413757324
INFO:base_model:epoch 1317 val result: {'loss': tensor(0.9898), 'auc': 0.5064375320872393}
INFO:model.base_model:Epoch Step: 1318 Train Loss: 179.9896697998047 elapsed: 0.7648048400878906
DEBUG:base_model:epoch 1318 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(179.9897, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1318, validation elapsed_time:0.09031391143798828
INFO:base_model:epoch 1318 val result: {'loss': tensor(1.1101), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 1319 Train Loss: 96.17623138427734 elapsed: 0.7889771461486816
DEBUG:base_model:epoch 1319 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(96.1762, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1319, validation elapsed_time:0.12560796737670898
INFO:base_model:epoch 1

DEBUG:base_model:epoch 1336 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(120.5857, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1336, validation elapsed_time:0.11055684089660645
INFO:base_model:epoch 1336 val result: {'loss': tensor(0.6031), 'auc': 0.49355842729744626}
INFO:model.base_model:Epoch Step: 1337 Train Loss: 143.3482666015625 elapsed: 0.67582106590271
DEBUG:base_model:epoch 1337 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(143.3483, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1337, validation elapsed_time:0.07960009574890137
INFO:base_model:epoch 1337 val result: {'loss': tensor(2.2928), 'auc': 0.49355842729744626}
INFO:model.base_model:Epoch Step: 1338 Train Loss: 162.57518005371094 elapsed: 0.8500783443450928
DEBUG:base_model:epoch 1338 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_

INFO:base_model:epoch 1354 val result: {'loss': tensor(1.7878), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1355 Train Loss: 205.8272705078125 elapsed: 0.7443289756774902
DEBUG:base_model:epoch 1355 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(205.8273, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1355, validation elapsed_time:0.08232688903808594
INFO:base_model:epoch 1355 val result: {'loss': tensor(1.4930), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1356 Train Loss: 117.5385513305664 elapsed: 0.7243711948394775
DEBUG:base_model:epoch 1356 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(117.5386, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1356, validation elapsed_time:0.08503389358520508
INFO:base_model:epoch 1356 val result: {'loss': tensor(0.6058), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 1357 Trai

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1372.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1372.pth


INFO:model.base_model:Epoch Step: 1373 Train Loss: 215.2467803955078 elapsed: 1.6971549987792969
DEBUG:base_model:epoch 1373 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(215.2468, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1373, validation elapsed_time:0.39317822456359863
INFO:base_model:epoch 1373 val result: {'loss': tensor(0.7633), 'auc': 0.49355842729744626}
INFO:model.base_model:Epoch Step: 1374 Train Loss: 120.94813537597656 elapsed: 1.7180516719818115
DEBUG:base_model:epoch 1374 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(120.9481, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1374, validation elapsed_time:0.4336857795715332
INFO:base_model:epoch 1374 val result: {'loss': tensor(1.7155), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1375 Train Loss: 240.7173614501953 elapsed: 1.1725187301635742
DEBUG:base_model:epoc

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1381.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1381.pth


INFO:model.base_model:Epoch Step: 1382 Train Loss: 210.01048278808594 elapsed: 0.9880077838897705
DEBUG:base_model:epoch 1382 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(210.0105, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1382, validation elapsed_time:0.15171289443969727
INFO:base_model:epoch 1382 val result: {'loss': tensor(1.9456), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1383 Train Loss: 111.65898895263672 elapsed: 0.8478360176086426
DEBUG:base_model:epoch 1383 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(111.6590, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1383, validation elapsed_time:0.12693500518798828
INFO:base_model:epoch 1383 val result: {'loss': tensor(1.2329), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 1384 Train Loss: 174.52432250976562 elapsed: 0.7073650360107422
DEBUG:base_model:ep

INFO:base_model:epoch 1400 val result: {'loss': tensor(1.9197), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1401 Train Loss: 197.75540161132812 elapsed: 0.7876632213592529
DEBUG:base_model:epoch 1401 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(197.7554, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1401, validation elapsed_time:0.1327371597290039
INFO:base_model:epoch 1401 val result: {'loss': tensor(1.1566), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 1402 Train Loss: 112.7516860961914 elapsed: 0.8385779857635498
DEBUG:base_model:epoch 1402 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(112.7517, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1402, validation elapsed_time:0.12539267539978027
INFO:base_model:epoch 1402 val result: {'loss': tensor(2.0968), 'auc': 0.5064375320872393}
INFO:model.base_model:Epoch 

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1418.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1418.pth


INFO:model.base_model:Epoch Step: 1419 Train Loss: 266.7648620605469 elapsed: 0.8655433654785156
DEBUG:base_model:epoch 1419 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(266.7649, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1419, validation elapsed_time:0.11562395095825195
INFO:base_model:epoch 1419 val result: {'loss': tensor(2.6048), 'auc': 0.49355842729744626}
INFO:model.base_model:Epoch Step: 1420 Train Loss: 131.7704620361328 elapsed: 0.9612138271331787
DEBUG:base_model:epoch 1420 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(131.7705, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1420, validation elapsed_time:0.1437819004058838
INFO:base_model:epoch 1420 val result: {'loss': tensor(1.0246), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1421 Train Loss: 197.15457153320312 elapsed: 0.8437230587005615
DEBUG:base_model:epoc

INFO:model.base_model:cur_epoch:1437, validation elapsed_time:0.16161108016967773
INFO:base_model:epoch 1437 val result: {'loss': tensor(2.7762), 'auc': 0.5064375320872393}
INFO:model.base_model:Epoch Step: 1438 Train Loss: 183.3603973388672 elapsed: 0.8564238548278809
DEBUG:base_model:epoch 1438 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(183.3604, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1438, validation elapsed_time:0.0907132625579834
INFO:base_model:epoch 1438 val result: {'loss': tensor(0.5845), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 1439 Train Loss: 113.05081939697266 elapsed: 0.6575009822845459
DEBUG:base_model:epoch 1439 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(113.0508, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1439, validation elapsed_time:0.23033690452575684
INFO:base_model:epoch 

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1443.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1443.pth


INFO:model.base_model:Epoch Step: 1444 Train Loss: 115.24687194824219 elapsed: 0.8797786235809326
DEBUG:base_model:epoch 1444 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(115.2469, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1444, validation elapsed_time:0.1358931064605713
INFO:base_model:epoch 1444 val result: {'loss': tensor(0.7782), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 1445 Train Loss: 145.50985717773438 elapsed: 0.8710181713104248
DEBUG:base_model:epoch 1445 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(145.5099, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1445, validation elapsed_time:0.08557987213134766
INFO:base_model:epoch 1445 val result: {'loss': tensor(1.5180), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1446 Train Loss: 177.1972198486328 elapsed: 1.0113580226898193
DEBUG:base_model:epoc

INFO:model.base_model:cur_epoch:1462, validation elapsed_time:0.08408403396606445
INFO:base_model:epoch 1462 val result: {'loss': tensor(2.4782), 'auc': 0.5064375320872393}
INFO:model.base_model:Epoch Step: 1463 Train Loss: 326.05609130859375 elapsed: 0.7652950286865234
DEBUG:base_model:epoch 1463 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(326.0561, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1463, validation elapsed_time:0.08595514297485352
INFO:base_model:epoch 1463 val result: {'loss': tensor(1.5098), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 1464 Train Loss: 122.60066986083984 elapsed: 0.99668288230896
DEBUG:base_model:epoch 1464 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(122.6007, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1464, validation elapsed_time:0.1969280242919922
INFO:base_model:epoch 1

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1473.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1473.pth


INFO:model.base_model:Epoch Step: 1474 Train Loss: 240.2988739013672 elapsed: 1.0194580554962158
DEBUG:base_model:epoch 1474 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(240.2989, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1474, validation elapsed_time:0.1825408935546875
INFO:base_model:epoch 1474 val result: {'loss': tensor(1.6932), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 1475 Train Loss: 108.55067443847656 elapsed: 1.1096069812774658
DEBUG:base_model:epoch 1475 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(108.5507, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1475, validation elapsed_time:0.12490224838256836
INFO:base_model:epoch 1475 val result: {'loss': tensor(1.3359), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1476 Train Loss: 211.08798217773438 elapsed: 0.6129999160766602
DEBUG:base_model:epoc

INFO:base_model:epoch 1492 val result: {'loss': tensor(2.0248), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1493 Train Loss: 226.23886108398438 elapsed: 0.884105920791626
DEBUG:base_model:epoch 1493 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(226.2389, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1493, validation elapsed_time:0.08177495002746582
INFO:base_model:epoch 1493 val result: {'loss': tensor(1.5648), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1494 Train Loss: 119.2401351928711 elapsed: 0.7546231746673584
DEBUG:base_model:epoch 1494 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(119.2401, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1494, validation elapsed_time:0.21201586723327637
INFO:base_model:epoch 1494 val result: {'loss': tensor(0.6008), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 1495 Trai

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1496.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1496.pth


INFO:model.base_model:Epoch Step: 1497 Train Loss: 126.36859893798828 elapsed: 0.7632341384887695
DEBUG:base_model:epoch 1497 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(126.3686, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1497, validation elapsed_time:0.15550518035888672
INFO:base_model:epoch 1497 val result: {'loss': tensor(1.6142), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1498 Train Loss: 224.85043334960938 elapsed: 0.7633531093597412
DEBUG:base_model:epoch 1498 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(224.8504, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1498, validation elapsed_time:0.10939407348632812
INFO:base_model:epoch 1498 val result: {'loss': tensor(2.0014), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1499 Train Loss: 214.94822692871094 elapsed: 0.7041661739349365
DEBUG:base_model:epoch 1499 train 

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1500.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1500.pth


INFO:model.base_model:Epoch Step: 1501 Train Loss: 203.7103729248047 elapsed: 0.9161999225616455
DEBUG:base_model:epoch 1501 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(203.7104, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1501, validation elapsed_time:0.12626004219055176
INFO:base_model:epoch 1501 val result: {'loss': tensor(2.3103), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 1502 Train Loss: 129.7472686767578 elapsed: 0.7759008407592773
DEBUG:base_model:epoch 1502 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(129.7473, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1502, validation elapsed_time:0.08014488220214844
INFO:base_model:epoch 1502 val result: {'loss': tensor(0.7503), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 1503 Train Loss: 149.5648956298828 elapsed: 0.8803558349609375
DEBUG:b

INFO:model.base_model:cur_epoch:1519, validation elapsed_time:0.08146166801452637
INFO:base_model:epoch 1519 val result: {'loss': tensor(2.4538), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1520 Train Loss: 209.58694458007812 elapsed: 0.7679972648620605
DEBUG:base_model:epoch 1520 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(209.5869, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1520, validation elapsed_time:0.09846806526184082
INFO:base_model:epoch 1520 val result: {'loss': tensor(0.8087), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 1521 Train Loss: 189.91282653808594 elapsed: 0.8646368980407715
DEBUG:base_model:epoch 1521 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(189.9128, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1521, validation elapsed_time:0.21950697898864746
INFO:base_model:epoch 1521 val resu

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1527.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1527.pth


INFO:model.base_model:Epoch Step: 1528 Train Loss: 120.60759735107422 elapsed: 0.8286921977996826
DEBUG:base_model:epoch 1528 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(120.6076, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1528, validation elapsed_time:0.08717179298400879
INFO:base_model:epoch 1528 val result: {'loss': tensor(1.6223), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1529 Train Loss: 219.4013214111328 elapsed: 0.8676469326019287
DEBUG:base_model:epoch 1529 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(219.4013, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1529, validation elapsed_time:0.12734293937683105
INFO:base_model:epoch 1529 val result: {'loss': tensor(1.7839), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1530 Train Loss: 149.6003875732422 elapsed: 0.8127598762512207
DEBUG:base_model:epoch 1530 train fi

INFO:base_model:epoch 1546 val result: {'loss': tensor(2.5111), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1547 Train Loss: 236.62466430664062 elapsed: 0.783682107925415
DEBUG:base_model:epoch 1547 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(236.6247, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1547, validation elapsed_time:0.12331628799438477
INFO:base_model:epoch 1547 val result: {'loss': tensor(1.3031), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1548 Train Loss: 105.58602905273438 elapsed: 0.6551592350006104
DEBUG:base_model:epoch 1548 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(105.5860, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1548, validation elapsed_time:0.13515496253967285
INFO:base_model:epoch 1548 val result: {'loss': tensor(1.6876), 'auc': 0.5064415727025536}


save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1548.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1548.pth


INFO:model.base_model:Epoch Step: 1549 Train Loss: 291.8585510253906 elapsed: 0.7660691738128662
DEBUG:base_model:epoch 1549 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(291.8586, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1549, validation elapsed_time:0.08449387550354004
INFO:base_model:epoch 1549 val result: {'loss': tensor(2.4035), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 1550 Train Loss: 124.28634643554688 elapsed: 0.8432149887084961
DEBUG:base_model:epoch 1550 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(124.2863, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1550, validation elapsed_time:0.1442720890045166
INFO:base_model:epoch 1550 val result: {'loss': tensor(1.1107), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 1551 Train Loss: 215.14967346191406 elapsed: 0.7633941173553467
DEBUG:

INFO:model.base_model:cur_epoch:1567, validation elapsed_time:0.32674479484558105
INFO:base_model:epoch 1567 val result: {'loss': tensor(0.5885), 'auc': 0.5064415727025536}


save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1567.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1567.pth


INFO:model.base_model:Epoch Step: 1568 Train Loss: 116.46891784667969 elapsed: 0.9816670417785645
DEBUG:base_model:epoch 1568 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(116.4689, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1568, validation elapsed_time:0.09652996063232422
INFO:base_model:epoch 1568 val result: {'loss': tensor(1.4690), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1569 Train Loss: 209.16990661621094 elapsed: 0.7070660591125488
DEBUG:base_model:epoch 1569 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(209.1699, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1569, validation elapsed_time:0.09253692626953125
INFO:base_model:epoch 1569 val result: {'loss': tensor(1.9097), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1570 Train Loss: 217.92247009277344 elapsed: 0.8420970439910889
DEBUG:base_model:epoch 1570 train 

INFO:model.base_model:cur_epoch:1586, validation elapsed_time:0.08167600631713867
INFO:base_model:epoch 1586 val result: {'loss': tensor(1.8536), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1587 Train Loss: 210.75502014160156 elapsed: 0.7586791515350342
DEBUG:base_model:epoch 1587 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(210.7550, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1587, validation elapsed_time:0.08603501319885254
INFO:base_model:epoch 1587 val result: {'loss': tensor(1.4944), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1588 Train Loss: 115.5624771118164 elapsed: 0.8668129444122314
DEBUG:base_model:epoch 1588 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(115.5625, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1588, validation elapsed_time:0.11047720909118652
INFO:base_model:epoch 1588 val result: {'loss': ten

INFO:model.base_model:cur_epoch:1605, validation elapsed_time:0.14195609092712402
INFO:base_model:epoch 1605 val result: {'loss': tensor(1.5033), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1606 Train Loss: 119.08100891113281 elapsed: 0.6852171421051025
DEBUG:base_model:epoch 1606 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(119.0810, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1606, validation elapsed_time:0.08205485343933105
INFO:base_model:epoch 1606 val result: {'loss': tensor(0.5801), 'auc': 0.49355842729744626}
INFO:model.base_model:Epoch Step: 1607 Train Loss: 172.1184539794922 elapsed: 0.7828168869018555
DEBUG:base_model:epoch 1607 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(172.1185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1607, validation elapsed_time:0.0843348503112793
INFO:base_model:epoch 1607 val resul

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1613.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1613.pth


INFO:model.base_model:Epoch Step: 1614 Train Loss: 117.91476440429688 elapsed: 0.8713867664337158
DEBUG:base_model:epoch 1614 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(117.9148, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1614, validation elapsed_time:0.10826802253723145
INFO:base_model:epoch 1614 val result: {'loss': tensor(1.0049), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1615 Train Loss: 192.1973419189453 elapsed: 1.2502222061157227
DEBUG:base_model:epoch 1615 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(192.1973, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1615, validation elapsed_time:0.24179887771606445
INFO:base_model:epoch 1615 val result: {'loss': tensor(2.0629), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1616 Train Loss: 266.0971984863281 elapsed: 1.111058235168457
DEBUG:base_model:epoch 1616 train fin

INFO:base_model:epoch 1632 val result: {'loss': tensor(1.5164), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 1633 Train Loss: 188.3632049560547 elapsed: 0.8525938987731934
DEBUG:base_model:epoch 1633 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(188.3632, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1633, validation elapsed_time:0.1181480884552002
INFO:base_model:epoch 1633 val result: {'loss': tensor(1.2145), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 1634 Train Loss: 83.41179656982422 elapsed: 0.8021838665008545
DEBUG:base_model:epoch 1634 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(83.4118, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1634, validation elapsed_time:0.08089661598205566
INFO:base_model:epoch 1634 val result: {'loss': tensor(0.8822), 'auc': 0.5064395523948965}
INFO:model.base

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1642.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1642.pth


INFO:model.base_model:Epoch Step: 1643 Train Loss: 201.00807189941406 elapsed: 0.8920471668243408
DEBUG:base_model:epoch 1643 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(201.0081, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1643, validation elapsed_time:0.23709988594055176
INFO:base_model:epoch 1643 val result: {'loss': tensor(2.2747), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 1644 Train Loss: 132.05540466308594 elapsed: 0.8323369026184082
DEBUG:base_model:epoch 1644 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(132.0554, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1644, validation elapsed_time:0.10201096534729004
INFO:base_model:epoch 1644 val result: {'loss': tensor(0.6435), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 1645 Train Loss: 126.94508361816406 elapsed: 0.6556730270385742
DEBU

INFO:model.base_model:cur_epoch:1661, validation elapsed_time:0.11819601058959961
INFO:base_model:epoch 1661 val result: {'loss': tensor(1.9332), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1662 Train Loss: 252.74896240234375 elapsed: 0.7907099723815918
DEBUG:base_model:epoch 1662 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(252.7490, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1662, validation elapsed_time:0.12838482856750488
INFO:base_model:epoch 1662 val result: {'loss': tensor(2.1491), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1663 Train Loss: 228.80648803710938 elapsed: 0.832413911819458
DEBUG:base_model:epoch 1663 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(228.8065, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1663, validation elapsed_time:0.12454771995544434
INFO:base_model:epoch 1663 val result: {'loss': ten

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1666.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1666.pth


INFO:model.base_model:Epoch Step: 1667 Train Loss: 87.87794494628906 elapsed: 0.6920938491821289
DEBUG:base_model:epoch 1667 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(87.8779, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1667, validation elapsed_time:0.21889591217041016
INFO:base_model:epoch 1667 val result: {'loss': tensor(1.0687), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1668 Train Loss: 167.38397216796875 elapsed: 0.908390998840332
DEBUG:base_model:epoch 1668 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(167.3840, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1668, validation elapsed_time:0.09393000602722168
INFO:base_model:epoch 1668 val result: {'loss': tensor(1.5110), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1669 Train Loss: 138.53555297851562 elapsed: 0.8212728500366211
DEBUG:base_model:epoch 1669 train fin

INFO:model.base_model:cur_epoch:1685, validation elapsed_time:0.1959218978881836
INFO:base_model:epoch 1685 val result: {'loss': tensor(2.1189), 'auc': 0.4935604476051035}
INFO:model.base_model:Epoch Step: 1686 Train Loss: 134.23236083984375 elapsed: 0.9080169200897217
DEBUG:base_model:epoch 1686 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(134.2324, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1686, validation elapsed_time:0.1388711929321289
INFO:base_model:epoch 1686 val result: {'loss': tensor(1.8344), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1687 Train Loss: 304.6376037597656 elapsed: 0.8675458431243896
DEBUG:base_model:epoch 1687 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(304.6376, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1687, validation elapsed_time:0.0868828296661377
INFO:base_model:epoch 1687 val result: 

save_path ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1692.pth
Success to save to :  ./train_data/CategoricalAttentionSimpleModel_layer1_dmodel16_nhead1_lr0.0001_1692.pth


INFO:model.base_model:Epoch Step: 1693 Train Loss: 127.5804672241211 elapsed: 1.1111412048339844
DEBUG:base_model:epoch 1693 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(127.5805, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1693, validation elapsed_time:0.1280832290649414
INFO:base_model:epoch 1693 val result: {'loss': tensor(1.3661), 'auc': 0.5064395523948965}
INFO:model.base_model:Epoch Step: 1694 Train Loss: 117.48389434814453 elapsed: 1.0038201808929443
DEBUG:base_model:epoch 1694 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(117.4839, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1694, validation elapsed_time:0.12150192260742188
INFO:base_model:epoch 1694 val result: {'loss': tensor(0.9453), 'auc': 0.5064375320872393}
INFO:model.base_model:Epoch Step: 1695 Train Loss: 151.5250701904297 elapsed: 0.6483919620513916
DEBUG:b

INFO:base_model:epoch 1711 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1712 Train Loss: 3155.3486328125 elapsed: 0.851377010345459
DEBUG:base_model:epoch 1712 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3155.3486, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1712, validation elapsed_time:0.08687710762023926
INFO:base_model:epoch 1712 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1713 Train Loss: 3162.259521484375 elapsed: 0.8786742687225342
DEBUG:base_model:epoch 1713 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3162.2595, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1713, validation elapsed_time:0.14968395233154297
INFO:base_model:epoch 1713 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1714 Train Loss: 3169.

INFO:base_model:epoch 1730 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1731 Train Loss: 3176.081787109375 elapsed: 0.8401319980621338
DEBUG:base_model:epoch 1731 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3176.0818, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1731, validation elapsed_time:0.14113903045654297
INFO:base_model:epoch 1731 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1732 Train Loss: 3196.81494140625 elapsed: 0.7943129539489746
DEBUG:base_model:epoch 1732 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3196.8149, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1732, validation elapsed_time:0.11845922470092773
INFO:base_model:epoch 1732 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1733 Train Loss: 321

INFO:base_model:epoch 1749 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1750 Train Loss: 3162.259521484375 elapsed: 0.6621077060699463
DEBUG:base_model:epoch 1750 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3162.2595, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1750, validation elapsed_time:0.08068108558654785
INFO:base_model:epoch 1750 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1751 Train Loss: 3169.170654296875 elapsed: 0.8404920101165771
DEBUG:base_model:epoch 1751 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3169.1707, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1751, validation elapsed_time:0.16338610649108887
INFO:base_model:epoch 1751 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1752 Train Loss: 31

INFO:base_model:epoch 1768 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1769 Train Loss: 3176.081787109375 elapsed: 0.8525130748748779
DEBUG:base_model:epoch 1769 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3176.0818, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1769, validation elapsed_time:0.08371400833129883
INFO:base_model:epoch 1769 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1770 Train Loss: 3189.90380859375 elapsed: 0.8968911170959473
DEBUG:base_model:epoch 1770 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3189.9038, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1770, validation elapsed_time:0.14630389213562012
INFO:base_model:epoch 1770 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1771 Train Loss: 315

INFO:base_model:epoch 1787 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1788 Train Loss: 3182.99267578125 elapsed: 0.7622418403625488
DEBUG:base_model:epoch 1788 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3182.9927, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1788, validation elapsed_time:0.08236289024353027
INFO:base_model:epoch 1788 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1789 Train Loss: 3169.170654296875 elapsed: 0.843926191329956
DEBUG:base_model:epoch 1789 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3169.1707, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1789, validation elapsed_time:0.08278298377990723
INFO:base_model:epoch 1789 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1790 Train Loss: 3176

INFO:base_model:epoch 1806 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1807 Train Loss: 3169.170654296875 elapsed: 0.9001858234405518
DEBUG:base_model:epoch 1807 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3169.1707, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1807, validation elapsed_time:0.12625408172607422
INFO:base_model:epoch 1807 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1808 Train Loss: 3169.170654296875 elapsed: 0.8213448524475098
DEBUG:base_model:epoch 1808 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3169.1707, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1808, validation elapsed_time:0.08073186874389648
INFO:base_model:epoch 1808 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1809 Train Loss: 31

INFO:base_model:epoch 1825 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1826 Train Loss: 3169.170654296875 elapsed: 0.7623207569122314
DEBUG:base_model:epoch 1826 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3169.1707, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1826, validation elapsed_time:0.08001399040222168
INFO:base_model:epoch 1826 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1827 Train Loss: 3182.99267578125 elapsed: 0.8399579524993896
DEBUG:base_model:epoch 1827 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3182.9927, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1827, validation elapsed_time:0.08199071884155273
INFO:base_model:epoch 1827 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1828 Train Loss: 316

INFO:base_model:epoch 1844 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1845 Train Loss: 3162.259521484375 elapsed: 0.8245339393615723
DEBUG:base_model:epoch 1845 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3162.2595, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1845, validation elapsed_time:0.10970401763916016
INFO:base_model:epoch 1845 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1846 Train Loss: 3148.4375 elapsed: 0.7487297058105469
DEBUG:base_model:epoch 1846 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3148.4375, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1846, validation elapsed_time:0.08037304878234863
INFO:base_model:epoch 1846 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1847 Train Loss: 3176.08178

INFO:base_model:epoch 1863 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1864 Train Loss: 3196.81494140625 elapsed: 0.8212740421295166
DEBUG:base_model:epoch 1864 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3196.8149, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1864, validation elapsed_time:0.11931991577148438
INFO:base_model:epoch 1864 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1865 Train Loss: 3169.170654296875 elapsed: 0.6352720260620117
DEBUG:base_model:epoch 1865 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3169.1707, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1865, validation elapsed_time:0.21388792991638184
INFO:base_model:epoch 1865 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1866 Train Loss: 317

INFO:base_model:epoch 1882 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1883 Train Loss: 3203.72607421875 elapsed: 1.0386319160461426
DEBUG:base_model:epoch 1883 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3203.7261, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1883, validation elapsed_time:0.12579989433288574
INFO:base_model:epoch 1883 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1884 Train Loss: 3162.259521484375 elapsed: 0.7827877998352051
DEBUG:base_model:epoch 1884 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3162.2595, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1884, validation elapsed_time:0.08276009559631348
INFO:base_model:epoch 1884 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1885 Train Loss: 317

INFO:base_model:epoch 1901 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1902 Train Loss: 3182.99267578125 elapsed: 0.653806209564209
DEBUG:base_model:epoch 1902 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3182.9927, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1902, validation elapsed_time:0.2684972286224365
INFO:base_model:epoch 1902 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1903 Train Loss: 3169.170654296875 elapsed: 0.8114750385284424
DEBUG:base_model:epoch 1903 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3169.1707, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1903, validation elapsed_time:0.11473202705383301
INFO:base_model:epoch 1903 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1904 Train Loss: 3182.

INFO:base_model:epoch 1920 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1921 Train Loss: 3196.81494140625 elapsed: 0.7938129901885986
DEBUG:base_model:epoch 1921 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3196.8149, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1921, validation elapsed_time:0.15481972694396973
INFO:base_model:epoch 1921 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1922 Train Loss: 3210.636962890625 elapsed: 0.80210280418396
DEBUG:base_model:epoch 1922 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3210.6370, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1922, validation elapsed_time:0.11915230751037598
INFO:base_model:epoch 1922 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1923 Train Loss: 3169.

INFO:base_model:epoch 1939 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1940 Train Loss: 3169.170654296875 elapsed: 0.8416340351104736
DEBUG:base_model:epoch 1940 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3169.1707, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1940, validation elapsed_time:0.08278393745422363
INFO:base_model:epoch 1940 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1941 Train Loss: 3162.259521484375 elapsed: 0.9176058769226074
DEBUG:base_model:epoch 1941 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3162.2595, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1941, validation elapsed_time:0.13424897193908691
INFO:base_model:epoch 1941 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1942 Train Loss: 31

INFO:base_model:epoch 1958 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1959 Train Loss: 3182.99267578125 elapsed: 0.8844859600067139
DEBUG:base_model:epoch 1959 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3182.9927, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1959, validation elapsed_time:0.14727091789245605
INFO:base_model:epoch 1959 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1960 Train Loss: 3176.081787109375 elapsed: 0.7898519039154053
DEBUG:base_model:epoch 1960 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3176.0818, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1960, validation elapsed_time:0.15918636322021484
INFO:base_model:epoch 1960 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1961 Train Loss: 316

INFO:base_model:epoch 1977 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1978 Train Loss: 3169.170654296875 elapsed: 0.6591768264770508
DEBUG:base_model:epoch 1978 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3169.1707, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1978, validation elapsed_time:0.07988810539245605
INFO:base_model:epoch 1978 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1979 Train Loss: 3162.259521484375 elapsed: 0.7464570999145508
DEBUG:base_model:epoch 1979 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3162.2595, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1979, validation elapsed_time:0.1664130687713623
INFO:base_model:epoch 1979 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1980 Train Loss: 318

INFO:base_model:epoch 1996 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1997 Train Loss: 3189.90380859375 elapsed: 0.8369081020355225
DEBUG:base_model:epoch 1997 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3189.9038, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1997, validation elapsed_time:0.10342812538146973
INFO:base_model:epoch 1997 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1998 Train Loss: 3182.99267578125 elapsed: 0.7968640327453613
DEBUG:base_model:epoch 1998 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3182.9927, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:1998, validation elapsed_time:0.10878109931945801
INFO:base_model:epoch 1998 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 1999 Train Loss: 3176

INFO:base_model:epoch 2015 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2016 Train Loss: 3162.259521484375 elapsed: 0.6592879295349121
DEBUG:base_model:epoch 2016 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3162.2595, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2016, validation elapsed_time:0.08155488967895508
INFO:base_model:epoch 2016 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2017 Train Loss: 3189.90380859375 elapsed: 0.7613449096679688
DEBUG:base_model:epoch 2017 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3189.9038, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2017, validation elapsed_time:0.19687509536743164
INFO:base_model:epoch 2017 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2018 Train Loss: 316

INFO:base_model:epoch 2034 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2035 Train Loss: 3162.259521484375 elapsed: 0.8281540870666504
DEBUG:base_model:epoch 2035 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3162.2595, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2035, validation elapsed_time:0.1483290195465088
INFO:base_model:epoch 2035 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2036 Train Loss: 3189.90380859375 elapsed: 0.6757898330688477
DEBUG:base_model:epoch 2036 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3189.9038, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2036, validation elapsed_time:0.08223485946655273
INFO:base_model:epoch 2036 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2037 Train Loss: 3189

INFO:base_model:epoch 2053 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2054 Train Loss: 3182.99267578125 elapsed: 0.6683793067932129
DEBUG:base_model:epoch 2054 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3182.9927, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2054, validation elapsed_time:0.08114814758300781
INFO:base_model:epoch 2054 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2055 Train Loss: 3176.081787109375 elapsed: 0.8682379722595215
DEBUG:base_model:epoch 2055 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3176.0818, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2055, validation elapsed_time:0.16089820861816406
INFO:base_model:epoch 2055 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2056 Train Loss: 318

INFO:base_model:epoch 2072 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2073 Train Loss: 3169.170654296875 elapsed: 0.822624921798706
DEBUG:base_model:epoch 2073 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3169.1707, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2073, validation elapsed_time:0.1184849739074707
INFO:base_model:epoch 2073 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2074 Train Loss: 3176.081787109375 elapsed: 0.6776549816131592
DEBUG:base_model:epoch 2074 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3176.0818, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2074, validation elapsed_time:0.27257490158081055
INFO:base_model:epoch 2074 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2075 Train Loss: 3196

INFO:base_model:epoch 2091 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2092 Train Loss: 3182.99267578125 elapsed: 0.913844108581543
DEBUG:base_model:epoch 2092 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3182.9927, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2092, validation elapsed_time:0.19081401824951172
INFO:base_model:epoch 2092 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2093 Train Loss: 3182.99267578125 elapsed: 0.7990028858184814
DEBUG:base_model:epoch 2093 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3182.9927, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2093, validation elapsed_time:0.0829918384552002
INFO:base_model:epoch 2093 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2094 Train Loss: 3169.1

INFO:base_model:epoch 2110 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2111 Train Loss: 3162.259521484375 elapsed: 0.8145861625671387
DEBUG:base_model:epoch 2111 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3162.2595, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2111, validation elapsed_time:0.07909083366394043
INFO:base_model:epoch 2111 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2112 Train Loss: 3169.170654296875 elapsed: 0.6816239356994629
DEBUG:base_model:epoch 2112 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3169.1707, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2112, validation elapsed_time:0.2928340435028076
INFO:base_model:epoch 2112 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2113 Train Loss: 316

INFO:base_model:epoch 2129 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2130 Train Loss: 3196.81494140625 elapsed: 0.8154091835021973
DEBUG:base_model:epoch 2130 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3196.8149, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2130, validation elapsed_time:0.12993693351745605
INFO:base_model:epoch 2130 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2131 Train Loss: 3182.99267578125 elapsed: 0.8134970664978027
DEBUG:base_model:epoch 2131 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3182.9927, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2131, validation elapsed_time:0.14487123489379883
INFO:base_model:epoch 2131 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2132 Train Loss: 3176

INFO:base_model:epoch 2148 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2149 Train Loss: 3182.99267578125 elapsed: 1.0497698783874512
DEBUG:base_model:epoch 2149 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3182.9927, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2149, validation elapsed_time:0.10445857048034668
INFO:base_model:epoch 2149 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2150 Train Loss: 3182.99267578125 elapsed: 0.6758289337158203
DEBUG:base_model:epoch 2150 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3182.9927, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2150, validation elapsed_time:0.10058283805847168
INFO:base_model:epoch 2150 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2151 Train Loss: 3169

INFO:base_model:epoch 2167 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2168 Train Loss: 3182.99267578125 elapsed: 0.913947343826294
DEBUG:base_model:epoch 2168 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3182.9927, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2168, validation elapsed_time:0.1747729778289795
INFO:base_model:epoch 2168 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2169 Train Loss: 3176.081787109375 elapsed: 0.7530820369720459
DEBUG:base_model:epoch 2169 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3176.0818, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2169, validation elapsed_time:0.10616183280944824
INFO:base_model:epoch 2169 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2170 Train Loss: 3169.

INFO:base_model:epoch 2186 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2187 Train Loss: 3182.99267578125 elapsed: 0.8855648040771484
DEBUG:base_model:epoch 2187 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3182.9927, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2187, validation elapsed_time:0.167921781539917
INFO:base_model:epoch 2187 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2188 Train Loss: 3176.081787109375 elapsed: 0.9379909038543701
DEBUG:base_model:epoch 2188 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3176.0818, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2188, validation elapsed_time:0.19905495643615723
INFO:base_model:epoch 2188 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2189 Train Loss: 3169.

INFO:base_model:epoch 2205 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2206 Train Loss: 3196.81494140625 elapsed: 0.8346290588378906
DEBUG:base_model:epoch 2206 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3196.8149, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2206, validation elapsed_time:0.11103677749633789
INFO:base_model:epoch 2206 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2207 Train Loss: 3162.259521484375 elapsed: 0.7413618564605713
DEBUG:base_model:epoch 2207 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3162.2595, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2207, validation elapsed_time:0.08614206314086914
INFO:base_model:epoch 2207 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2208 Train Loss: 316

INFO:base_model:epoch 2224 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2225 Train Loss: 3176.081787109375 elapsed: 0.8055508136749268
DEBUG:base_model:epoch 2225 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3176.0818, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2225, validation elapsed_time:0.15403437614440918
INFO:base_model:epoch 2225 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2226 Train Loss: 3169.170654296875 elapsed: 0.8699691295623779
DEBUG:base_model:epoch 2226 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3169.1707, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2226, validation elapsed_time:0.0997929573059082
INFO:base_model:epoch 2226 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2227 Train Loss: 316

INFO:base_model:epoch 2243 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2244 Train Loss: 3176.081787109375 elapsed: 1.0159201622009277
DEBUG:base_model:epoch 2244 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3176.0818, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2244, validation elapsed_time:0.10409903526306152
INFO:base_model:epoch 2244 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2245 Train Loss: 3182.99267578125 elapsed: 0.6840786933898926
DEBUG:base_model:epoch 2245 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3182.9927, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2245, validation elapsed_time:0.08846807479858398
INFO:base_model:epoch 2245 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2246 Train Loss: 316

INFO:base_model:epoch 2262 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2263 Train Loss: 3169.170654296875 elapsed: 0.8986632823944092
DEBUG:base_model:epoch 2263 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3169.1707, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2263, validation elapsed_time:0.13399004936218262
INFO:base_model:epoch 2263 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2264 Train Loss: 3189.90380859375 elapsed: 0.8401379585266113
DEBUG:base_model:epoch 2264 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3189.9038, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2264, validation elapsed_time:0.08541369438171387
INFO:base_model:epoch 2264 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2265 Train Loss: 315

INFO:base_model:epoch 2281 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2282 Train Loss: 3162.259521484375 elapsed: 0.8241150379180908
DEBUG:base_model:epoch 2282 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3162.2595, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2282, validation elapsed_time:0.11652708053588867
INFO:base_model:epoch 2282 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2283 Train Loss: 3196.81494140625 elapsed: 0.9317398071289062
DEBUG:base_model:epoch 2283 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3196.8149, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2283, validation elapsed_time:0.13805484771728516
INFO:base_model:epoch 2283 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2284 Train Loss: 317

INFO:base_model:epoch 2300 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2301 Train Loss: 3189.90380859375 elapsed: 0.8365061283111572
DEBUG:base_model:epoch 2301 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3189.9038, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2301, validation elapsed_time:0.14404892921447754
INFO:base_model:epoch 2301 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2302 Train Loss: 3169.170654296875 elapsed: 0.6322381496429443
DEBUG:base_model:epoch 2302 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3169.1707, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2302, validation elapsed_time:0.09605598449707031
INFO:base_model:epoch 2302 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2303 Train Loss: 316

INFO:base_model:epoch 2319 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2320 Train Loss: 3162.259521484375 elapsed: 0.855431079864502
DEBUG:base_model:epoch 2320 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3162.2595, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2320, validation elapsed_time:0.13766002655029297
INFO:base_model:epoch 2320 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2321 Train Loss: 3162.259521484375 elapsed: 0.8063020706176758
DEBUG:base_model:epoch 2321 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3162.2595, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2321, validation elapsed_time:0.12920784950256348
INFO:base_model:epoch 2321 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2322 Train Loss: 316

INFO:base_model:epoch 2338 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2339 Train Loss: 3169.170654296875 elapsed: 1.044356107711792
DEBUG:base_model:epoch 2339 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3169.1707, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2339, validation elapsed_time:0.10041284561157227
INFO:base_model:epoch 2339 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2340 Train Loss: 3176.081787109375 elapsed: 0.6344020366668701
DEBUG:base_model:epoch 2340 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3176.0818, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2340, validation elapsed_time:0.0831608772277832
INFO:base_model:epoch 2340 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2341 Train Loss: 3182

INFO:base_model:epoch 2357 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2358 Train Loss: 3189.90380859375 elapsed: 0.8193111419677734
DEBUG:base_model:epoch 2358 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3189.9038, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2358, validation elapsed_time:0.13728904724121094
INFO:base_model:epoch 2358 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2359 Train Loss: 3162.259521484375 elapsed: 0.7348630428314209
DEBUG:base_model:epoch 2359 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3162.2595, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2359, validation elapsed_time:0.11292505264282227
INFO:base_model:epoch 2359 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2360 Train Loss: 315

INFO:base_model:epoch 2376 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2377 Train Loss: 3169.170654296875 elapsed: 0.8798749446868896
DEBUG:base_model:epoch 2377 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3169.1707, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2377, validation elapsed_time:0.19567584991455078
INFO:base_model:epoch 2377 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2378 Train Loss: 3162.259521484375 elapsed: 0.7909560203552246
DEBUG:base_model:epoch 2378 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3162.2595, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2378, validation elapsed_time:0.12976622581481934
INFO:base_model:epoch 2378 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2379 Train Loss: 31

INFO:base_model:epoch 2395 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2396 Train Loss: 3182.99267578125 elapsed: 0.7875924110412598
DEBUG:base_model:epoch 2396 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3182.9927, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2396, validation elapsed_time:0.08163094520568848
INFO:base_model:epoch 2396 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2397 Train Loss: 3176.081787109375 elapsed: 0.753861665725708
DEBUG:base_model:epoch 2397 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3176.0818, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2397, validation elapsed_time:0.08483219146728516
INFO:base_model:epoch 2397 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2398 Train Loss: 3169

INFO:base_model:epoch 2414 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2415 Train Loss: 3155.3486328125 elapsed: 0.8494908809661865
DEBUG:base_model:epoch 2415 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3155.3486, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2415, validation elapsed_time:0.08651876449584961
INFO:base_model:epoch 2415 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2416 Train Loss: 3169.170654296875 elapsed: 0.7039599418640137
DEBUG:base_model:epoch 2416 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3169.1707, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2416, validation elapsed_time:0.300076961517334
INFO:base_model:epoch 2416 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2417 Train Loss: 3176.0

INFO:base_model:epoch 2433 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2434 Train Loss: 3155.3486328125 elapsed: 0.6726319789886475
DEBUG:base_model:epoch 2434 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3155.3486, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2434, validation elapsed_time:0.0865468978881836
INFO:base_model:epoch 2434 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2435 Train Loss: 3169.170654296875 elapsed: 0.9304797649383545
DEBUG:base_model:epoch 2435 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3169.1707, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2435, validation elapsed_time:0.2085099220275879
INFO:base_model:epoch 2435 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2436 Train Loss: 3182.9

INFO:base_model:epoch 2452 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2453 Train Loss: 3176.081787109375 elapsed: 0.8176259994506836
DEBUG:base_model:epoch 2453 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3176.0818, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2453, validation elapsed_time:0.08632612228393555
INFO:base_model:epoch 2453 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2454 Train Loss: 3182.99267578125 elapsed: 0.8460738658905029
DEBUG:base_model:epoch 2454 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3182.9927, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2454, validation elapsed_time:0.10154414176940918
INFO:base_model:epoch 2454 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2455 Train Loss: 317

INFO:base_model:epoch 2471 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2472 Train Loss: 3169.170654296875 elapsed: 0.8005576133728027
DEBUG:base_model:epoch 2472 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3169.1707, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2472, validation elapsed_time:0.1441631317138672
INFO:base_model:epoch 2472 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2473 Train Loss: 3182.99267578125 elapsed: 0.7137200832366943
DEBUG:base_model:epoch 2473 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3182.9927, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2473, validation elapsed_time:0.10838890075683594
INFO:base_model:epoch 2473 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2474 Train Loss: 3155

INFO:base_model:epoch 2490 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2491 Train Loss: 3176.081787109375 elapsed: 0.6766409873962402
DEBUG:base_model:epoch 2491 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3176.0818, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2491, validation elapsed_time:0.2733190059661865
INFO:base_model:epoch 2491 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2492 Train Loss: 3162.259521484375 elapsed: 0.7826900482177734
DEBUG:base_model:epoch 2492 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3162.2595, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2492, validation elapsed_time:0.263721227645874
INFO:base_model:epoch 2492 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2493 Train Loss: 3196.

INFO:base_model:epoch 2509 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2510 Train Loss: 3196.81494140625 elapsed: 0.8552100658416748
DEBUG:base_model:epoch 2510 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3196.8149, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2510, validation elapsed_time:0.09329915046691895
INFO:base_model:epoch 2510 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2511 Train Loss: 3162.259521484375 elapsed: 0.6912391185760498
DEBUG:base_model:epoch 2511 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3162.2595, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2511, validation elapsed_time:0.24134302139282227
INFO:base_model:epoch 2511 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2512 Train Loss: 316

INFO:base_model:epoch 2528 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2529 Train Loss: 3162.259521484375 elapsed: 0.6701171398162842
DEBUG:base_model:epoch 2529 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3162.2595, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2529, validation elapsed_time:0.20220518112182617
INFO:base_model:epoch 2529 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2530 Train Loss: 3169.170654296875 elapsed: 0.8036623001098633
DEBUG:base_model:epoch 2530 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3169.1707, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2530, validation elapsed_time:0.0962669849395752
INFO:base_model:epoch 2530 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2531 Train Loss: 316

INFO:base_model:epoch 2547 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2548 Train Loss: 3169.170654296875 elapsed: 0.8513002395629883
DEBUG:base_model:epoch 2548 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3169.1707, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2548, validation elapsed_time:0.14913201332092285
INFO:base_model:epoch 2548 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2549 Train Loss: 3189.90380859375 elapsed: 0.6572310924530029
DEBUG:base_model:epoch 2549 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3189.9038, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2549, validation elapsed_time:0.29091978073120117
INFO:base_model:epoch 2549 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2550 Train Loss: 317

INFO:base_model:epoch 2566 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2567 Train Loss: 3155.3486328125 elapsed: 0.8513109683990479
DEBUG:base_model:epoch 2567 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3155.3486, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2567, validation elapsed_time:0.2543301582336426
INFO:base_model:epoch 2567 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2568 Train Loss: 3155.3486328125 elapsed: 0.8657629489898682
DEBUG:base_model:epoch 2568 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3155.3486, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2568, validation elapsed_time:0.10160183906555176
INFO:base_model:epoch 2568 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2569 Train Loss: 3169.17

INFO:base_model:epoch 2585 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2586 Train Loss: 3162.259521484375 elapsed: 0.8420188426971436
DEBUG:base_model:epoch 2586 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3162.2595, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2586, validation elapsed_time:0.15935420989990234
INFO:base_model:epoch 2586 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2587 Train Loss: 3169.170654296875 elapsed: 0.6933927536010742
DEBUG:base_model:epoch 2587 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3169.1707, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2587, validation elapsed_time:0.08939123153686523
INFO:base_model:epoch 2587 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2588 Train Loss: 31

INFO:base_model:epoch 2604 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2605 Train Loss: 3189.90380859375 elapsed: 0.8296959400177002
DEBUG:base_model:epoch 2605 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3189.9038, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2605, validation elapsed_time:0.10103082656860352
INFO:base_model:epoch 2605 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2606 Train Loss: 3169.170654296875 elapsed: 0.8144450187683105
DEBUG:base_model:epoch 2606 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3169.1707, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2606, validation elapsed_time:0.14212417602539062
INFO:base_model:epoch 2606 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2607 Train Loss: 317

INFO:base_model:epoch 2623 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2624 Train Loss: 3162.259521484375 elapsed: 1.0854430198669434
DEBUG:base_model:epoch 2624 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3162.2595, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2624, validation elapsed_time:0.08916878700256348
INFO:base_model:epoch 2624 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2625 Train Loss: 3182.99267578125 elapsed: 0.6449108123779297
DEBUG:base_model:epoch 2625 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3182.9927, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2625, validation elapsed_time:0.26961207389831543
INFO:base_model:epoch 2625 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2626 Train Loss: 317

INFO:base_model:epoch 2642 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2643 Train Loss: 3182.99267578125 elapsed: 0.8224890232086182
DEBUG:base_model:epoch 2643 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3182.9927, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2643, validation elapsed_time:0.13306212425231934
INFO:base_model:epoch 2643 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2644 Train Loss: 3182.99267578125 elapsed: 0.7998690605163574
DEBUG:base_model:epoch 2644 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3182.9927, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2644, validation elapsed_time:0.08834695816040039
INFO:base_model:epoch 2644 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2645 Train Loss: 3176

INFO:base_model:epoch 2661 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2662 Train Loss: 3182.99267578125 elapsed: 0.7355856895446777
DEBUG:base_model:epoch 2662 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3182.9927, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2662, validation elapsed_time:0.13601422309875488
INFO:base_model:epoch 2662 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2663 Train Loss: 3162.259521484375 elapsed: 0.8785159587860107
DEBUG:base_model:epoch 2663 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3162.2595, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2663, validation elapsed_time:0.0963740348815918
INFO:base_model:epoch 2663 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2664 Train Loss: 3162

INFO:base_model:epoch 2680 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2681 Train Loss: 3176.081787109375 elapsed: 0.81060791015625
DEBUG:base_model:epoch 2681 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3176.0818, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2681, validation elapsed_time:0.15969514846801758
INFO:base_model:epoch 2681 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2682 Train Loss: 3169.170654296875 elapsed: 0.7216610908508301
DEBUG:base_model:epoch 2682 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3169.1707, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2682, validation elapsed_time:0.10991907119750977
INFO:base_model:epoch 2682 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2683 Train Loss: 3182

INFO:base_model:epoch 2699 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2700 Train Loss: 3176.081787109375 elapsed: 0.7164947986602783
DEBUG:base_model:epoch 2700 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3176.0818, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2700, validation elapsed_time:0.17497014999389648
INFO:base_model:epoch 2700 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2701 Train Loss: 3182.99267578125 elapsed: 1.139800786972046
DEBUG:base_model:epoch 2701 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3182.9927, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2701, validation elapsed_time:0.10800600051879883
INFO:base_model:epoch 2701 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2702 Train Loss: 3182

INFO:base_model:epoch 2718 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2719 Train Loss: 3169.170654296875 elapsed: 1.0510101318359375
DEBUG:base_model:epoch 2719 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3169.1707, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2719, validation elapsed_time:0.2503030300140381
INFO:base_model:epoch 2719 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2720 Train Loss: 3203.72607421875 elapsed: 0.7436590194702148
DEBUG:base_model:epoch 2720 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3203.7261, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2720, validation elapsed_time:0.08451271057128906
INFO:base_model:epoch 2720 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2721 Train Loss: 3176

INFO:base_model:epoch 2737 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2738 Train Loss: 3176.081787109375 elapsed: 0.799720287322998
DEBUG:base_model:epoch 2738 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3176.0818, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2738, validation elapsed_time:0.286395788192749
INFO:base_model:epoch 2738 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2739 Train Loss: 3169.170654296875 elapsed: 0.8582301139831543
DEBUG:base_model:epoch 2739 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3169.1707, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2739, validation elapsed_time:0.08859801292419434
INFO:base_model:epoch 2739 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2740 Train Loss: 3162.

INFO:base_model:epoch 2756 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2757 Train Loss: 3169.170654296875 elapsed: 1.0079360008239746
DEBUG:base_model:epoch 2757 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3169.1707, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2757, validation elapsed_time:0.08875322341918945
INFO:base_model:epoch 2757 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2758 Train Loss: 3176.081787109375 elapsed: 0.9014437198638916
DEBUG:base_model:epoch 2758 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3176.0818, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2758, validation elapsed_time:0.14279985427856445
INFO:base_model:epoch 2758 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2759 Train Loss: 31

INFO:base_model:epoch 2775 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2776 Train Loss: 3169.170654296875 elapsed: 0.8002543449401855
DEBUG:base_model:epoch 2776 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3169.1707, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2776, validation elapsed_time:0.16374588012695312
INFO:base_model:epoch 2776 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2777 Train Loss: 3162.259521484375 elapsed: 0.6834988594055176
DEBUG:base_model:epoch 2777 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3162.2595, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2777, validation elapsed_time:0.0836641788482666
INFO:base_model:epoch 2777 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2778 Train Loss: 317

INFO:base_model:epoch 2794 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2795 Train Loss: 3162.259521484375 elapsed: 0.9796788692474365
DEBUG:base_model:epoch 2795 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3162.2595, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2795, validation elapsed_time:0.22603106498718262
INFO:base_model:epoch 2795 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2796 Train Loss: 3148.4375 elapsed: 0.8700211048126221
DEBUG:base_model:epoch 2796 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3148.4375, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2796, validation elapsed_time:0.0879979133605957
INFO:base_model:epoch 2796 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2797 Train Loss: 3155.348632

INFO:base_model:epoch 2813 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2814 Train Loss: 3155.3486328125 elapsed: 0.9627141952514648
DEBUG:base_model:epoch 2814 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3155.3486, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2814, validation elapsed_time:0.08692622184753418
INFO:base_model:epoch 2814 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2815 Train Loss: 3176.081787109375 elapsed: 0.6636717319488525
DEBUG:base_model:epoch 2815 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3176.0818, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2815, validation elapsed_time:0.13436126708984375
INFO:base_model:epoch 2815 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2816 Train Loss: 3169

INFO:base_model:epoch 2832 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2833 Train Loss: 3162.259521484375 elapsed: 0.8180058002471924
DEBUG:base_model:epoch 2833 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3162.2595, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2833, validation elapsed_time:0.0866549015045166
INFO:base_model:epoch 2833 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2834 Train Loss: 3162.259521484375 elapsed: 0.6844019889831543
DEBUG:base_model:epoch 2834 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3162.2595, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2834, validation elapsed_time:0.3163456916809082
INFO:base_model:epoch 2834 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2835 Train Loss: 3162

INFO:base_model:epoch 2851 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2852 Train Loss: 3176.081787109375 elapsed: 0.8336141109466553
DEBUG:base_model:epoch 2852 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3176.0818, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2852, validation elapsed_time:0.12189483642578125
INFO:base_model:epoch 2852 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2853 Train Loss: 3182.99267578125 elapsed: 0.8275971412658691
DEBUG:base_model:epoch 2853 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3182.9927, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2853, validation elapsed_time:0.08470416069030762
INFO:base_model:epoch 2853 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2854 Train Loss: 317

INFO:base_model:epoch 2870 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2871 Train Loss: 3203.72607421875 elapsed: 0.6510980129241943
DEBUG:base_model:epoch 2871 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3203.7261, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2871, validation elapsed_time:0.10640978813171387
INFO:base_model:epoch 2871 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2872 Train Loss: 3176.081787109375 elapsed: 0.7919697761535645
DEBUG:base_model:epoch 2872 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3176.0818, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2872, validation elapsed_time:0.19157195091247559
INFO:base_model:epoch 2872 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2873 Train Loss: 317

INFO:base_model:epoch 2889 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2890 Train Loss: 3169.170654296875 elapsed: 0.7768509387969971
DEBUG:base_model:epoch 2890 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3169.1707, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2890, validation elapsed_time:0.08155488967895508
INFO:base_model:epoch 2890 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2891 Train Loss: 3189.90380859375 elapsed: 0.8280699253082275
DEBUG:base_model:epoch 2891 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3189.9038, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2891, validation elapsed_time:0.08538317680358887
INFO:base_model:epoch 2891 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2892 Train Loss: 316

INFO:base_model:epoch 2908 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2909 Train Loss: 3169.170654296875 elapsed: 0.798436164855957
DEBUG:base_model:epoch 2909 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3169.1707, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2909, validation elapsed_time:0.1132211685180664
INFO:base_model:epoch 2909 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2910 Train Loss: 3176.081787109375 elapsed: 0.9012889862060547
DEBUG:base_model:epoch 2910 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3176.0818, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2910, validation elapsed_time:0.13402628898620605
INFO:base_model:epoch 2910 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2911 Train Loss: 3155

INFO:base_model:epoch 2927 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2928 Train Loss: 3176.081787109375 elapsed: 0.8661911487579346
DEBUG:base_model:epoch 2928 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3176.0818, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2928, validation elapsed_time:0.09317493438720703
INFO:base_model:epoch 2928 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2929 Train Loss: 3189.90380859375 elapsed: 0.8400061130523682
DEBUG:base_model:epoch 2929 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3189.9038, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2929, validation elapsed_time:0.21486902236938477
INFO:base_model:epoch 2929 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2930 Train Loss: 318

INFO:base_model:epoch 2946 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2947 Train Loss: 3182.99267578125 elapsed: 0.7374923229217529
DEBUG:base_model:epoch 2947 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3182.9927, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2947, validation elapsed_time:0.0807187557220459
INFO:base_model:epoch 2947 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2948 Train Loss: 3196.81494140625 elapsed: 0.8592748641967773
DEBUG:base_model:epoch 2948 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3196.8149, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2948, validation elapsed_time:0.08606100082397461
INFO:base_model:epoch 2948 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2949 Train Loss: 3169.

INFO:base_model:epoch 2965 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2966 Train Loss: 3148.4375 elapsed: 0.85404372215271
DEBUG:base_model:epoch 2966 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3148.4375, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2966, validation elapsed_time:0.09275388717651367
INFO:base_model:epoch 2966 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2967 Train Loss: 3182.99267578125 elapsed: 0.8928868770599365
DEBUG:base_model:epoch 2967 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(3182.9927, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2967, validation elapsed_time:0.127608060836792
INFO:base_model:epoch 2967 val result: {'loss': tensor(26.5117), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2968 Train Loss: 3182.9926757812

INFO:base_model:epoch 2984 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2985 Train Loss: 7944.6513671875 elapsed: 0.8408081531524658
DEBUG:base_model:epoch 2985 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2985, validation elapsed_time:0.08707284927368164
INFO:base_model:epoch 2985 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2986 Train Loss: 7930.8291015625 elapsed: 0.8607091903686523
DEBUG:base_model:epoch 2986 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:2986, validation elapsed_time:0.16621708869934082
INFO:base_model:epoch 2986 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 2987 Train Loss: 7930.8

INFO:base_model:epoch 3003 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3004 Train Loss: 7923.91845703125 elapsed: 0.8801600933074951
DEBUG:base_model:epoch 3004 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3004, validation elapsed_time:0.09681582450866699
INFO:base_model:epoch 3004 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3005 Train Loss: 7910.09619140625 elapsed: 0.7374870777130127
DEBUG:base_model:epoch 3005 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3005, validation elapsed_time:0.08403301239013672
INFO:base_model:epoch 3005 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3006 Train Loss: 7910

INFO:base_model:epoch 3022 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3023 Train Loss: 7910.09619140625 elapsed: 0.8932993412017822
DEBUG:base_model:epoch 3023 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3023, validation elapsed_time:0.10634398460388184
INFO:base_model:epoch 3023 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3024 Train Loss: 7910.09619140625 elapsed: 0.8338906764984131
DEBUG:base_model:epoch 3024 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3024, validation elapsed_time:0.08151602745056152
INFO:base_model:epoch 3024 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3025 Train Loss: 7917

INFO:base_model:epoch 3041 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3042 Train Loss: 7896.27392578125 elapsed: 0.8482840061187744
DEBUG:base_model:epoch 3042 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7896.2739, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3042, validation elapsed_time:0.08434605598449707
INFO:base_model:epoch 3042 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3043 Train Loss: 7910.09619140625 elapsed: 0.8897888660430908
DEBUG:base_model:epoch 3043 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3043, validation elapsed_time:0.08429074287414551
INFO:base_model:epoch 3043 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3044 Train Loss: 7923

INFO:base_model:epoch 3060 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3061 Train Loss: 7917.00732421875 elapsed: 0.8045427799224854
DEBUG:base_model:epoch 3061 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3061, validation elapsed_time:0.1414649486541748
INFO:base_model:epoch 3061 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3062 Train Loss: 7937.740234375 elapsed: 0.7103800773620605
DEBUG:base_model:epoch 3062 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3062, validation elapsed_time:0.08311080932617188
INFO:base_model:epoch 3062 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3063 Train Loss: 7930.82

INFO:base_model:epoch 3079 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3080 Train Loss: 7930.8291015625 elapsed: 0.8109230995178223
DEBUG:base_model:epoch 3080 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3080, validation elapsed_time:0.08854818344116211
INFO:base_model:epoch 3080 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3081 Train Loss: 7923.91845703125 elapsed: 0.8068680763244629
DEBUG:base_model:epoch 3081 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3081, validation elapsed_time:0.11080789566040039
INFO:base_model:epoch 3081 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3082 Train Loss: 7923.

INFO:base_model:epoch 3098 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3099 Train Loss: 7937.740234375 elapsed: 0.8739290237426758
DEBUG:base_model:epoch 3099 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3099, validation elapsed_time:0.08352184295654297
INFO:base_model:epoch 3099 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3100 Train Loss: 7937.740234375 elapsed: 0.7212090492248535
DEBUG:base_model:epoch 3100 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3100, validation elapsed_time:0.19260072708129883
INFO:base_model:epoch 3100 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3101 Train Loss: 7923.918

INFO:base_model:epoch 3117 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3118 Train Loss: 7951.5625 elapsed: 0.8199591636657715
DEBUG:base_model:epoch 3118 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7951.5625, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3118, validation elapsed_time:0.13009881973266602
INFO:base_model:epoch 3118 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3119 Train Loss: 7937.740234375 elapsed: 0.786686897277832
DEBUG:base_model:epoch 3119 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3119, validation elapsed_time:0.08475208282470703
INFO:base_model:epoch 3119 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3120 Train Loss: 7917.007324218

INFO:base_model:epoch 3136 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3137 Train Loss: 7944.6513671875 elapsed: 0.8013529777526855
DEBUG:base_model:epoch 3137 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3137, validation elapsed_time:0.08441710472106934
INFO:base_model:epoch 3137 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3138 Train Loss: 7917.00732421875 elapsed: 0.8463740348815918
DEBUG:base_model:epoch 3138 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3138, validation elapsed_time:0.12737512588500977
INFO:base_model:epoch 3138 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3139 Train Loss: 7930.

INFO:base_model:epoch 3155 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3156 Train Loss: 7923.91845703125 elapsed: 0.8605270385742188
DEBUG:base_model:epoch 3156 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3156, validation elapsed_time:0.10847806930541992
INFO:base_model:epoch 3156 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3157 Train Loss: 7930.8291015625 elapsed: 0.6946520805358887
DEBUG:base_model:epoch 3157 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3157, validation elapsed_time:0.08763003349304199
INFO:base_model:epoch 3157 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3158 Train Loss: 7910.

INFO:base_model:epoch 3174 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3175 Train Loss: 7937.740234375 elapsed: 0.8460249900817871
DEBUG:base_model:epoch 3175 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3175, validation elapsed_time:0.12919402122497559
INFO:base_model:epoch 3175 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3176 Train Loss: 7937.740234375 elapsed: 0.6878700256347656
DEBUG:base_model:epoch 3176 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3176, validation elapsed_time:0.10519814491271973
INFO:base_model:epoch 3176 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3177 Train Loss: 7930.829

INFO:base_model:epoch 3193 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3194 Train Loss: 7923.91845703125 elapsed: 0.7411267757415771
DEBUG:base_model:epoch 3194 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3194, validation elapsed_time:0.08204889297485352
INFO:base_model:epoch 3194 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3195 Train Loss: 7917.00732421875 elapsed: 0.9777288436889648
DEBUG:base_model:epoch 3195 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3195, validation elapsed_time:0.11863207817077637
INFO:base_model:epoch 3195 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3196 Train Loss: 7882

INFO:base_model:epoch 3212 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3213 Train Loss: 7923.91845703125 elapsed: 0.779594898223877
DEBUG:base_model:epoch 3213 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3213, validation elapsed_time:0.233687162399292
INFO:base_model:epoch 3213 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3214 Train Loss: 7951.5625 elapsed: 0.7811799049377441
DEBUG:base_model:epoch 3214 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7951.5625, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3214, validation elapsed_time:0.08969879150390625
INFO:base_model:epoch 3214 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3215 Train Loss: 7917.007324218

INFO:base_model:epoch 3231 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3232 Train Loss: 7923.91845703125 elapsed: 0.8102631568908691
DEBUG:base_model:epoch 3232 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3232, validation elapsed_time:0.08266711235046387
INFO:base_model:epoch 3232 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3233 Train Loss: 7917.00732421875 elapsed: 0.7124409675598145
DEBUG:base_model:epoch 3233 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3233, validation elapsed_time:0.08233118057250977
INFO:base_model:epoch 3233 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3234 Train Loss: 7930

INFO:base_model:epoch 3250 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3251 Train Loss: 7917.00732421875 elapsed: 0.9232499599456787
DEBUG:base_model:epoch 3251 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3251, validation elapsed_time:0.16417789459228516
INFO:base_model:epoch 3251 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3252 Train Loss: 7944.6513671875 elapsed: 0.8266081809997559
DEBUG:base_model:epoch 3252 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3252, validation elapsed_time:0.09395122528076172
INFO:base_model:epoch 3252 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3253 Train Loss: 7917.

INFO:base_model:epoch 3269 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3270 Train Loss: 7930.8291015625 elapsed: 0.7371361255645752
DEBUG:base_model:epoch 3270 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3270, validation elapsed_time:0.08473682403564453
INFO:base_model:epoch 3270 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3271 Train Loss: 7930.8291015625 elapsed: 0.8007891178131104
DEBUG:base_model:epoch 3271 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3271, validation elapsed_time:0.16785407066345215
INFO:base_model:epoch 3271 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3272 Train Loss: 7917.0

INFO:base_model:epoch 3288 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3289 Train Loss: 7917.00732421875 elapsed: 0.8369588851928711
DEBUG:base_model:epoch 3289 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3289, validation elapsed_time:0.14382719993591309
INFO:base_model:epoch 3289 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3290 Train Loss: 7937.740234375 elapsed: 0.8185021877288818
DEBUG:base_model:epoch 3290 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3290, validation elapsed_time:0.1367969512939453
INFO:base_model:epoch 3290 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3291 Train Loss: 7937.74

INFO:base_model:epoch 3307 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3308 Train Loss: 7937.740234375 elapsed: 0.68044114112854
DEBUG:base_model:epoch 3308 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3308, validation elapsed_time:0.313291072845459
INFO:base_model:epoch 3308 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3309 Train Loss: 7930.8291015625 elapsed: 0.7692317962646484
DEBUG:base_model:epoch 3309 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3309, validation elapsed_time:0.11239290237426758
INFO:base_model:epoch 3309 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3310 Train Loss: 7930.829101

INFO:base_model:epoch 3326 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3327 Train Loss: 7910.09619140625 elapsed: 0.8308007717132568
DEBUG:base_model:epoch 3327 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3327, validation elapsed_time:0.08104300498962402
INFO:base_model:epoch 3327 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3328 Train Loss: 7937.740234375 elapsed: 0.6996550559997559
DEBUG:base_model:epoch 3328 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3328, validation elapsed_time:0.09740900993347168
INFO:base_model:epoch 3328 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3329 Train Loss: 7923.9

INFO:base_model:epoch 3345 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3346 Train Loss: 7937.740234375 elapsed: 0.8399441242218018
DEBUG:base_model:epoch 3346 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3346, validation elapsed_time:0.08304810523986816
INFO:base_model:epoch 3346 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3347 Train Loss: 7930.8291015625 elapsed: 0.8110678195953369
DEBUG:base_model:epoch 3347 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3347, validation elapsed_time:0.15245676040649414
INFO:base_model:epoch 3347 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3348 Train Loss: 7923.91

INFO:base_model:epoch 3364 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3365 Train Loss: 7944.6513671875 elapsed: 0.6976127624511719
DEBUG:base_model:epoch 3365 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3365, validation elapsed_time:0.22835612297058105
INFO:base_model:epoch 3365 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3366 Train Loss: 7923.91845703125 elapsed: 0.8234419822692871
DEBUG:base_model:epoch 3366 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3366, validation elapsed_time:0.09772086143493652
INFO:base_model:epoch 3366 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3367 Train Loss: 7937.

INFO:base_model:epoch 3383 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3384 Train Loss: 7910.09619140625 elapsed: 0.931434154510498
DEBUG:base_model:epoch 3384 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3384, validation elapsed_time:0.21364402770996094
INFO:base_model:epoch 3384 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3385 Train Loss: 7930.8291015625 elapsed: 0.8158810138702393
DEBUG:base_model:epoch 3385 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3385, validation elapsed_time:0.08285212516784668
INFO:base_model:epoch 3385 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3386 Train Loss: 7917.0

INFO:base_model:epoch 3402 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3403 Train Loss: 7944.6513671875 elapsed: 0.8294370174407959
DEBUG:base_model:epoch 3403 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3403, validation elapsed_time:0.1897590160369873
INFO:base_model:epoch 3403 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3404 Train Loss: 7923.91845703125 elapsed: 0.8522717952728271
DEBUG:base_model:epoch 3404 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3404, validation elapsed_time:0.08574390411376953
INFO:base_model:epoch 3404 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3405 Train Loss: 7930.8

INFO:base_model:epoch 3421 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3422 Train Loss: 7937.740234375 elapsed: 0.7870378494262695
DEBUG:base_model:epoch 3422 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3422, validation elapsed_time:0.12639689445495605
INFO:base_model:epoch 3422 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3423 Train Loss: 7930.8291015625 elapsed: 0.8656482696533203
DEBUG:base_model:epoch 3423 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3423, validation elapsed_time:0.16414093971252441
INFO:base_model:epoch 3423 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3424 Train Loss: 7910.09

INFO:base_model:epoch 3440 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3441 Train Loss: 7930.8291015625 elapsed: 0.7580471038818359
DEBUG:base_model:epoch 3441 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3441, validation elapsed_time:0.08088397979736328
INFO:base_model:epoch 3441 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3442 Train Loss: 7930.8291015625 elapsed: 0.8639411926269531
DEBUG:base_model:epoch 3442 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3442, validation elapsed_time:0.08208513259887695
INFO:base_model:epoch 3442 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3443 Train Loss: 7923.9

INFO:base_model:epoch 3459 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3460 Train Loss: 7930.8291015625 elapsed: 0.8247191905975342
DEBUG:base_model:epoch 3460 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3460, validation elapsed_time:0.08293795585632324
INFO:base_model:epoch 3460 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3461 Train Loss: 7944.6513671875 elapsed: 0.7957322597503662
DEBUG:base_model:epoch 3461 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3461, validation elapsed_time:0.14798879623413086
INFO:base_model:epoch 3461 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3462 Train Loss: 7917.0

INFO:base_model:epoch 3478 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3479 Train Loss: 7917.00732421875 elapsed: 0.7253000736236572
DEBUG:base_model:epoch 3479 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3479, validation elapsed_time:0.19794297218322754
INFO:base_model:epoch 3479 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3480 Train Loss: 7923.91845703125 elapsed: 0.8045928478240967
DEBUG:base_model:epoch 3480 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3480, validation elapsed_time:0.1176750659942627
INFO:base_model:epoch 3480 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3481 Train Loss: 7923.

INFO:base_model:epoch 3497 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3498 Train Loss: 7937.740234375 elapsed: 0.7390398979187012
DEBUG:base_model:epoch 3498 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3498, validation elapsed_time:0.10138392448425293
INFO:base_model:epoch 3498 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3499 Train Loss: 7937.740234375 elapsed: 0.760141134262085
DEBUG:base_model:epoch 3499 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3499, validation elapsed_time:0.08230400085449219
INFO:base_model:epoch 3499 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3500 Train Loss: 7930.8291

INFO:base_model:epoch 3516 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3517 Train Loss: 7937.740234375 elapsed: 0.763958215713501
DEBUG:base_model:epoch 3517 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3517, validation elapsed_time:0.0823359489440918
INFO:base_model:epoch 3517 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3518 Train Loss: 7910.09619140625 elapsed: 0.7712669372558594
DEBUG:base_model:epoch 3518 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3518, validation elapsed_time:0.08244800567626953
INFO:base_model:epoch 3518 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3519 Train Loss: 7930.829

INFO:base_model:epoch 3535 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3536 Train Loss: 7937.740234375 elapsed: 0.6313560009002686
DEBUG:base_model:epoch 3536 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3536, validation elapsed_time:0.15522098541259766
INFO:base_model:epoch 3536 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3537 Train Loss: 7917.00732421875 elapsed: 0.8246431350708008
DEBUG:base_model:epoch 3537 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3537, validation elapsed_time:0.08495903015136719
INFO:base_model:epoch 3537 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3538 Train Loss: 7917.0

INFO:base_model:epoch 3554 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3555 Train Loss: 7937.740234375 elapsed: 0.8334238529205322
DEBUG:base_model:epoch 3555 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3555, validation elapsed_time:0.12144017219543457
INFO:base_model:epoch 3555 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3556 Train Loss: 7917.00732421875 elapsed: 0.7656831741333008
DEBUG:base_model:epoch 3556 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3556, validation elapsed_time:0.08067607879638672
INFO:base_model:epoch 3556 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3557 Train Loss: 7923.9

INFO:base_model:epoch 3573 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3574 Train Loss: 7910.09619140625 elapsed: 0.956061840057373
DEBUG:base_model:epoch 3574 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3574, validation elapsed_time:0.1635129451751709
INFO:base_model:epoch 3574 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3575 Train Loss: 7930.8291015625 elapsed: 0.9929490089416504
DEBUG:base_model:epoch 3575 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3575, validation elapsed_time:0.09160923957824707
INFO:base_model:epoch 3575 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3576 Train Loss: 7923.91

INFO:base_model:epoch 3592 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3593 Train Loss: 7937.740234375 elapsed: 0.6827900409698486
DEBUG:base_model:epoch 3593 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3593, validation elapsed_time:0.10373711585998535
INFO:base_model:epoch 3593 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3594 Train Loss: 7930.8291015625 elapsed: 0.7171869277954102
DEBUG:base_model:epoch 3594 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3594, validation elapsed_time:0.08030962944030762
INFO:base_model:epoch 3594 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3595 Train Loss: 7923.91

INFO:base_model:epoch 3611 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3612 Train Loss: 7923.91845703125 elapsed: 0.7636430263519287
DEBUG:base_model:epoch 3612 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3612, validation elapsed_time:0.08519506454467773
INFO:base_model:epoch 3612 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3613 Train Loss: 7930.8291015625 elapsed: 0.7662272453308105
DEBUG:base_model:epoch 3613 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3613, validation elapsed_time:0.15159392356872559
INFO:base_model:epoch 3613 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3614 Train Loss: 7917.

INFO:base_model:epoch 3630 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3631 Train Loss: 7917.00732421875 elapsed: 0.8706009387969971
DEBUG:base_model:epoch 3631 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3631, validation elapsed_time:0.15401101112365723
INFO:base_model:epoch 3631 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3632 Train Loss: 7930.8291015625 elapsed: 0.7703139781951904
DEBUG:base_model:epoch 3632 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3632, validation elapsed_time:0.0809011459350586
INFO:base_model:epoch 3632 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3633 Train Loss: 7923.9

INFO:base_model:epoch 3649 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3650 Train Loss: 7944.6513671875 elapsed: 0.6641862392425537
DEBUG:base_model:epoch 3650 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3650, validation elapsed_time:0.15077781677246094
INFO:base_model:epoch 3650 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3651 Train Loss: 7917.00732421875 elapsed: 0.8130288124084473
DEBUG:base_model:epoch 3651 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3651, validation elapsed_time:0.0874490737915039
INFO:base_model:epoch 3651 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3652 Train Loss: 7944.6

INFO:base_model:epoch 3668 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3669 Train Loss: 7937.740234375 elapsed: 0.8125729560852051
DEBUG:base_model:epoch 3669 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3669, validation elapsed_time:0.1331770420074463
INFO:base_model:epoch 3669 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3670 Train Loss: 7910.09619140625 elapsed: 0.7319459915161133
DEBUG:base_model:epoch 3670 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3670, validation elapsed_time:0.10662508010864258
INFO:base_model:epoch 3670 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3671 Train Loss: 7910.09

INFO:base_model:epoch 3687 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3688 Train Loss: 7917.00732421875 elapsed: 0.8075699806213379
DEBUG:base_model:epoch 3688 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3688, validation elapsed_time:0.10291099548339844
INFO:base_model:epoch 3688 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3689 Train Loss: 7923.91845703125 elapsed: 0.7367539405822754
DEBUG:base_model:epoch 3689 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3689, validation elapsed_time:0.08557915687561035
INFO:base_model:epoch 3689 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3690 Train Loss: 7930

INFO:base_model:epoch 3706 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3707 Train Loss: 7930.8291015625 elapsed: 0.6753532886505127
DEBUG:base_model:epoch 3707 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3707, validation elapsed_time:0.10473775863647461
INFO:base_model:epoch 3707 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3708 Train Loss: 7917.00732421875 elapsed: 0.7848141193389893
DEBUG:base_model:epoch 3708 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3708, validation elapsed_time:0.0821068286895752
INFO:base_model:epoch 3708 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3709 Train Loss: 7930.8

INFO:base_model:epoch 3725 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3726 Train Loss: 7944.6513671875 elapsed: 0.6200590133666992
DEBUG:base_model:epoch 3726 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3726, validation elapsed_time:0.10820198059082031
INFO:base_model:epoch 3726 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3727 Train Loss: 7930.8291015625 elapsed: 0.6457390785217285
DEBUG:base_model:epoch 3727 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3727, validation elapsed_time:0.20215415954589844
INFO:base_model:epoch 3727 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3728 Train Loss: 7917.0

INFO:base_model:epoch 3744 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3745 Train Loss: 7917.00732421875 elapsed: 0.8346900939941406
DEBUG:base_model:epoch 3745 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3745, validation elapsed_time:0.12863588333129883
INFO:base_model:epoch 3745 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3746 Train Loss: 7930.8291015625 elapsed: 0.7070212364196777
DEBUG:base_model:epoch 3746 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3746, validation elapsed_time:0.08528518676757812
INFO:base_model:epoch 3746 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3747 Train Loss: 7903.

INFO:base_model:epoch 3763 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3764 Train Loss: 7930.8291015625 elapsed: 0.6580789089202881
DEBUG:base_model:epoch 3764 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3764, validation elapsed_time:0.27037692070007324
INFO:base_model:epoch 3764 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3765 Train Loss: 7917.00732421875 elapsed: 0.6715419292449951
DEBUG:base_model:epoch 3765 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3765, validation elapsed_time:0.15468072891235352
INFO:base_model:epoch 3765 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3766 Train Loss: 7937.

INFO:base_model:epoch 3782 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3783 Train Loss: 7896.27392578125 elapsed: 0.6848962306976318
DEBUG:base_model:epoch 3783 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7896.2739, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3783, validation elapsed_time:0.08542680740356445
INFO:base_model:epoch 3783 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3784 Train Loss: 7937.740234375 elapsed: 0.8867599964141846
DEBUG:base_model:epoch 3784 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3784, validation elapsed_time:0.1484699249267578
INFO:base_model:epoch 3784 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3785 Train Loss: 7951.56

INFO:base_model:epoch 3801 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3802 Train Loss: 7896.27392578125 elapsed: 0.7458140850067139
DEBUG:base_model:epoch 3802 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7896.2739, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3802, validation elapsed_time:0.15468215942382812
INFO:base_model:epoch 3802 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3803 Train Loss: 7930.8291015625 elapsed: 0.6390080451965332
DEBUG:base_model:epoch 3803 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3803, validation elapsed_time:0.0824272632598877
INFO:base_model:epoch 3803 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3804 Train Loss: 7930.8

INFO:base_model:epoch 3820 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3821 Train Loss: 7930.8291015625 elapsed: 0.6618916988372803
DEBUG:base_model:epoch 3821 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3821, validation elapsed_time:0.11049294471740723
INFO:base_model:epoch 3821 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3822 Train Loss: 7917.00732421875 elapsed: 0.8531498908996582
DEBUG:base_model:epoch 3822 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3822, validation elapsed_time:0.1638648509979248
INFO:base_model:epoch 3822 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3823 Train Loss: 7930.8

INFO:base_model:epoch 3839 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3840 Train Loss: 7937.740234375 elapsed: 0.7410340309143066
DEBUG:base_model:epoch 3840 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3840, validation elapsed_time:0.0924839973449707
INFO:base_model:epoch 3840 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3841 Train Loss: 7937.740234375 elapsed: 0.801713228225708
DEBUG:base_model:epoch 3841 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3841, validation elapsed_time:0.1059119701385498
INFO:base_model:epoch 3841 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3842 Train Loss: 7923.918457

INFO:base_model:epoch 3858 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3859 Train Loss: 7923.91845703125 elapsed: 0.7679460048675537
DEBUG:base_model:epoch 3859 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3859, validation elapsed_time:0.15446925163269043
INFO:base_model:epoch 3859 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3860 Train Loss: 7930.8291015625 elapsed: 0.6596169471740723
DEBUG:base_model:epoch 3860 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3860, validation elapsed_time:0.09726285934448242
INFO:base_model:epoch 3860 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3861 Train Loss: 7937.

INFO:base_model:epoch 3877 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3878 Train Loss: 7930.8291015625 elapsed: 0.7370209693908691
DEBUG:base_model:epoch 3878 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3878, validation elapsed_time:0.08115506172180176
INFO:base_model:epoch 3878 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3879 Train Loss: 7923.91845703125 elapsed: 0.7038862705230713
DEBUG:base_model:epoch 3879 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3879, validation elapsed_time:0.08407878875732422
INFO:base_model:epoch 3879 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3880 Train Loss: 7917.

INFO:base_model:epoch 3896 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3897 Train Loss: 7917.00732421875 elapsed: 0.724916934967041
DEBUG:base_model:epoch 3897 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3897, validation elapsed_time:0.10243701934814453
INFO:base_model:epoch 3897 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3898 Train Loss: 7937.740234375 elapsed: 0.7650332450866699
DEBUG:base_model:epoch 3898 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3898, validation elapsed_time:0.0908212661743164
INFO:base_model:epoch 3898 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3899 Train Loss: 7937.740

INFO:base_model:epoch 3915 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3916 Train Loss: 7944.6513671875 elapsed: 0.7699940204620361
DEBUG:base_model:epoch 3916 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3916, validation elapsed_time:0.08437299728393555
INFO:base_model:epoch 3916 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3917 Train Loss: 7923.91845703125 elapsed: 0.7978358268737793
DEBUG:base_model:epoch 3917 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3917, validation elapsed_time:0.09496593475341797
INFO:base_model:epoch 3917 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3918 Train Loss: 7917.

INFO:base_model:epoch 3934 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3935 Train Loss: 7930.8291015625 elapsed: 0.7472579479217529
DEBUG:base_model:epoch 3935 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3935, validation elapsed_time:0.08254122734069824
INFO:base_model:epoch 3935 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3936 Train Loss: 7917.00732421875 elapsed: 0.8005099296569824
DEBUG:base_model:epoch 3936 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3936, validation elapsed_time:0.08177495002746582
INFO:base_model:epoch 3936 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3937 Train Loss: 7923.

INFO:base_model:epoch 3953 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3954 Train Loss: 7910.09619140625 elapsed: 0.7222709655761719
DEBUG:base_model:epoch 3954 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3954, validation elapsed_time:0.24353408813476562
INFO:base_model:epoch 3954 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3955 Train Loss: 7937.740234375 elapsed: 0.808783769607544
DEBUG:base_model:epoch 3955 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3955, validation elapsed_time:0.14090633392333984
INFO:base_model:epoch 3955 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3956 Train Loss: 7937.74

INFO:base_model:epoch 3972 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3973 Train Loss: 7930.8291015625 elapsed: 1.0881967544555664
DEBUG:base_model:epoch 3973 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3973, validation elapsed_time:0.23497986793518066
INFO:base_model:epoch 3973 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3974 Train Loss: 7896.27392578125 elapsed: 1.0817649364471436
DEBUG:base_model:epoch 3974 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7896.2739, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3974, validation elapsed_time:0.359846830368042
INFO:base_model:epoch 3974 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3975 Train Loss: 7910.09

INFO:base_model:epoch 3991 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3992 Train Loss: 7910.09619140625 elapsed: 0.7746720314025879
DEBUG:base_model:epoch 3992 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3992, validation elapsed_time:0.08432292938232422
INFO:base_model:epoch 3992 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3993 Train Loss: 7917.00732421875 elapsed: 0.8220827579498291
DEBUG:base_model:epoch 3993 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:3993, validation elapsed_time:0.08378005027770996
INFO:base_model:epoch 3993 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 3994 Train Loss: 7923

INFO:base_model:epoch 4010 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4011 Train Loss: 7930.8291015625 elapsed: 0.8102738857269287
DEBUG:base_model:epoch 4011 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4011, validation elapsed_time:0.08516621589660645
INFO:base_model:epoch 4011 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4012 Train Loss: 7917.00732421875 elapsed: 0.8100829124450684
DEBUG:base_model:epoch 4012 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4012, validation elapsed_time:0.0967559814453125
INFO:base_model:epoch 4012 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4013 Train Loss: 7930.8

INFO:base_model:epoch 4029 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4030 Train Loss: 7923.91845703125 elapsed: 0.91147780418396
DEBUG:base_model:epoch 4030 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4030, validation elapsed_time:0.08276200294494629
INFO:base_model:epoch 4030 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4031 Train Loss: 7930.8291015625 elapsed: 0.8109021186828613
DEBUG:base_model:epoch 4031 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4031, validation elapsed_time:0.13911032676696777
INFO:base_model:epoch 4031 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4032 Train Loss: 7923.91

INFO:base_model:epoch 4048 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4049 Train Loss: 7937.740234375 elapsed: 0.8197851181030273
DEBUG:base_model:epoch 4049 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4049, validation elapsed_time:0.1527099609375
INFO:base_model:epoch 4049 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4050 Train Loss: 7903.18505859375 elapsed: 0.7986249923706055
DEBUG:base_model:epoch 4050 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7903.1851, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4050, validation elapsed_time:0.08398008346557617
INFO:base_model:epoch 4050 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4051 Train Loss: 7930.82910

INFO:base_model:epoch 4067 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4068 Train Loss: 7944.6513671875 elapsed: 0.739454984664917
DEBUG:base_model:epoch 4068 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4068, validation elapsed_time:0.1181647777557373
INFO:base_model:epoch 4068 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4069 Train Loss: 7917.00732421875 elapsed: 0.5954689979553223
DEBUG:base_model:epoch 4069 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4069, validation elapsed_time:0.09595322608947754
INFO:base_model:epoch 4069 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4070 Train Loss: 7937.74

INFO:base_model:epoch 4086 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4087 Train Loss: 7923.91845703125 elapsed: 0.7541098594665527
DEBUG:base_model:epoch 4087 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4087, validation elapsed_time:0.08433008193969727
INFO:base_model:epoch 4087 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4088 Train Loss: 7944.6513671875 elapsed: 0.9205877780914307
DEBUG:base_model:epoch 4088 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4088, validation elapsed_time:0.08129715919494629
INFO:base_model:epoch 4088 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4089 Train Loss: 7944.

INFO:base_model:epoch 4105 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4106 Train Loss: 7930.8291015625 elapsed: 0.7654590606689453
DEBUG:base_model:epoch 4106 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4106, validation elapsed_time:0.0843350887298584
INFO:base_model:epoch 4106 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4107 Train Loss: 7930.8291015625 elapsed: 0.8018031120300293
DEBUG:base_model:epoch 4107 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4107, validation elapsed_time:0.1307687759399414
INFO:base_model:epoch 4107 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4108 Train Loss: 7910.096

INFO:base_model:epoch 4124 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4125 Train Loss: 7937.740234375 elapsed: 0.8585429191589355
DEBUG:base_model:epoch 4125 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4125, validation elapsed_time:0.19083285331726074
INFO:base_model:epoch 4125 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4126 Train Loss: 7930.8291015625 elapsed: 0.9947140216827393
DEBUG:base_model:epoch 4126 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4126, validation elapsed_time:0.1925370693206787
INFO:base_model:epoch 4126 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4127 Train Loss: 7910.096

INFO:base_model:epoch 4143 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4144 Train Loss: 7930.8291015625 elapsed: 0.7949860095977783
DEBUG:base_model:epoch 4144 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4144, validation elapsed_time:0.08402299880981445
INFO:base_model:epoch 4144 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4145 Train Loss: 7917.00732421875 elapsed: 0.6393792629241943
DEBUG:base_model:epoch 4145 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4145, validation elapsed_time:0.12650012969970703
INFO:base_model:epoch 4145 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4146 Train Loss: 7917.

INFO:base_model:epoch 4162 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4163 Train Loss: 7923.91845703125 elapsed: 0.6939918994903564
DEBUG:base_model:epoch 4163 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4163, validation elapsed_time:0.24297881126403809
INFO:base_model:epoch 4163 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4164 Train Loss: 7917.00732421875 elapsed: 0.8240437507629395
DEBUG:base_model:epoch 4164 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4164, validation elapsed_time:0.10434412956237793
INFO:base_model:epoch 4164 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4165 Train Loss: 7923

INFO:base_model:epoch 4181 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4182 Train Loss: 7910.09619140625 elapsed: 0.7124309539794922
DEBUG:base_model:epoch 4182 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4182, validation elapsed_time:0.08348202705383301
INFO:base_model:epoch 4182 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4183 Train Loss: 7923.91845703125 elapsed: 0.6800100803375244
DEBUG:base_model:epoch 4183 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4183, validation elapsed_time:0.08576774597167969
INFO:base_model:epoch 4183 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4184 Train Loss: 7917

INFO:base_model:epoch 4200 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4201 Train Loss: 7937.740234375 elapsed: 0.8548500537872314
DEBUG:base_model:epoch 4201 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4201, validation elapsed_time:0.09302592277526855
INFO:base_model:epoch 4201 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4202 Train Loss: 7917.00732421875 elapsed: 0.6992228031158447
DEBUG:base_model:epoch 4202 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4202, validation elapsed_time:0.10594725608825684
INFO:base_model:epoch 4202 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4203 Train Loss: 7930.8

INFO:base_model:epoch 4219 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4220 Train Loss: 7917.00732421875 elapsed: 0.8959648609161377
DEBUG:base_model:epoch 4220 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4220, validation elapsed_time:0.1506030559539795
INFO:base_model:epoch 4220 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4221 Train Loss: 7917.00732421875 elapsed: 1.075732946395874
DEBUG:base_model:epoch 4221 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4221, validation elapsed_time:0.36754679679870605
INFO:base_model:epoch 4221 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4222 Train Loss: 7910.0

INFO:base_model:epoch 4238 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4239 Train Loss: 7937.740234375 elapsed: 0.6196410655975342
DEBUG:base_model:epoch 4239 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4239, validation elapsed_time:0.10001587867736816
INFO:base_model:epoch 4239 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4240 Train Loss: 7930.8291015625 elapsed: 0.941234827041626
DEBUG:base_model:epoch 4240 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4240, validation elapsed_time:0.08679604530334473
INFO:base_model:epoch 4240 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4241 Train Loss: 7923.918

INFO:base_model:epoch 4257 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4258 Train Loss: 7937.740234375 elapsed: 0.7058839797973633
DEBUG:base_model:epoch 4258 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4258, validation elapsed_time:0.11640596389770508
INFO:base_model:epoch 4258 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4259 Train Loss: 7910.09619140625 elapsed: 0.6488368511199951
DEBUG:base_model:epoch 4259 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4259, validation elapsed_time:0.18610692024230957
INFO:base_model:epoch 4259 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4260 Train Loss: 7930.8

INFO:base_model:epoch 4276 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4277 Train Loss: 7923.91845703125 elapsed: 0.7399492263793945
DEBUG:base_model:epoch 4277 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4277, validation elapsed_time:0.11201000213623047
INFO:base_model:epoch 4277 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4278 Train Loss: 7923.91845703125 elapsed: 0.7210919857025146
DEBUG:base_model:epoch 4278 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4278, validation elapsed_time:0.0844719409942627
INFO:base_model:epoch 4278 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4279 Train Loss: 7937.

INFO:base_model:epoch 4295 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4296 Train Loss: 7923.91845703125 elapsed: 0.8024020195007324
DEBUG:base_model:epoch 4296 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4296, validation elapsed_time:0.20096611976623535
INFO:base_model:epoch 4296 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4297 Train Loss: 7944.6513671875 elapsed: 0.813478946685791
DEBUG:base_model:epoch 4297 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4297, validation elapsed_time:0.10659193992614746
INFO:base_model:epoch 4297 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4298 Train Loss: 7917.0

INFO:base_model:epoch 4314 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4315 Train Loss: 7930.8291015625 elapsed: 0.6681339740753174
DEBUG:base_model:epoch 4315 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4315, validation elapsed_time:0.08461475372314453
INFO:base_model:epoch 4315 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4316 Train Loss: 7930.8291015625 elapsed: 0.6629188060760498
DEBUG:base_model:epoch 4316 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4316, validation elapsed_time:0.24697017669677734
INFO:base_model:epoch 4316 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4317 Train Loss: 7917.0

INFO:base_model:epoch 4333 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4334 Train Loss: 7937.740234375 elapsed: 0.8417861461639404
DEBUG:base_model:epoch 4334 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4334, validation elapsed_time:0.09575986862182617
INFO:base_model:epoch 4334 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4335 Train Loss: 7937.740234375 elapsed: 0.7458639144897461
DEBUG:base_model:epoch 4335 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4335, validation elapsed_time:0.08468794822692871
INFO:base_model:epoch 4335 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4336 Train Loss: 7930.829

INFO:base_model:epoch 4352 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4353 Train Loss: 7930.8291015625 elapsed: 0.8784630298614502
DEBUG:base_model:epoch 4353 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4353, validation elapsed_time:0.18226289749145508
INFO:base_model:epoch 4353 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4354 Train Loss: 7923.91845703125 elapsed: 0.8641738891601562
DEBUG:base_model:epoch 4354 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4354, validation elapsed_time:0.12520289421081543
INFO:base_model:epoch 4354 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4355 Train Loss: 7937.

INFO:base_model:epoch 4371 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4372 Train Loss: 7944.6513671875 elapsed: 0.7586350440979004
DEBUG:base_model:epoch 4372 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4372, validation elapsed_time:0.08235907554626465
INFO:base_model:epoch 4372 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4373 Train Loss: 7944.6513671875 elapsed: 0.6865158081054688
DEBUG:base_model:epoch 4373 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4373, validation elapsed_time:0.08363604545593262
INFO:base_model:epoch 4373 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4374 Train Loss: 7917.0

INFO:base_model:epoch 4390 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4391 Train Loss: 7923.91845703125 elapsed: 0.8369431495666504
DEBUG:base_model:epoch 4391 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4391, validation elapsed_time:0.09345197677612305
INFO:base_model:epoch 4391 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4392 Train Loss: 7944.6513671875 elapsed: 0.6880550384521484
DEBUG:base_model:epoch 4392 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4392, validation elapsed_time:0.11080718040466309
INFO:base_model:epoch 4392 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4393 Train Loss: 7910.

INFO:base_model:epoch 4409 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4410 Train Loss: 7930.8291015625 elapsed: 0.6799547672271729
DEBUG:base_model:epoch 4410 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4410, validation elapsed_time:0.08954119682312012
INFO:base_model:epoch 4410 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4411 Train Loss: 7937.740234375 elapsed: 0.8941440582275391
DEBUG:base_model:epoch 4411 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4411, validation elapsed_time:0.12546586990356445
INFO:base_model:epoch 4411 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4412 Train Loss: 7930.82

INFO:base_model:epoch 4428 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4429 Train Loss: 7917.00732421875 elapsed: 0.6549317836761475
DEBUG:base_model:epoch 4429 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4429, validation elapsed_time:0.0837860107421875
INFO:base_model:epoch 4429 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4430 Train Loss: 7944.6513671875 elapsed: 0.7308230400085449
DEBUG:base_model:epoch 4430 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4430, validation elapsed_time:0.20965290069580078
INFO:base_model:epoch 4430 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4431 Train Loss: 7923.9

INFO:base_model:epoch 4447 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4448 Train Loss: 7910.09619140625 elapsed: 0.7533941268920898
DEBUG:base_model:epoch 4448 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4448, validation elapsed_time:0.08490300178527832
INFO:base_model:epoch 4448 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4449 Train Loss: 7917.00732421875 elapsed: 0.6979930400848389
DEBUG:base_model:epoch 4449 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4449, validation elapsed_time:0.08214616775512695
INFO:base_model:epoch 4449 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4450 Train Loss: 7917

INFO:base_model:epoch 4466 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4467 Train Loss: 7917.00732421875 elapsed: 0.8223936557769775
DEBUG:base_model:epoch 4467 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4467, validation elapsed_time:0.14428019523620605
INFO:base_model:epoch 4467 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4468 Train Loss: 7923.91845703125 elapsed: 0.8251099586486816
DEBUG:base_model:epoch 4468 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4468, validation elapsed_time:0.08446526527404785
INFO:base_model:epoch 4468 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4469 Train Loss: 7937

INFO:base_model:epoch 4485 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4486 Train Loss: 7923.91845703125 elapsed: 0.704291820526123
DEBUG:base_model:epoch 4486 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4486, validation elapsed_time:0.19343209266662598
INFO:base_model:epoch 4486 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4487 Train Loss: 7930.8291015625 elapsed: 0.8064620494842529
DEBUG:base_model:epoch 4487 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4487, validation elapsed_time:0.10792398452758789
INFO:base_model:epoch 4487 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4488 Train Loss: 7937.7

INFO:base_model:epoch 4504 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4505 Train Loss: 7937.740234375 elapsed: 0.8001580238342285
DEBUG:base_model:epoch 4505 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4505, validation elapsed_time:0.11772298812866211
INFO:base_model:epoch 4505 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4506 Train Loss: 7923.91845703125 elapsed: 0.6248328685760498
DEBUG:base_model:epoch 4506 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4506, validation elapsed_time:0.10037589073181152
INFO:base_model:epoch 4506 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4507 Train Loss: 7930.8

INFO:base_model:epoch 4523 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4524 Train Loss: 7937.740234375 elapsed: 0.816298246383667
DEBUG:base_model:epoch 4524 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4524, validation elapsed_time:0.10066413879394531
INFO:base_model:epoch 4524 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4525 Train Loss: 7930.8291015625 elapsed: 0.867088794708252
DEBUG:base_model:epoch 4525 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4525, validation elapsed_time:0.1307508945465088
INFO:base_model:epoch 4525 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4526 Train Loss: 7923.91845

INFO:base_model:epoch 4542 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4543 Train Loss: 7930.8291015625 elapsed: 0.6863181591033936
DEBUG:base_model:epoch 4543 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4543, validation elapsed_time:0.08681297302246094
INFO:base_model:epoch 4543 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4544 Train Loss: 7910.09619140625 elapsed: 0.8245522975921631
DEBUG:base_model:epoch 4544 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4544, validation elapsed_time:0.19612812995910645
INFO:base_model:epoch 4544 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4545 Train Loss: 7896.

INFO:base_model:epoch 4561 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4562 Train Loss: 7923.91845703125 elapsed: 0.8447492122650146
DEBUG:base_model:epoch 4562 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4562, validation elapsed_time:0.11735391616821289
INFO:base_model:epoch 4562 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4563 Train Loss: 7930.8291015625 elapsed: 0.6571438312530518
DEBUG:base_model:epoch 4563 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4563, validation elapsed_time:0.08508801460266113
INFO:base_model:epoch 4563 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4564 Train Loss: 7923.

INFO:base_model:epoch 4580 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4581 Train Loss: 7930.8291015625 elapsed: 0.8371691703796387
DEBUG:base_model:epoch 4581 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4581, validation elapsed_time:0.19194316864013672
INFO:base_model:epoch 4581 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4582 Train Loss: 7930.8291015625 elapsed: 0.8383581638336182
DEBUG:base_model:epoch 4582 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4582, validation elapsed_time:0.0871880054473877
INFO:base_model:epoch 4582 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4583 Train Loss: 7923.91

INFO:base_model:epoch 4599 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4600 Train Loss: 7923.91845703125 elapsed: 0.669853925704956
DEBUG:base_model:epoch 4600 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4600, validation elapsed_time:0.08570313453674316
INFO:base_model:epoch 4600 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4601 Train Loss: 7923.91845703125 elapsed: 0.6719841957092285
DEBUG:base_model:epoch 4601 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4601, validation elapsed_time:0.2789180278778076
INFO:base_model:epoch 4601 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4602 Train Loss: 7937.7

INFO:base_model:epoch 4618 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4619 Train Loss: 7917.00732421875 elapsed: 0.8085739612579346
DEBUG:base_model:epoch 4619 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4619, validation elapsed_time:0.08508110046386719
INFO:base_model:epoch 4619 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4620 Train Loss: 7944.6513671875 elapsed: 0.6392631530761719
DEBUG:base_model:epoch 4620 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4620, validation elapsed_time:0.2165820598602295
INFO:base_model:epoch 4620 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4621 Train Loss: 7937.7

INFO:base_model:epoch 4637 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4638 Train Loss: 7951.5625 elapsed: 0.8496801853179932
DEBUG:base_model:epoch 4638 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7951.5625, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4638, validation elapsed_time:0.10155916213989258
INFO:base_model:epoch 4638 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4639 Train Loss: 7930.8291015625 elapsed: 0.7686913013458252
DEBUG:base_model:epoch 4639 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4639, validation elapsed_time:0.08858799934387207
INFO:base_model:epoch 4639 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4640 Train Loss: 7923.9184570

INFO:base_model:epoch 4656 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4657 Train Loss: 7930.8291015625 elapsed: 0.6439671516418457
DEBUG:base_model:epoch 4657 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4657, validation elapsed_time:0.1175699234008789
INFO:base_model:epoch 4657 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4658 Train Loss: 7944.6513671875 elapsed: 0.8976330757141113
DEBUG:base_model:epoch 4658 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4658, validation elapsed_time:0.16817998886108398
INFO:base_model:epoch 4658 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4659 Train Loss: 7910.09

INFO:base_model:epoch 4675 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4676 Train Loss: 7910.09619140625 elapsed: 0.8155429363250732
DEBUG:base_model:epoch 4676 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4676, validation elapsed_time:0.1085512638092041
INFO:base_model:epoch 4676 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4677 Train Loss: 7930.8291015625 elapsed: 0.8311429023742676
DEBUG:base_model:epoch 4677 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4677, validation elapsed_time:0.08545088768005371
INFO:base_model:epoch 4677 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4678 Train Loss: 7910.0

INFO:base_model:epoch 4694 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4695 Train Loss: 7937.740234375 elapsed: 0.7966561317443848
DEBUG:base_model:epoch 4695 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4695, validation elapsed_time:0.1289658546447754
INFO:base_model:epoch 4695 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4696 Train Loss: 7937.740234375 elapsed: 0.6571640968322754
DEBUG:base_model:epoch 4696 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4696, validation elapsed_time:0.22649502754211426
INFO:base_model:epoch 4696 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4697 Train Loss: 7937.7402

INFO:base_model:epoch 4713 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4714 Train Loss: 7937.740234375 elapsed: 0.9039249420166016
DEBUG:base_model:epoch 4714 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4714, validation elapsed_time:0.1325371265411377
INFO:base_model:epoch 4714 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4715 Train Loss: 7923.91845703125 elapsed: 0.7439451217651367
DEBUG:base_model:epoch 4715 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4715, validation elapsed_time:0.08239912986755371
INFO:base_model:epoch 4715 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4716 Train Loss: 7930.82

INFO:base_model:epoch 4732 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4733 Train Loss: 7896.27392578125 elapsed: 0.801403284072876
DEBUG:base_model:epoch 4733 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7896.2739, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4733, validation elapsed_time:0.11012101173400879
INFO:base_model:epoch 4733 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4734 Train Loss: 7944.6513671875 elapsed: 0.8477959632873535
DEBUG:base_model:epoch 4734 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4734, validation elapsed_time:0.1390368938446045
INFO:base_model:epoch 4734 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4735 Train Loss: 7930.82

INFO:base_model:epoch 4751 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4752 Train Loss: 7923.91845703125 elapsed: 0.8633668422698975
DEBUG:base_model:epoch 4752 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4752, validation elapsed_time:0.10473322868347168
INFO:base_model:epoch 4752 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4753 Train Loss: 7917.00732421875 elapsed: 0.7396500110626221
DEBUG:base_model:epoch 4753 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4753, validation elapsed_time:0.08536601066589355
INFO:base_model:epoch 4753 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4754 Train Loss: 7944

INFO:base_model:epoch 4770 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4771 Train Loss: 7930.8291015625 elapsed: 0.8038151264190674
DEBUG:base_model:epoch 4771 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4771, validation elapsed_time:0.1544651985168457
INFO:base_model:epoch 4771 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4772 Train Loss: 7923.91845703125 elapsed: 0.8071808815002441
DEBUG:base_model:epoch 4772 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4772, validation elapsed_time:0.12504816055297852
INFO:base_model:epoch 4772 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4773 Train Loss: 7903.1

INFO:base_model:epoch 4789 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4790 Train Loss: 7923.91845703125 elapsed: 0.7111091613769531
DEBUG:base_model:epoch 4790 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4790, validation elapsed_time:0.0848398208618164
INFO:base_model:epoch 4790 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4791 Train Loss: 7910.09619140625 elapsed: 0.7543461322784424
DEBUG:base_model:epoch 4791 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4791, validation elapsed_time:0.08537411689758301
INFO:base_model:epoch 4791 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4792 Train Loss: 7930.

INFO:base_model:epoch 4808 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4809 Train Loss: 7930.8291015625 elapsed: 0.8713598251342773
DEBUG:base_model:epoch 4809 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4809, validation elapsed_time:0.09737682342529297
INFO:base_model:epoch 4809 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4810 Train Loss: 7923.91845703125 elapsed: 0.6716468334197998
DEBUG:base_model:epoch 4810 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4810, validation elapsed_time:0.08714509010314941
INFO:base_model:epoch 4810 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4811 Train Loss: 7923.

INFO:base_model:epoch 4827 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4828 Train Loss: 7917.00732421875 elapsed: 0.8714139461517334
DEBUG:base_model:epoch 4828 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4828, validation elapsed_time:0.14715003967285156
INFO:base_model:epoch 4828 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4829 Train Loss: 7930.8291015625 elapsed: 0.7891519069671631
DEBUG:base_model:epoch 4829 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4829, validation elapsed_time:0.13741779327392578
INFO:base_model:epoch 4829 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4830 Train Loss: 7903.

INFO:base_model:epoch 4846 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4847 Train Loss: 7937.740234375 elapsed: 0.7386898994445801
DEBUG:base_model:epoch 4847 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4847, validation elapsed_time:0.08249402046203613
INFO:base_model:epoch 4847 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4848 Train Loss: 7923.91845703125 elapsed: 0.8825259208679199
DEBUG:base_model:epoch 4848 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4848, validation elapsed_time:0.08254313468933105
INFO:base_model:epoch 4848 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4849 Train Loss: 7923.9

INFO:base_model:epoch 4865 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4866 Train Loss: 7903.18505859375 elapsed: 0.7710750102996826
DEBUG:base_model:epoch 4866 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7903.1851, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4866, validation elapsed_time:0.18239092826843262
INFO:base_model:epoch 4866 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4867 Train Loss: 7917.00732421875 elapsed: 0.8249762058258057
DEBUG:base_model:epoch 4867 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4867, validation elapsed_time:0.08826613426208496
INFO:base_model:epoch 4867 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4868 Train Loss: 7910

INFO:base_model:epoch 4884 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4885 Train Loss: 7923.91845703125 elapsed: 0.8182148933410645
DEBUG:base_model:epoch 4885 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4885, validation elapsed_time:0.08745002746582031
INFO:base_model:epoch 4885 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4886 Train Loss: 7937.740234375 elapsed: 0.7718410491943359
DEBUG:base_model:epoch 4886 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4886, validation elapsed_time:0.20355701446533203
INFO:base_model:epoch 4886 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4887 Train Loss: 7930.8

INFO:base_model:epoch 4903 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4904 Train Loss: 7923.91845703125 elapsed: 0.7079589366912842
DEBUG:base_model:epoch 4904 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4904, validation elapsed_time:0.08233022689819336
INFO:base_model:epoch 4904 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4905 Train Loss: 7923.91845703125 elapsed: 0.8529207706451416
DEBUG:base_model:epoch 4905 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4905, validation elapsed_time:0.0827951431274414
INFO:base_model:epoch 4905 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4906 Train Loss: 7937.

INFO:base_model:epoch 4922 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4923 Train Loss: 7910.09619140625 elapsed: 0.7723047733306885
DEBUG:base_model:epoch 4923 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4923, validation elapsed_time:0.15955400466918945
INFO:base_model:epoch 4923 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4924 Train Loss: 7917.00732421875 elapsed: 0.7331030368804932
DEBUG:base_model:epoch 4924 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4924, validation elapsed_time:0.12818479537963867
INFO:base_model:epoch 4924 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4925 Train Loss: 7917

INFO:base_model:epoch 4941 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4942 Train Loss: 7930.8291015625 elapsed: 0.8936469554901123
DEBUG:base_model:epoch 4942 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4942, validation elapsed_time:0.10534787178039551
INFO:base_model:epoch 4942 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4943 Train Loss: 7930.8291015625 elapsed: 0.7977979183197021
DEBUG:base_model:epoch 4943 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4943, validation elapsed_time:0.0785362720489502
INFO:base_model:epoch 4943 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4944 Train Loss: 7910.09

INFO:base_model:epoch 4960 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4961 Train Loss: 7910.09619140625 elapsed: 0.6203169822692871
DEBUG:base_model:epoch 4961 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4961, validation elapsed_time:0.28777384757995605
INFO:base_model:epoch 4961 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4962 Train Loss: 7937.740234375 elapsed: 0.6796119213104248
DEBUG:base_model:epoch 4962 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4962, validation elapsed_time:0.15892601013183594
INFO:base_model:epoch 4962 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4963 Train Loss: 7937.7

INFO:base_model:epoch 4979 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4980 Train Loss: 7930.8291015625 elapsed: 0.8629660606384277
DEBUG:base_model:epoch 4980 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4980, validation elapsed_time:0.11211681365966797
INFO:base_model:epoch 4980 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4981 Train Loss: 7917.00732421875 elapsed: 1.0768320560455322
DEBUG:base_model:epoch 4981 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4981, validation elapsed_time:0.1858670711517334
INFO:base_model:epoch 4981 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4982 Train Loss: 7944.6

INFO:base_model:epoch 4998 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 4999 Train Loss: 7923.91845703125 elapsed: 0.7917649745941162
DEBUG:base_model:epoch 4999 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:4999, validation elapsed_time:0.14674687385559082
INFO:base_model:epoch 4999 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5000 Train Loss: 7937.740234375 elapsed: 0.7714409828186035
DEBUG:base_model:epoch 5000 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5000, validation elapsed_time:0.12276315689086914
INFO:base_model:epoch 5000 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5001 Train Loss: 7917.0

INFO:base_model:epoch 5017 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5018 Train Loss: 7930.8291015625 elapsed: 0.809013843536377
DEBUG:base_model:epoch 5018 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5018, validation elapsed_time:0.08441972732543945
INFO:base_model:epoch 5018 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5019 Train Loss: 7930.8291015625 elapsed: 0.6179850101470947
DEBUG:base_model:epoch 5019 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5019, validation elapsed_time:0.26018190383911133
INFO:base_model:epoch 5019 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5020 Train Loss: 7930.82

INFO:base_model:epoch 5036 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5037 Train Loss: 7944.6513671875 elapsed: 0.6977748870849609
DEBUG:base_model:epoch 5037 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5037, validation elapsed_time:0.10721993446350098
INFO:base_model:epoch 5037 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5038 Train Loss: 7937.740234375 elapsed: 0.6654560565948486
DEBUG:base_model:epoch 5038 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5038, validation elapsed_time:0.16279911994934082
INFO:base_model:epoch 5038 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5039 Train Loss: 7937.74

INFO:base_model:epoch 5055 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5056 Train Loss: 7930.8291015625 elapsed: 0.7198951244354248
DEBUG:base_model:epoch 5056 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5056, validation elapsed_time:0.12125110626220703
INFO:base_model:epoch 5056 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5057 Train Loss: 7937.740234375 elapsed: 0.7653307914733887
DEBUG:base_model:epoch 5057 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5057, validation elapsed_time:0.10393381118774414
INFO:base_model:epoch 5057 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5058 Train Loss: 7923.91

INFO:base_model:epoch 5074 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5075 Train Loss: 7923.91845703125 elapsed: 0.8185217380523682
DEBUG:base_model:epoch 5075 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5075, validation elapsed_time:0.08733415603637695
INFO:base_model:epoch 5075 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5076 Train Loss: 7944.6513671875 elapsed: 0.6573772430419922
DEBUG:base_model:epoch 5076 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5076, validation elapsed_time:0.2640540599822998
INFO:base_model:epoch 5076 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5077 Train Loss: 7930.8

INFO:base_model:epoch 5093 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5094 Train Loss: 7923.91845703125 elapsed: 0.765507698059082
DEBUG:base_model:epoch 5094 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5094, validation elapsed_time:0.08829903602600098
INFO:base_model:epoch 5094 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5095 Train Loss: 7923.91845703125 elapsed: 0.6708829402923584
DEBUG:base_model:epoch 5095 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5095, validation elapsed_time:0.2453930377960205
INFO:base_model:epoch 5095 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5096 Train Loss: 7923.9

INFO:base_model:epoch 5112 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5113 Train Loss: 7923.91845703125 elapsed: 0.8212893009185791
DEBUG:base_model:epoch 5113 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5113, validation elapsed_time:0.08751177787780762
INFO:base_model:epoch 5113 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5114 Train Loss: 7944.6513671875 elapsed: 0.7811720371246338
DEBUG:base_model:epoch 5114 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5114, validation elapsed_time:0.08256793022155762
INFO:base_model:epoch 5114 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5115 Train Loss: 7917.

INFO:base_model:epoch 5131 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5132 Train Loss: 7944.6513671875 elapsed: 0.8501460552215576
DEBUG:base_model:epoch 5132 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5132, validation elapsed_time:0.11350297927856445
INFO:base_model:epoch 5132 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5133 Train Loss: 7903.18505859375 elapsed: 1.0118789672851562
DEBUG:base_model:epoch 5133 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7903.1851, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5133, validation elapsed_time:0.2125101089477539
INFO:base_model:epoch 5133 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5134 Train Loss: 7910.0

INFO:base_model:epoch 5150 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5151 Train Loss: 7903.18505859375 elapsed: 0.7688450813293457
DEBUG:base_model:epoch 5151 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7903.1851, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5151, validation elapsed_time:0.11529397964477539
INFO:base_model:epoch 5151 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5152 Train Loss: 7930.8291015625 elapsed: 0.7497479915618896
DEBUG:base_model:epoch 5152 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5152, validation elapsed_time:0.09059500694274902
INFO:base_model:epoch 5152 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5153 Train Loss: 7923.

INFO:base_model:epoch 5169 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5170 Train Loss: 7937.740234375 elapsed: 0.7527680397033691
DEBUG:base_model:epoch 5170 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5170, validation elapsed_time:0.08377599716186523
INFO:base_model:epoch 5170 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5171 Train Loss: 7944.6513671875 elapsed: 0.8335180282592773
DEBUG:base_model:epoch 5171 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5171, validation elapsed_time:0.08750009536743164
INFO:base_model:epoch 5171 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5172 Train Loss: 7930.82

INFO:base_model:epoch 5188 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5189 Train Loss: 7923.91845703125 elapsed: 0.6833851337432861
DEBUG:base_model:epoch 5189 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5189, validation elapsed_time:0.09405922889709473
INFO:base_model:epoch 5189 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5190 Train Loss: 7917.00732421875 elapsed: 0.7952911853790283
DEBUG:base_model:epoch 5190 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5190, validation elapsed_time:0.21594786643981934
INFO:base_model:epoch 5190 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5191 Train Loss: 7923

INFO:base_model:epoch 5207 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5208 Train Loss: 7944.6513671875 elapsed: 0.7911381721496582
DEBUG:base_model:epoch 5208 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5208, validation elapsed_time:0.14593982696533203
INFO:base_model:epoch 5208 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5209 Train Loss: 7917.00732421875 elapsed: 0.7928009033203125
DEBUG:base_model:epoch 5209 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5209, validation elapsed_time:0.16909003257751465
INFO:base_model:epoch 5209 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5210 Train Loss: 7937.

INFO:base_model:epoch 5226 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5227 Train Loss: 7937.740234375 elapsed: 0.8114500045776367
DEBUG:base_model:epoch 5227 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5227, validation elapsed_time:0.08761000633239746
INFO:base_model:epoch 5227 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5228 Train Loss: 7923.91845703125 elapsed: 0.8003709316253662
DEBUG:base_model:epoch 5228 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5228, validation elapsed_time:0.08855795860290527
INFO:base_model:epoch 5228 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5229 Train Loss: 7944.6

INFO:base_model:epoch 5245 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5246 Train Loss: 7930.8291015625 elapsed: 0.6232349872589111
DEBUG:base_model:epoch 5246 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5246, validation elapsed_time:0.10466885566711426
INFO:base_model:epoch 5246 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5247 Train Loss: 7937.740234375 elapsed: 0.8231050968170166
DEBUG:base_model:epoch 5247 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5247, validation elapsed_time:0.15235304832458496
INFO:base_model:epoch 5247 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5248 Train Loss: 7910.09

INFO:base_model:epoch 5264 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5265 Train Loss: 7910.09619140625 elapsed: 0.7051558494567871
DEBUG:base_model:epoch 5265 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5265, validation elapsed_time:0.1701970100402832
INFO:base_model:epoch 5265 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5266 Train Loss: 7930.8291015625 elapsed: 0.8148012161254883
DEBUG:base_model:epoch 5266 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5266, validation elapsed_time:0.12626886367797852
INFO:base_model:epoch 5266 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5267 Train Loss: 7923.9

INFO:base_model:epoch 5283 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5284 Train Loss: 7896.27392578125 elapsed: 0.908811092376709
DEBUG:base_model:epoch 5284 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7896.2739, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5284, validation elapsed_time:0.2708261013031006
INFO:base_model:epoch 5284 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5285 Train Loss: 7923.91845703125 elapsed: 0.7748382091522217
DEBUG:base_model:epoch 5285 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5285, validation elapsed_time:0.08507919311523438
INFO:base_model:epoch 5285 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5286 Train Loss: 7923.9

INFO:base_model:epoch 5302 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5303 Train Loss: 7930.8291015625 elapsed: 0.6110248565673828
DEBUG:base_model:epoch 5303 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5303, validation elapsed_time:0.08364105224609375
INFO:base_model:epoch 5303 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5304 Train Loss: 7937.740234375 elapsed: 0.7739431858062744
DEBUG:base_model:epoch 5304 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5304, validation elapsed_time:0.24112629890441895
INFO:base_model:epoch 5304 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5305 Train Loss: 7889.36

INFO:base_model:epoch 5321 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5322 Train Loss: 7910.09619140625 elapsed: 0.8165431022644043
DEBUG:base_model:epoch 5322 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5322, validation elapsed_time:0.09718823432922363
INFO:base_model:epoch 5322 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5323 Train Loss: 7917.00732421875 elapsed: 0.8023772239685059
DEBUG:base_model:epoch 5323 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5323, validation elapsed_time:0.1480388641357422
INFO:base_model:epoch 5323 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5324 Train Loss: 7917.

INFO:base_model:epoch 5340 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5341 Train Loss: 7910.09619140625 elapsed: 0.9435648918151855
DEBUG:base_model:epoch 5341 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5341, validation elapsed_time:0.08928823471069336
INFO:base_model:epoch 5341 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5342 Train Loss: 7944.6513671875 elapsed: 0.8088939189910889
DEBUG:base_model:epoch 5342 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5342, validation elapsed_time:0.13703608512878418
INFO:base_model:epoch 5342 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5343 Train Loss: 7930.

INFO:base_model:epoch 5359 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5360 Train Loss: 7930.8291015625 elapsed: 0.8296389579772949
DEBUG:base_model:epoch 5360 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5360, validation elapsed_time:0.08319473266601562
INFO:base_model:epoch 5360 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5361 Train Loss: 7937.740234375 elapsed: 0.6430838108062744
DEBUG:base_model:epoch 5361 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5361, validation elapsed_time:0.27529335021972656
INFO:base_model:epoch 5361 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5362 Train Loss: 7930.82

INFO:base_model:epoch 5378 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5379 Train Loss: 7923.91845703125 elapsed: 0.7935328483581543
DEBUG:base_model:epoch 5379 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5379, validation elapsed_time:0.08057498931884766
INFO:base_model:epoch 5379 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5380 Train Loss: 7917.00732421875 elapsed: 0.7628350257873535
DEBUG:base_model:epoch 5380 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5380, validation elapsed_time:0.17586112022399902
INFO:base_model:epoch 5380 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5381 Train Loss: 7930

INFO:base_model:epoch 5397 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5398 Train Loss: 7930.8291015625 elapsed: 0.8163731098175049
DEBUG:base_model:epoch 5398 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5398, validation elapsed_time:0.20060396194458008
INFO:base_model:epoch 5398 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5399 Train Loss: 7917.00732421875 elapsed: 1.8120276927947998
DEBUG:base_model:epoch 5399 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5399, validation elapsed_time:0.24696588516235352
INFO:base_model:epoch 5399 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5400 Train Loss: 7930.

INFO:base_model:epoch 5416 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5417 Train Loss: 7930.8291015625 elapsed: 0.6418359279632568
DEBUG:base_model:epoch 5417 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5417, validation elapsed_time:0.2855079174041748
INFO:base_model:epoch 5417 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5418 Train Loss: 7930.8291015625 elapsed: 0.7722201347351074
DEBUG:base_model:epoch 5418 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5418, validation elapsed_time:0.1564631462097168
INFO:base_model:epoch 5418 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5419 Train Loss: 7937.740

INFO:base_model:epoch 5435 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5436 Train Loss: 7910.09619140625 elapsed: 0.6608178615570068
DEBUG:base_model:epoch 5436 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5436, validation elapsed_time:0.17475080490112305
INFO:base_model:epoch 5436 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5437 Train Loss: 7930.8291015625 elapsed: 0.7928497791290283
DEBUG:base_model:epoch 5437 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5437, validation elapsed_time:0.08507609367370605
INFO:base_model:epoch 5437 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5438 Train Loss: 7917.

INFO:base_model:epoch 5454 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5455 Train Loss: 7930.8291015625 elapsed: 0.7856810092926025
DEBUG:base_model:epoch 5455 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5455, validation elapsed_time:0.1000370979309082
INFO:base_model:epoch 5455 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5456 Train Loss: 7923.91845703125 elapsed: 0.7695128917694092
DEBUG:base_model:epoch 5456 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5456, validation elapsed_time:0.12082886695861816
INFO:base_model:epoch 5456 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5457 Train Loss: 7937.7

INFO:base_model:epoch 5473 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5474 Train Loss: 7923.91845703125 elapsed: 0.779832124710083
DEBUG:base_model:epoch 5474 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5474, validation elapsed_time:0.09245586395263672
INFO:base_model:epoch 5474 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5475 Train Loss: 7910.09619140625 elapsed: 0.795753002166748
DEBUG:base_model:epoch 5475 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5475, validation elapsed_time:0.12750005722045898
INFO:base_model:epoch 5475 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5476 Train Loss: 7930.8

INFO:base_model:epoch 5492 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5493 Train Loss: 7917.00732421875 elapsed: 0.7686710357666016
DEBUG:base_model:epoch 5493 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5493, validation elapsed_time:0.13448214530944824
INFO:base_model:epoch 5493 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5494 Train Loss: 7937.740234375 elapsed: 0.8574779033660889
DEBUG:base_model:epoch 5494 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5494, validation elapsed_time:0.10345602035522461
INFO:base_model:epoch 5494 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5495 Train Loss: 7923.9

INFO:base_model:epoch 5511 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5512 Train Loss: 7930.8291015625 elapsed: 0.7039217948913574
DEBUG:base_model:epoch 5512 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5512, validation elapsed_time:0.09122800827026367
INFO:base_model:epoch 5512 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5513 Train Loss: 7923.91845703125 elapsed: 0.8991470336914062
DEBUG:base_model:epoch 5513 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5513, validation elapsed_time:0.17070388793945312
INFO:base_model:epoch 5513 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5514 Train Loss: 7930.

INFO:base_model:epoch 5530 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5531 Train Loss: 7930.8291015625 elapsed: 0.9740517139434814
DEBUG:base_model:epoch 5531 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5531, validation elapsed_time:0.2039170265197754
INFO:base_model:epoch 5531 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5532 Train Loss: 7930.8291015625 elapsed: 0.9240128993988037
DEBUG:base_model:epoch 5532 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5532, validation elapsed_time:0.2440340518951416
INFO:base_model:epoch 5532 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5533 Train Loss: 7930.829

INFO:base_model:epoch 5549 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5550 Train Loss: 7923.91845703125 elapsed: 0.8187551498413086
DEBUG:base_model:epoch 5550 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5550, validation elapsed_time:0.08544182777404785
INFO:base_model:epoch 5550 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5551 Train Loss: 7930.8291015625 elapsed: 0.7489299774169922
DEBUG:base_model:epoch 5551 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5551, validation elapsed_time:0.09184002876281738
INFO:base_model:epoch 5551 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5552 Train Loss: 7923.

INFO:base_model:epoch 5568 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5569 Train Loss: 7930.8291015625 elapsed: 0.8344378471374512
DEBUG:base_model:epoch 5569 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5569, validation elapsed_time:0.14956998825073242
INFO:base_model:epoch 5569 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5570 Train Loss: 7923.91845703125 elapsed: 0.805772066116333
DEBUG:base_model:epoch 5570 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5570, validation elapsed_time:0.09737706184387207
INFO:base_model:epoch 5570 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5571 Train Loss: 7930.8

INFO:base_model:epoch 5587 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5588 Train Loss: 7917.00732421875 elapsed: 0.8479089736938477
DEBUG:base_model:epoch 5588 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5588, validation elapsed_time:0.10697579383850098
INFO:base_model:epoch 5588 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5589 Train Loss: 7930.8291015625 elapsed: 0.8130450248718262
DEBUG:base_model:epoch 5589 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5589, validation elapsed_time:0.08446168899536133
INFO:base_model:epoch 5589 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5590 Train Loss: 7923.

INFO:base_model:epoch 5606 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5607 Train Loss: 7944.6513671875 elapsed: 0.8204419612884521
DEBUG:base_model:epoch 5607 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5607, validation elapsed_time:0.08639907836914062
INFO:base_model:epoch 5607 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5608 Train Loss: 7903.18505859375 elapsed: 0.8244919776916504
DEBUG:base_model:epoch 5608 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7903.1851, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5608, validation elapsed_time:0.15549612045288086
INFO:base_model:epoch 5608 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5609 Train Loss: 7917.

INFO:base_model:epoch 5625 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5626 Train Loss: 7937.740234375 elapsed: 0.7919819355010986
DEBUG:base_model:epoch 5626 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5626, validation elapsed_time:0.1520700454711914
INFO:base_model:epoch 5626 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5627 Train Loss: 7923.91845703125 elapsed: 0.666248083114624
DEBUG:base_model:epoch 5627 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5627, validation elapsed_time:0.10003304481506348
INFO:base_model:epoch 5627 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5628 Train Loss: 7930.829

INFO:base_model:epoch 5644 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5645 Train Loss: 7930.8291015625 elapsed: 0.8056020736694336
DEBUG:base_model:epoch 5645 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5645, validation elapsed_time:0.08182072639465332
INFO:base_model:epoch 5645 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5646 Train Loss: 7937.740234375 elapsed: 0.6780800819396973
DEBUG:base_model:epoch 5646 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5646, validation elapsed_time:0.2590372562408447
INFO:base_model:epoch 5646 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5647 Train Loss: 7923.918

INFO:base_model:epoch 5663 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5664 Train Loss: 7917.00732421875 elapsed: 0.7884500026702881
DEBUG:base_model:epoch 5664 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5664, validation elapsed_time:0.10113310813903809
INFO:base_model:epoch 5664 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5665 Train Loss: 7910.09619140625 elapsed: 0.8502559661865234
DEBUG:base_model:epoch 5665 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5665, validation elapsed_time:0.11581707000732422
INFO:base_model:epoch 5665 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5666 Train Loss: 7930

INFO:base_model:epoch 5682 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5683 Train Loss: 7917.00732421875 elapsed: 0.9284029006958008
DEBUG:base_model:epoch 5683 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5683, validation elapsed_time:0.16461706161499023
INFO:base_model:epoch 5683 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5684 Train Loss: 7910.09619140625 elapsed: 0.804318904876709
DEBUG:base_model:epoch 5684 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5684, validation elapsed_time:0.14691591262817383
INFO:base_model:epoch 5684 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5685 Train Loss: 7923.

INFO:base_model:epoch 5701 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5702 Train Loss: 7930.8291015625 elapsed: 0.7117922306060791
DEBUG:base_model:epoch 5702 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5702, validation elapsed_time:0.10690498352050781
INFO:base_model:epoch 5702 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5703 Train Loss: 7930.8291015625 elapsed: 0.8501410484313965
DEBUG:base_model:epoch 5703 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5703, validation elapsed_time:0.08607912063598633
INFO:base_model:epoch 5703 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5704 Train Loss: 7937.7

INFO:base_model:epoch 5720 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5721 Train Loss: 7930.8291015625 elapsed: 0.7777247428894043
DEBUG:base_model:epoch 5721 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5721, validation elapsed_time:0.24088287353515625
INFO:base_model:epoch 5721 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5722 Train Loss: 7923.91845703125 elapsed: 0.8240928649902344
DEBUG:base_model:epoch 5722 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5722, validation elapsed_time:0.08461999893188477
INFO:base_model:epoch 5722 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5723 Train Loss: 7923.

INFO:base_model:epoch 5739 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5740 Train Loss: 7944.6513671875 elapsed: 0.778167724609375
DEBUG:base_model:epoch 5740 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5740, validation elapsed_time:0.08730506896972656
INFO:base_model:epoch 5740 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5741 Train Loss: 7937.740234375 elapsed: 0.7486259937286377
DEBUG:base_model:epoch 5741 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5741, validation elapsed_time:0.08208680152893066
INFO:base_model:epoch 5741 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5742 Train Loss: 7917.007

INFO:base_model:epoch 5758 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5759 Train Loss: 7944.6513671875 elapsed: 0.8205711841583252
DEBUG:base_model:epoch 5759 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5759, validation elapsed_time:0.08282089233398438
INFO:base_model:epoch 5759 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5760 Train Loss: 7951.5625 elapsed: 0.7800829410552979
DEBUG:base_model:epoch 5760 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7951.5625, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5760, validation elapsed_time:0.09230494499206543
INFO:base_model:epoch 5760 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5761 Train Loss: 7923.9184570

INFO:base_model:epoch 5777 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5778 Train Loss: 7917.00732421875 elapsed: 0.9652440547943115
DEBUG:base_model:epoch 5778 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5778, validation elapsed_time:0.1977238655090332
INFO:base_model:epoch 5778 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5779 Train Loss: 7910.09619140625 elapsed: 0.8000509738922119
DEBUG:base_model:epoch 5779 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5779, validation elapsed_time:0.12994027137756348
INFO:base_model:epoch 5779 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5780 Train Loss: 7930.

INFO:base_model:epoch 5796 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5797 Train Loss: 7923.91845703125 elapsed: 0.6260490417480469
DEBUG:base_model:epoch 5797 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5797, validation elapsed_time:0.09822726249694824
INFO:base_model:epoch 5797 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5798 Train Loss: 7923.91845703125 elapsed: 0.8736162185668945
DEBUG:base_model:epoch 5798 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5798, validation elapsed_time:0.12087202072143555
INFO:base_model:epoch 5798 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5799 Train Loss: 7930

INFO:base_model:epoch 5815 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5816 Train Loss: 7917.00732421875 elapsed: 0.8398802280426025
DEBUG:base_model:epoch 5816 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5816, validation elapsed_time:0.11869001388549805
INFO:base_model:epoch 5816 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5817 Train Loss: 7903.18505859375 elapsed: 0.7993578910827637
DEBUG:base_model:epoch 5817 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7903.1851, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5817, validation elapsed_time:0.0802621841430664
INFO:base_model:epoch 5817 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5818 Train Loss: 7944.

INFO:base_model:epoch 5834 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5835 Train Loss: 7917.00732421875 elapsed: 0.7515301704406738
DEBUG:base_model:epoch 5835 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5835, validation elapsed_time:0.08434414863586426
INFO:base_model:epoch 5835 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5836 Train Loss: 7917.00732421875 elapsed: 0.8694100379943848
DEBUG:base_model:epoch 5836 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5836, validation elapsed_time:0.08586502075195312
INFO:base_model:epoch 5836 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5837 Train Loss: 7930

INFO:base_model:epoch 5853 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5854 Train Loss: 7944.6513671875 elapsed: 0.7368240356445312
DEBUG:base_model:epoch 5854 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5854, validation elapsed_time:0.12808680534362793
INFO:base_model:epoch 5854 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5855 Train Loss: 7923.91845703125 elapsed: 0.8606991767883301
DEBUG:base_model:epoch 5855 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5855, validation elapsed_time:0.13106703758239746
INFO:base_model:epoch 5855 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5856 Train Loss: 7923.

INFO:base_model:epoch 5872 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5873 Train Loss: 7937.740234375 elapsed: 0.6709420680999756
DEBUG:base_model:epoch 5873 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5873, validation elapsed_time:0.219407320022583
INFO:base_model:epoch 5873 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5874 Train Loss: 7910.09619140625 elapsed: 0.8913331031799316
DEBUG:base_model:epoch 5874 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5874, validation elapsed_time:0.14481592178344727
INFO:base_model:epoch 5874 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5875 Train Loss: 7910.096

INFO:base_model:epoch 5891 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5892 Train Loss: 7903.18505859375 elapsed: 0.8981418609619141
DEBUG:base_model:epoch 5892 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7903.1851, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5892, validation elapsed_time:0.11562514305114746
INFO:base_model:epoch 5892 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5893 Train Loss: 7937.740234375 elapsed: 0.8045201301574707
DEBUG:base_model:epoch 5893 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5893, validation elapsed_time:0.08343720436096191
INFO:base_model:epoch 5893 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5894 Train Loss: 7910.0

INFO:base_model:epoch 5910 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5911 Train Loss: 7910.09619140625 elapsed: 0.7086939811706543
DEBUG:base_model:epoch 5911 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5911, validation elapsed_time:0.2287302017211914
INFO:base_model:epoch 5911 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5912 Train Loss: 7917.00732421875 elapsed: 0.8077280521392822
DEBUG:base_model:epoch 5912 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5912, validation elapsed_time:0.11981201171875
INFO:base_model:epoch 5912 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5913 Train Loss: 7923.918

INFO:base_model:epoch 5929 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5930 Train Loss: 7930.8291015625 elapsed: 0.7055809497833252
DEBUG:base_model:epoch 5930 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5930, validation elapsed_time:0.08559298515319824
INFO:base_model:epoch 5930 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5931 Train Loss: 7917.00732421875 elapsed: 0.9261980056762695
DEBUG:base_model:epoch 5931 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5931, validation elapsed_time:0.12255501747131348
INFO:base_model:epoch 5931 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5932 Train Loss: 7944.

INFO:base_model:epoch 5948 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5949 Train Loss: 7930.8291015625 elapsed: 0.8156368732452393
DEBUG:base_model:epoch 5949 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5949, validation elapsed_time:0.08367729187011719
INFO:base_model:epoch 5949 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5950 Train Loss: 7917.00732421875 elapsed: 0.6671350002288818
DEBUG:base_model:epoch 5950 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5950, validation elapsed_time:0.2623097896575928
INFO:base_model:epoch 5950 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5951 Train Loss: 7930.8

INFO:base_model:epoch 5967 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5968 Train Loss: 7923.91845703125 elapsed: 0.7928690910339355
DEBUG:base_model:epoch 5968 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5968, validation elapsed_time:0.14562606811523438
INFO:base_model:epoch 5968 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5969 Train Loss: 7944.6513671875 elapsed: 0.8009920120239258
DEBUG:base_model:epoch 5969 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5969, validation elapsed_time:0.09767293930053711
INFO:base_model:epoch 5969 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5970 Train Loss: 7937.

INFO:base_model:epoch 5986 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5987 Train Loss: 7917.00732421875 elapsed: 0.876396656036377
DEBUG:base_model:epoch 5987 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5987, validation elapsed_time:0.12851691246032715
INFO:base_model:epoch 5987 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5988 Train Loss: 7917.00732421875 elapsed: 0.6225202083587646
DEBUG:base_model:epoch 5988 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:5988, validation elapsed_time:0.09952068328857422
INFO:base_model:epoch 5988 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 5989 Train Loss: 7923.

INFO:base_model:epoch 6005 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6006 Train Loss: 7910.09619140625 elapsed: 0.8490192890167236
DEBUG:base_model:epoch 6006 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6006, validation elapsed_time:0.08274197578430176
INFO:base_model:epoch 6006 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6007 Train Loss: 7937.740234375 elapsed: 0.8194029331207275
DEBUG:base_model:epoch 6007 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6007, validation elapsed_time:0.14034295082092285
INFO:base_model:epoch 6007 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6008 Train Loss: 7930.8

INFO:base_model:epoch 6024 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6025 Train Loss: 7923.91845703125 elapsed: 0.8362510204315186
DEBUG:base_model:epoch 6025 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6025, validation elapsed_time:0.17867112159729004
INFO:base_model:epoch 6025 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6026 Train Loss: 7923.91845703125 elapsed: 0.7648928165435791
DEBUG:base_model:epoch 6026 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6026, validation elapsed_time:0.09583687782287598
INFO:base_model:epoch 6026 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6027 Train Loss: 7917

INFO:base_model:epoch 6043 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6044 Train Loss: 7917.00732421875 elapsed: 0.6941900253295898
DEBUG:base_model:epoch 6044 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6044, validation elapsed_time:0.10825014114379883
INFO:base_model:epoch 6044 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6045 Train Loss: 7917.00732421875 elapsed: 0.9028491973876953
DEBUG:base_model:epoch 6045 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6045, validation elapsed_time:0.1168980598449707
INFO:base_model:epoch 6045 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6046 Train Loss: 7937.

INFO:base_model:epoch 6062 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6063 Train Loss: 7910.09619140625 elapsed: 0.852607011795044
DEBUG:base_model:epoch 6063 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6063, validation elapsed_time:0.10244297981262207
INFO:base_model:epoch 6063 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6064 Train Loss: 7917.00732421875 elapsed: 0.7600588798522949
DEBUG:base_model:epoch 6064 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6064, validation elapsed_time:0.0823049545288086
INFO:base_model:epoch 6064 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6065 Train Loss: 7923.9

INFO:base_model:epoch 6081 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6082 Train Loss: 7923.91845703125 elapsed: 0.7796471118927002
DEBUG:base_model:epoch 6082 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6082, validation elapsed_time:0.08645200729370117
INFO:base_model:epoch 6082 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6083 Train Loss: 7930.8291015625 elapsed: 0.8343071937561035
DEBUG:base_model:epoch 6083 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6083, validation elapsed_time:0.09391427040100098
INFO:base_model:epoch 6083 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6084 Train Loss: 7923.

INFO:base_model:epoch 6100 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6101 Train Loss: 7910.09619140625 elapsed: 0.79622483253479
DEBUG:base_model:epoch 6101 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6101, validation elapsed_time:0.16265535354614258
INFO:base_model:epoch 6101 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6102 Train Loss: 7923.91845703125 elapsed: 0.8419771194458008
DEBUG:base_model:epoch 6102 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6102, validation elapsed_time:0.12801003456115723
INFO:base_model:epoch 6102 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6103 Train Loss: 7923.9

INFO:base_model:epoch 6119 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6120 Train Loss: 7910.09619140625 elapsed: 0.910545825958252
DEBUG:base_model:epoch 6120 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6120, validation elapsed_time:0.3864278793334961
INFO:base_model:epoch 6120 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6121 Train Loss: 7923.91845703125 elapsed: 0.9393291473388672
DEBUG:base_model:epoch 6121 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6121, validation elapsed_time:0.08727598190307617
INFO:base_model:epoch 6121 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6122 Train Loss: 7937.7

INFO:base_model:epoch 6138 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6139 Train Loss: 7930.8291015625 elapsed: 0.7893118858337402
DEBUG:base_model:epoch 6139 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6139, validation elapsed_time:0.08845973014831543
INFO:base_model:epoch 6139 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6140 Train Loss: 7937.740234375 elapsed: 0.8708348274230957
DEBUG:base_model:epoch 6140 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6140, validation elapsed_time:0.12193679809570312
INFO:base_model:epoch 6140 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6141 Train Loss: 7923.91

INFO:base_model:epoch 6157 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6158 Train Loss: 7937.740234375 elapsed: 0.9778690338134766
DEBUG:base_model:epoch 6158 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6158, validation elapsed_time:0.1204371452331543
INFO:base_model:epoch 6158 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6159 Train Loss: 7930.8291015625 elapsed: 0.7186620235443115
DEBUG:base_model:epoch 6159 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6159, validation elapsed_time:0.10215330123901367
INFO:base_model:epoch 6159 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6160 Train Loss: 7923.918

INFO:base_model:epoch 6176 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6177 Train Loss: 7930.8291015625 elapsed: 0.8065080642700195
DEBUG:base_model:epoch 6177 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6177, validation elapsed_time:0.09332609176635742
INFO:base_model:epoch 6177 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6178 Train Loss: 7937.740234375 elapsed: 0.6696481704711914
DEBUG:base_model:epoch 6178 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6178, validation elapsed_time:0.3125119209289551
INFO:base_model:epoch 6178 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6179 Train Loss: 7923.918

INFO:base_model:epoch 6195 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6196 Train Loss: 7910.09619140625 elapsed: 0.7555451393127441
DEBUG:base_model:epoch 6196 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6196, validation elapsed_time:0.09026408195495605
INFO:base_model:epoch 6196 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6197 Train Loss: 7923.91845703125 elapsed: 0.8482329845428467
DEBUG:base_model:epoch 6197 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6197, validation elapsed_time:0.15222811698913574
INFO:base_model:epoch 6197 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6198 Train Loss: 7937

INFO:base_model:epoch 6214 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6215 Train Loss: 7930.8291015625 elapsed: 0.7568719387054443
DEBUG:base_model:epoch 6215 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6215, validation elapsed_time:0.10197925567626953
INFO:base_model:epoch 6215 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6216 Train Loss: 7910.09619140625 elapsed: 0.6930370330810547
DEBUG:base_model:epoch 6216 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6216, validation elapsed_time:0.22480988502502441
INFO:base_model:epoch 6216 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6217 Train Loss: 7923.

INFO:base_model:epoch 6233 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6234 Train Loss: 7937.740234375 elapsed: 0.8864309787750244
DEBUG:base_model:epoch 6234 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6234, validation elapsed_time:0.08426213264465332
INFO:base_model:epoch 6234 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6235 Train Loss: 7951.5625 elapsed: 0.7919440269470215
DEBUG:base_model:epoch 6235 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7951.5625, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6235, validation elapsed_time:0.15717792510986328
INFO:base_model:epoch 6235 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6236 Train Loss: 7903.18505859

INFO:base_model:epoch 6252 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6253 Train Loss: 7917.00732421875 elapsed: 0.8106751441955566
DEBUG:base_model:epoch 6253 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6253, validation elapsed_time:0.10538291931152344
INFO:base_model:epoch 6253 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6254 Train Loss: 7937.740234375 elapsed: 0.7933409214019775
DEBUG:base_model:epoch 6254 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6254, validation elapsed_time:0.08228707313537598
INFO:base_model:epoch 6254 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6255 Train Loss: 7937.7

INFO:base_model:epoch 6271 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6272 Train Loss: 7951.5625 elapsed: 0.7805380821228027
DEBUG:base_model:epoch 6272 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7951.5625, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6272, validation elapsed_time:0.16098999977111816
INFO:base_model:epoch 6272 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6273 Train Loss: 7930.8291015625 elapsed: 0.6788680553436279
DEBUG:base_model:epoch 6273 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6273, validation elapsed_time:0.08233022689819336
INFO:base_model:epoch 6273 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6274 Train Loss: 7937.7402343

INFO:base_model:epoch 6290 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6291 Train Loss: 7923.91845703125 elapsed: 0.8544139862060547
DEBUG:base_model:epoch 6291 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6291, validation elapsed_time:0.1651458740234375
INFO:base_model:epoch 6291 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6292 Train Loss: 7937.740234375 elapsed: 0.8516678810119629
DEBUG:base_model:epoch 6292 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6292, validation elapsed_time:0.13422012329101562
INFO:base_model:epoch 6292 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6293 Train Loss: 7896.27

INFO:base_model:epoch 6309 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6310 Train Loss: 7910.09619140625 elapsed: 0.8418087959289551
DEBUG:base_model:epoch 6310 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6310, validation elapsed_time:0.09215283393859863
INFO:base_model:epoch 6310 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6311 Train Loss: 7923.91845703125 elapsed: 0.7026078701019287
DEBUG:base_model:epoch 6311 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6311, validation elapsed_time:0.2573392391204834
INFO:base_model:epoch 6311 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6312 Train Loss: 7930.

INFO:base_model:epoch 6328 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6329 Train Loss: 7930.8291015625 elapsed: 0.7083232402801514
DEBUG:base_model:epoch 6329 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6329, validation elapsed_time:0.262099027633667
INFO:base_model:epoch 6329 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6330 Train Loss: 7923.91845703125 elapsed: 0.7706699371337891
DEBUG:base_model:epoch 6330 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6330, validation elapsed_time:0.094635009765625
INFO:base_model:epoch 6330 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6331 Train Loss: 7937.7402

INFO:base_model:epoch 6347 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6348 Train Loss: 7917.00732421875 elapsed: 0.83335280418396
DEBUG:base_model:epoch 6348 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6348, validation elapsed_time:0.15837502479553223
INFO:base_model:epoch 6348 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6349 Train Loss: 7923.91845703125 elapsed: 0.6552529335021973
DEBUG:base_model:epoch 6349 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6349, validation elapsed_time:0.08937883377075195
INFO:base_model:epoch 6349 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6350 Train Loss: 7930.8

INFO:base_model:epoch 6366 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6367 Train Loss: 7923.91845703125 elapsed: 0.8511738777160645
DEBUG:base_model:epoch 6367 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6367, validation elapsed_time:0.08857178688049316
INFO:base_model:epoch 6367 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6368 Train Loss: 7944.6513671875 elapsed: 0.6666171550750732
DEBUG:base_model:epoch 6368 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6368, validation elapsed_time:0.08334589004516602
INFO:base_model:epoch 6368 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6369 Train Loss: 7930.

INFO:base_model:epoch 6385 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6386 Train Loss: 7923.91845703125 elapsed: 0.8628039360046387
DEBUG:base_model:epoch 6386 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6386, validation elapsed_time:0.1298079490661621
INFO:base_model:epoch 6386 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6387 Train Loss: 7937.740234375 elapsed: 0.7854199409484863
DEBUG:base_model:epoch 6387 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6387, validation elapsed_time:0.08318829536437988
INFO:base_model:epoch 6387 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6388 Train Loss: 7917.00

INFO:base_model:epoch 6404 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6405 Train Loss: 7937.740234375 elapsed: 0.6272897720336914
DEBUG:base_model:epoch 6405 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6405, validation elapsed_time:0.09408807754516602
INFO:base_model:epoch 6405 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6406 Train Loss: 7937.740234375 elapsed: 0.8568599224090576
DEBUG:base_model:epoch 6406 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6406, validation elapsed_time:0.1384410858154297
INFO:base_model:epoch 6406 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6407 Train Loss: 7937.7402

INFO:base_model:epoch 6423 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6424 Train Loss: 7937.740234375 elapsed: 0.8754720687866211
DEBUG:base_model:epoch 6424 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6424, validation elapsed_time:0.18221712112426758
INFO:base_model:epoch 6424 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6425 Train Loss: 7937.740234375 elapsed: 0.8756201267242432
DEBUG:base_model:epoch 6425 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6425, validation elapsed_time:0.10190510749816895
INFO:base_model:epoch 6425 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6426 Train Loss: 7917.007

INFO:base_model:epoch 6442 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6443 Train Loss: 7910.09619140625 elapsed: 0.7164199352264404
DEBUG:base_model:epoch 6443 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6443, validation elapsed_time:0.09346485137939453
INFO:base_model:epoch 6443 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6444 Train Loss: 7930.8291015625 elapsed: 0.8372540473937988
DEBUG:base_model:epoch 6444 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6444, validation elapsed_time:0.08178305625915527
INFO:base_model:epoch 6444 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6445 Train Loss: 7923.

INFO:base_model:epoch 6461 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6462 Train Loss: 7937.740234375 elapsed: 0.9043188095092773
DEBUG:base_model:epoch 6462 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6462, validation elapsed_time:0.13127994537353516
INFO:base_model:epoch 6462 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6463 Train Loss: 7917.00732421875 elapsed: 1.0314362049102783
DEBUG:base_model:epoch 6463 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6463, validation elapsed_time:0.12346982955932617
INFO:base_model:epoch 6463 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6464 Train Loss: 7910.0

INFO:base_model:epoch 6480 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6481 Train Loss: 7923.91845703125 elapsed: 0.9166998863220215
DEBUG:base_model:epoch 6481 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6481, validation elapsed_time:0.15427470207214355
INFO:base_model:epoch 6481 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6482 Train Loss: 7930.8291015625 elapsed: 0.677170991897583
DEBUG:base_model:epoch 6482 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6482, validation elapsed_time:0.09383511543273926
INFO:base_model:epoch 6482 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6483 Train Loss: 7917.0

INFO:base_model:epoch 6499 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6500 Train Loss: 7930.8291015625 elapsed: 0.8196887969970703
DEBUG:base_model:epoch 6500 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6500, validation elapsed_time:0.09211206436157227
INFO:base_model:epoch 6500 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6501 Train Loss: 7944.6513671875 elapsed: 0.882727861404419
DEBUG:base_model:epoch 6501 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6501, validation elapsed_time:0.21521878242492676
INFO:base_model:epoch 6501 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6502 Train Loss: 7923.91

INFO:base_model:epoch 6518 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6519 Train Loss: 7910.09619140625 elapsed: 0.8269510269165039
DEBUG:base_model:epoch 6519 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6519, validation elapsed_time:0.1722102165222168
INFO:base_model:epoch 6519 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6520 Train Loss: 7917.00732421875 elapsed: 0.791327953338623
DEBUG:base_model:epoch 6520 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6520, validation elapsed_time:0.10480070114135742
INFO:base_model:epoch 6520 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6521 Train Loss: 7917.0

INFO:base_model:epoch 6537 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6538 Train Loss: 7910.09619140625 elapsed: 0.6578712463378906
DEBUG:base_model:epoch 6538 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6538, validation elapsed_time:0.0866849422454834
INFO:base_model:epoch 6538 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6539 Train Loss: 7910.09619140625 elapsed: 0.8283209800720215
DEBUG:base_model:epoch 6539 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6539, validation elapsed_time:0.23563694953918457
INFO:base_model:epoch 6539 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6540 Train Loss: 7930.

INFO:base_model:epoch 6556 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6557 Train Loss: 7937.740234375 elapsed: 1.0027179718017578
DEBUG:base_model:epoch 6557 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6557, validation elapsed_time:0.139174222946167
INFO:base_model:epoch 6557 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6558 Train Loss: 7910.09619140625 elapsed: 0.8061387538909912
DEBUG:base_model:epoch 6558 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6558, validation elapsed_time:0.08742189407348633
INFO:base_model:epoch 6558 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6559 Train Loss: 7903.185

INFO:base_model:epoch 6575 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6576 Train Loss: 7937.740234375 elapsed: 0.6915109157562256
DEBUG:base_model:epoch 6576 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6576, validation elapsed_time:0.10075116157531738
INFO:base_model:epoch 6576 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6577 Train Loss: 7917.00732421875 elapsed: 0.7153229713439941
DEBUG:base_model:epoch 6577 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6577, validation elapsed_time:0.0839078426361084
INFO:base_model:epoch 6577 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6578 Train Loss: 7917.00

INFO:base_model:epoch 6594 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6595 Train Loss: 7923.91845703125 elapsed: 0.7752213478088379
DEBUG:base_model:epoch 6595 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6595, validation elapsed_time:0.09773707389831543
INFO:base_model:epoch 6595 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6596 Train Loss: 7923.91845703125 elapsed: 0.7253928184509277
DEBUG:base_model:epoch 6596 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6596, validation elapsed_time:0.10387706756591797
INFO:base_model:epoch 6596 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6597 Train Loss: 7903

INFO:base_model:epoch 6613 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6614 Train Loss: 7917.00732421875 elapsed: 0.6976678371429443
DEBUG:base_model:epoch 6614 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6614, validation elapsed_time:0.10453557968139648
INFO:base_model:epoch 6614 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6615 Train Loss: 7910.09619140625 elapsed: 0.6868209838867188
DEBUG:base_model:epoch 6615 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6615, validation elapsed_time:0.08481001853942871
INFO:base_model:epoch 6615 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6616 Train Loss: 7903

INFO:base_model:epoch 6632 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6633 Train Loss: 7937.740234375 elapsed: 0.8856351375579834
DEBUG:base_model:epoch 6633 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6633, validation elapsed_time:0.102294921875
INFO:base_model:epoch 6633 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6634 Train Loss: 7930.8291015625 elapsed: 0.6544721126556396
DEBUG:base_model:epoch 6634 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6634, validation elapsed_time:0.08475589752197266
INFO:base_model:epoch 6634 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6635 Train Loss: 7930.8291015

INFO:base_model:epoch 6651 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6652 Train Loss: 7944.6513671875 elapsed: 0.8742640018463135
DEBUG:base_model:epoch 6652 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6652, validation elapsed_time:0.11707377433776855
INFO:base_model:epoch 6652 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6653 Train Loss: 7930.8291015625 elapsed: 0.7987098693847656
DEBUG:base_model:epoch 6653 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6653, validation elapsed_time:0.09574508666992188
INFO:base_model:epoch 6653 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6654 Train Loss: 7937.7

INFO:base_model:epoch 6670 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6671 Train Loss: 7917.00732421875 elapsed: 0.8551919460296631
DEBUG:base_model:epoch 6671 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6671, validation elapsed_time:0.12338376045227051
INFO:base_model:epoch 6671 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6672 Train Loss: 7930.8291015625 elapsed: 0.7907769680023193
DEBUG:base_model:epoch 6672 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6672, validation elapsed_time:0.08541512489318848
INFO:base_model:epoch 6672 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6673 Train Loss: 7937.

INFO:base_model:epoch 6689 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6690 Train Loss: 7917.00732421875 elapsed: 0.6299722194671631
DEBUG:base_model:epoch 6690 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6690, validation elapsed_time:0.11669111251831055
INFO:base_model:epoch 6690 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6691 Train Loss: 7923.91845703125 elapsed: 0.8492758274078369
DEBUG:base_model:epoch 6691 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6691, validation elapsed_time:0.16419291496276855
INFO:base_model:epoch 6691 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6692 Train Loss: 7930

INFO:base_model:epoch 6708 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6709 Train Loss: 7930.8291015625 elapsed: 0.7042429447174072
DEBUG:base_model:epoch 6709 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6709, validation elapsed_time:0.09190917015075684
INFO:base_model:epoch 6709 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6710 Train Loss: 7917.00732421875 elapsed: 0.9538791179656982
DEBUG:base_model:epoch 6710 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6710, validation elapsed_time:0.12859201431274414
INFO:base_model:epoch 6710 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6711 Train Loss: 7930.

INFO:base_model:epoch 6727 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6728 Train Loss: 7930.8291015625 elapsed: 0.6524758338928223
DEBUG:base_model:epoch 6728 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6728, validation elapsed_time:0.32003188133239746
INFO:base_model:epoch 6728 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6729 Train Loss: 7923.91845703125 elapsed: 0.7411890029907227
DEBUG:base_model:epoch 6729 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6729, validation elapsed_time:0.13589787483215332
INFO:base_model:epoch 6729 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6730 Train Loss: 7910.

INFO:base_model:epoch 6746 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6747 Train Loss: 7944.6513671875 elapsed: 0.8163261413574219
DEBUG:base_model:epoch 6747 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6747, validation elapsed_time:0.15567588806152344
INFO:base_model:epoch 6747 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6748 Train Loss: 7944.6513671875 elapsed: 0.6681232452392578
DEBUG:base_model:epoch 6748 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6748, validation elapsed_time:0.07911205291748047
INFO:base_model:epoch 6748 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6749 Train Loss: 7930.8

INFO:base_model:epoch 6765 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6766 Train Loss: 7923.91845703125 elapsed: 0.6810851097106934
DEBUG:base_model:epoch 6766 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6766, validation elapsed_time:0.08181118965148926
INFO:base_model:epoch 6766 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6767 Train Loss: 7930.8291015625 elapsed: 0.7704050540924072
DEBUG:base_model:epoch 6767 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6767, validation elapsed_time:0.1964707374572754
INFO:base_model:epoch 6767 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6768 Train Loss: 7923.9

INFO:base_model:epoch 6784 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6785 Train Loss: 7910.09619140625 elapsed: 0.8911550045013428
DEBUG:base_model:epoch 6785 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6785, validation elapsed_time:0.13143706321716309
INFO:base_model:epoch 6785 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6786 Train Loss: 7930.8291015625 elapsed: 0.8396661281585693
DEBUG:base_model:epoch 6786 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6786, validation elapsed_time:0.08436703681945801
INFO:base_model:epoch 6786 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6787 Train Loss: 7944.

INFO:base_model:epoch 6803 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6804 Train Loss: 7930.8291015625 elapsed: 1.0826830863952637
DEBUG:base_model:epoch 6804 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6804, validation elapsed_time:0.2169322967529297
INFO:base_model:epoch 6804 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6805 Train Loss: 7937.740234375 elapsed: 0.632713794708252
DEBUG:base_model:epoch 6805 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6805, validation elapsed_time:0.10368514060974121
INFO:base_model:epoch 6805 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6806 Train Loss: 7930.8291

INFO:base_model:epoch 6822 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6823 Train Loss: 7917.00732421875 elapsed: 0.7725586891174316
DEBUG:base_model:epoch 6823 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6823, validation elapsed_time:0.08873796463012695
INFO:base_model:epoch 6823 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6824 Train Loss: 7910.09619140625 elapsed: 0.8167190551757812
DEBUG:base_model:epoch 6824 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6824, validation elapsed_time:0.09546494483947754
INFO:base_model:epoch 6824 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6825 Train Loss: 7937

INFO:base_model:epoch 6841 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6842 Train Loss: 7930.8291015625 elapsed: 0.7015619277954102
DEBUG:base_model:epoch 6842 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6842, validation elapsed_time:0.10734272003173828
INFO:base_model:epoch 6842 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6843 Train Loss: 7923.91845703125 elapsed: 0.8336138725280762
DEBUG:base_model:epoch 6843 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6843, validation elapsed_time:0.10050082206726074
INFO:base_model:epoch 6843 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6844 Train Loss: 7930.

INFO:base_model:epoch 6860 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6861 Train Loss: 7937.740234375 elapsed: 0.7288739681243896
DEBUG:base_model:epoch 6861 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6861, validation elapsed_time:0.08693504333496094
INFO:base_model:epoch 6861 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6862 Train Loss: 7937.740234375 elapsed: 0.8191418647766113
DEBUG:base_model:epoch 6862 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6862, validation elapsed_time:0.21875
INFO:base_model:epoch 6862 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6863 Train Loss: 7917.00732421875 ela

INFO:base_model:epoch 6879 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6880 Train Loss: 7923.91845703125 elapsed: 0.7783370018005371
DEBUG:base_model:epoch 6880 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6880, validation elapsed_time:0.10154008865356445
INFO:base_model:epoch 6880 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6881 Train Loss: 7903.18505859375 elapsed: 0.6946091651916504
DEBUG:base_model:epoch 6881 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7903.1851, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6881, validation elapsed_time:0.12431120872497559
INFO:base_model:epoch 6881 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6882 Train Loss: 7923

INFO:base_model:epoch 6898 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6899 Train Loss: 7944.6513671875 elapsed: 0.7866249084472656
DEBUG:base_model:epoch 6899 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6899, validation elapsed_time:0.12460494041442871
INFO:base_model:epoch 6899 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6900 Train Loss: 7937.740234375 elapsed: 0.9204208850860596
DEBUG:base_model:epoch 6900 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6900, validation elapsed_time:0.1958320140838623
INFO:base_model:epoch 6900 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6901 Train Loss: 7923.918

INFO:base_model:epoch 6917 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6918 Train Loss: 7937.740234375 elapsed: 0.9103479385375977
DEBUG:base_model:epoch 6918 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6918, validation elapsed_time:0.1240549087524414
INFO:base_model:epoch 6918 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6919 Train Loss: 7917.00732421875 elapsed: 0.7719478607177734
DEBUG:base_model:epoch 6919 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6919, validation elapsed_time:0.08409285545349121
INFO:base_model:epoch 6919 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6920 Train Loss: 7923.91

INFO:base_model:epoch 6936 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6937 Train Loss: 7937.740234375 elapsed: 0.6647160053253174
DEBUG:base_model:epoch 6937 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6937, validation elapsed_time:0.08982372283935547
INFO:base_model:epoch 6937 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6938 Train Loss: 7930.8291015625 elapsed: 0.7707598209381104
DEBUG:base_model:epoch 6938 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6938, validation elapsed_time:0.16068100929260254
INFO:base_model:epoch 6938 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6939 Train Loss: 7930.82

INFO:base_model:epoch 6955 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6956 Train Loss: 7923.91845703125 elapsed: 0.7254562377929688
DEBUG:base_model:epoch 6956 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6956, validation elapsed_time:0.08518123626708984
INFO:base_model:epoch 6956 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6957 Train Loss: 7930.8291015625 elapsed: 0.8563988208770752
DEBUG:base_model:epoch 6957 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6957, validation elapsed_time:0.16038274765014648
INFO:base_model:epoch 6957 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6958 Train Loss: 7910.

INFO:base_model:epoch 6974 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6975 Train Loss: 7937.740234375 elapsed: 0.7671899795532227
DEBUG:base_model:epoch 6975 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6975, validation elapsed_time:0.08311772346496582
INFO:base_model:epoch 6975 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6976 Train Loss: 7937.740234375 elapsed: 0.8280699253082275
DEBUG:base_model:epoch 6976 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6976, validation elapsed_time:0.12889480590820312
INFO:base_model:epoch 6976 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6977 Train Loss: 7937.740

INFO:base_model:epoch 6993 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6994 Train Loss: 7910.09619140625 elapsed: 0.7579660415649414
DEBUG:base_model:epoch 6994 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6994, validation elapsed_time:0.14679503440856934
INFO:base_model:epoch 6994 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6995 Train Loss: 7937.740234375 elapsed: 0.8363189697265625
DEBUG:base_model:epoch 6995 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:6995, validation elapsed_time:0.15094494819641113
INFO:base_model:epoch 6995 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 6996 Train Loss: 7930.8

INFO:base_model:epoch 7012 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7013 Train Loss: 7923.91845703125 elapsed: 0.7823371887207031
DEBUG:base_model:epoch 7013 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7013, validation elapsed_time:0.15252113342285156
INFO:base_model:epoch 7013 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7014 Train Loss: 7930.8291015625 elapsed: 0.6542229652404785
DEBUG:base_model:epoch 7014 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7014, validation elapsed_time:0.08593416213989258
INFO:base_model:epoch 7014 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7015 Train Loss: 7930.

INFO:base_model:epoch 7031 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7032 Train Loss: 7930.8291015625 elapsed: 0.8427588939666748
DEBUG:base_model:epoch 7032 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7032, validation elapsed_time:0.08395695686340332
INFO:base_model:epoch 7032 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7033 Train Loss: 7910.09619140625 elapsed: 0.8381080627441406
DEBUG:base_model:epoch 7033 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7033, validation elapsed_time:0.14751672744750977
INFO:base_model:epoch 7033 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7034 Train Loss: 7937.

INFO:base_model:epoch 7050 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7051 Train Loss: 7937.740234375 elapsed: 0.7261538505554199
DEBUG:base_model:epoch 7051 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7051, validation elapsed_time:0.09241175651550293
INFO:base_model:epoch 7051 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7052 Train Loss: 7930.8291015625 elapsed: 0.8434047698974609
DEBUG:base_model:epoch 7052 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7052, validation elapsed_time:0.13033008575439453
INFO:base_model:epoch 7052 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7053 Train Loss: 7937.74

INFO:base_model:epoch 7069 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7070 Train Loss: 7917.00732421875 elapsed: 1.0264320373535156
DEBUG:base_model:epoch 7070 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7070, validation elapsed_time:0.15497922897338867
INFO:base_model:epoch 7070 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7071 Train Loss: 7917.00732421875 elapsed: 0.6706349849700928
DEBUG:base_model:epoch 7071 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7071, validation elapsed_time:0.13799238204956055
INFO:base_model:epoch 7071 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7072 Train Loss: 7917

INFO:base_model:epoch 7088 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7089 Train Loss: 7917.00732421875 elapsed: 0.7767701148986816
DEBUG:base_model:epoch 7089 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7089, validation elapsed_time:0.17715096473693848
INFO:base_model:epoch 7089 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7090 Train Loss: 7944.6513671875 elapsed: 0.9093358516693115
DEBUG:base_model:epoch 7090 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7090, validation elapsed_time:0.1219642162322998
INFO:base_model:epoch 7090 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7091 Train Loss: 7923.9

INFO:base_model:epoch 7107 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7108 Train Loss: 7923.91845703125 elapsed: 0.8001699447631836
DEBUG:base_model:epoch 7108 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7108, validation elapsed_time:0.08849620819091797
INFO:base_model:epoch 7108 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7109 Train Loss: 7937.740234375 elapsed: 0.8080859184265137
DEBUG:base_model:epoch 7109 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7109, validation elapsed_time:0.1507561206817627
INFO:base_model:epoch 7109 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7110 Train Loss: 7910.09

INFO:base_model:epoch 7126 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7127 Train Loss: 7937.740234375 elapsed: 0.7080860137939453
DEBUG:base_model:epoch 7127 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7127, validation elapsed_time:0.10437893867492676
INFO:base_model:epoch 7127 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7128 Train Loss: 7923.91845703125 elapsed: 0.7967369556427002
DEBUG:base_model:epoch 7128 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7128, validation elapsed_time:0.08872199058532715
INFO:base_model:epoch 7128 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7129 Train Loss: 7917.0

INFO:base_model:epoch 7145 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7146 Train Loss: 7923.91845703125 elapsed: 0.8106050491333008
DEBUG:base_model:epoch 7146 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7146, validation elapsed_time:0.10914111137390137
INFO:base_model:epoch 7146 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7147 Train Loss: 7903.18505859375 elapsed: 0.9994900226593018
DEBUG:base_model:epoch 7147 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7903.1851, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7147, validation elapsed_time:0.2711050510406494
INFO:base_model:epoch 7147 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7148 Train Loss: 7923.

INFO:base_model:epoch 7164 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7165 Train Loss: 7937.740234375 elapsed: 0.7831387519836426
DEBUG:base_model:epoch 7165 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7165, validation elapsed_time:0.09859514236450195
INFO:base_model:epoch 7165 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7166 Train Loss: 7917.00732421875 elapsed: 0.7573239803314209
DEBUG:base_model:epoch 7166 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7166, validation elapsed_time:0.08318400382995605
INFO:base_model:epoch 7166 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7167 Train Loss: 7917.0

INFO:base_model:epoch 7183 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7184 Train Loss: 7923.91845703125 elapsed: 0.6177248954772949
DEBUG:base_model:epoch 7184 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7184, validation elapsed_time:0.11586284637451172
INFO:base_model:epoch 7184 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7185 Train Loss: 7930.8291015625 elapsed: 0.7397022247314453
DEBUG:base_model:epoch 7185 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7185, validation elapsed_time:0.16789603233337402
INFO:base_model:epoch 7185 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7186 Train Loss: 7923.

INFO:base_model:epoch 7202 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7203 Train Loss: 7930.8291015625 elapsed: 0.854499101638794
DEBUG:base_model:epoch 7203 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7203, validation elapsed_time:0.08497500419616699
INFO:base_model:epoch 7203 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7204 Train Loss: 7930.8291015625 elapsed: 0.891362190246582
DEBUG:base_model:epoch 7204 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7204, validation elapsed_time:0.11984896659851074
INFO:base_model:epoch 7204 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7205 Train Loss: 7923.918

INFO:base_model:epoch 7221 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7222 Train Loss: 7923.91845703125 elapsed: 0.8122339248657227
DEBUG:base_model:epoch 7222 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7222, validation elapsed_time:0.12276911735534668
INFO:base_model:epoch 7222 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7223 Train Loss: 7937.740234375 elapsed: 0.7292768955230713
DEBUG:base_model:epoch 7223 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7223, validation elapsed_time:0.28040385246276855
INFO:base_model:epoch 7223 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7224 Train Loss: 7923.9

INFO:base_model:epoch 7240 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7241 Train Loss: 7930.8291015625 elapsed: 0.7819173336029053
DEBUG:base_model:epoch 7241 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7241, validation elapsed_time:0.10218691825866699
INFO:base_model:epoch 7241 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7242 Train Loss: 7937.740234375 elapsed: 0.7408161163330078
DEBUG:base_model:epoch 7242 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7242, validation elapsed_time:0.16420793533325195
INFO:base_model:epoch 7242 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7243 Train Loss: 7923.91

INFO:base_model:epoch 7259 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7260 Train Loss: 7930.8291015625 elapsed: 0.8934869766235352
DEBUG:base_model:epoch 7260 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7260, validation elapsed_time:0.1015779972076416
INFO:base_model:epoch 7260 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7261 Train Loss: 7930.8291015625 elapsed: 0.7704441547393799
DEBUG:base_model:epoch 7261 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7261, validation elapsed_time:0.08568978309631348
INFO:base_model:epoch 7261 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7262 Train Loss: 7910.09

INFO:base_model:epoch 7278 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7279 Train Loss: 7930.8291015625 elapsed: 0.6954503059387207
DEBUG:base_model:epoch 7279 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7279, validation elapsed_time:0.1287248134613037
INFO:base_model:epoch 7279 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7280 Train Loss: 7923.91845703125 elapsed: 1.3047590255737305
DEBUG:base_model:epoch 7280 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7280, validation elapsed_time:0.25257182121276855
INFO:base_model:epoch 7280 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7281 Train Loss: 7910.0

INFO:base_model:epoch 7297 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7298 Train Loss: 7937.740234375 elapsed: 2.461160182952881
DEBUG:base_model:epoch 7298 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7298, validation elapsed_time:0.2288987636566162
INFO:base_model:epoch 7298 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7299 Train Loss: 7944.6513671875 elapsed: 1.2802000045776367
DEBUG:base_model:epoch 7299 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7299, validation elapsed_time:0.37775373458862305
INFO:base_model:epoch 7299 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7300 Train Loss: 7910.0961

INFO:base_model:epoch 7316 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7317 Train Loss: 7930.8291015625 elapsed: 0.7742609977722168
DEBUG:base_model:epoch 7317 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7317, validation elapsed_time:0.08300065994262695
INFO:base_model:epoch 7317 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7318 Train Loss: 7944.6513671875 elapsed: 0.7958741188049316
DEBUG:base_model:epoch 7318 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7318, validation elapsed_time:0.17900586128234863
INFO:base_model:epoch 7318 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7319 Train Loss: 7917.0

INFO:base_model:epoch 7335 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7336 Train Loss: 7923.91845703125 elapsed: 0.6415553092956543
DEBUG:base_model:epoch 7336 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7336, validation elapsed_time:0.10377287864685059
INFO:base_model:epoch 7336 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7337 Train Loss: 7937.740234375 elapsed: 0.8602371215820312
DEBUG:base_model:epoch 7337 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7337, validation elapsed_time:0.20895791053771973
INFO:base_model:epoch 7337 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7338 Train Loss: 7923.9

INFO:base_model:epoch 7354 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7355 Train Loss: 7917.00732421875 elapsed: 0.7352077960968018
DEBUG:base_model:epoch 7355 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7355, validation elapsed_time:0.0926358699798584
INFO:base_model:epoch 7355 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7356 Train Loss: 7923.91845703125 elapsed: 0.89693284034729
DEBUG:base_model:epoch 7356 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7356, validation elapsed_time:0.29116320610046387
INFO:base_model:epoch 7356 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7357 Train Loss: 7923.91

INFO:base_model:epoch 7373 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7374 Train Loss: 7917.00732421875 elapsed: 0.7266528606414795
DEBUG:base_model:epoch 7374 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7374, validation elapsed_time:0.10624217987060547
INFO:base_model:epoch 7374 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7375 Train Loss: 7930.8291015625 elapsed: 0.70644211769104
DEBUG:base_model:epoch 7375 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7375, validation elapsed_time:0.14110612869262695
INFO:base_model:epoch 7375 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7376 Train Loss: 7917.00

INFO:base_model:epoch 7392 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7393 Train Loss: 7944.6513671875 elapsed: 0.8185667991638184
DEBUG:base_model:epoch 7393 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7393, validation elapsed_time:0.09645581245422363
INFO:base_model:epoch 7393 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7394 Train Loss: 7903.18505859375 elapsed: 0.6706099510192871
DEBUG:base_model:epoch 7394 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7903.1851, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7394, validation elapsed_time:0.08542990684509277
INFO:base_model:epoch 7394 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7395 Train Loss: 7917.

INFO:base_model:epoch 7411 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7412 Train Loss: 7930.8291015625 elapsed: 0.7858688831329346
DEBUG:base_model:epoch 7412 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7412, validation elapsed_time:0.12615513801574707
INFO:base_model:epoch 7412 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7413 Train Loss: 7917.00732421875 elapsed: 0.6039879322052002
DEBUG:base_model:epoch 7413 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7413, validation elapsed_time:0.11984014511108398
INFO:base_model:epoch 7413 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7414 Train Loss: 7937.

INFO:base_model:epoch 7430 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7431 Train Loss: 7930.8291015625 elapsed: 0.8888769149780273
DEBUG:base_model:epoch 7431 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7431, validation elapsed_time:0.11591601371765137
INFO:base_model:epoch 7431 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7432 Train Loss: 7944.6513671875 elapsed: 0.8759658336639404
DEBUG:base_model:epoch 7432 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7432, validation elapsed_time:0.17239022254943848
INFO:base_model:epoch 7432 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7433 Train Loss: 7910.0

INFO:base_model:epoch 7449 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7450 Train Loss: 7917.00732421875 elapsed: 0.6997299194335938
DEBUG:base_model:epoch 7450 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7450, validation elapsed_time:0.09157562255859375
INFO:base_model:epoch 7450 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7451 Train Loss: 7930.8291015625 elapsed: 0.8123588562011719
DEBUG:base_model:epoch 7451 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7451, validation elapsed_time:0.17678594589233398
INFO:base_model:epoch 7451 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7452 Train Loss: 7910.

INFO:base_model:epoch 7468 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7469 Train Loss: 7930.8291015625 elapsed: 0.733483076095581
DEBUG:base_model:epoch 7469 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7469, validation elapsed_time:0.13702774047851562
INFO:base_model:epoch 7469 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7470 Train Loss: 7923.91845703125 elapsed: 0.7399280071258545
DEBUG:base_model:epoch 7470 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7470, validation elapsed_time:0.16060400009155273
INFO:base_model:epoch 7470 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7471 Train Loss: 7923.9

INFO:base_model:epoch 7487 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7488 Train Loss: 7917.00732421875 elapsed: 0.795198917388916
DEBUG:base_model:epoch 7488 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7488, validation elapsed_time:0.12345504760742188
INFO:base_model:epoch 7488 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7489 Train Loss: 7923.91845703125 elapsed: 0.624100923538208
DEBUG:base_model:epoch 7489 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7489, validation elapsed_time:0.13206911087036133
INFO:base_model:epoch 7489 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7490 Train Loss: 7930.8

INFO:base_model:epoch 7506 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7507 Train Loss: 7923.91845703125 elapsed: 0.7792377471923828
DEBUG:base_model:epoch 7507 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7507, validation elapsed_time:0.08542180061340332
INFO:base_model:epoch 7507 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7508 Train Loss: 7930.8291015625 elapsed: 0.8609390258789062
DEBUG:base_model:epoch 7508 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7508, validation elapsed_time:0.08531999588012695
INFO:base_model:epoch 7508 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7509 Train Loss: 7951.

INFO:base_model:epoch 7525 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7526 Train Loss: 7917.00732421875 elapsed: 0.9416921138763428
DEBUG:base_model:epoch 7526 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7526, validation elapsed_time:0.09956789016723633
INFO:base_model:epoch 7526 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7527 Train Loss: 7937.740234375 elapsed: 0.749640941619873
DEBUG:base_model:epoch 7527 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7527, validation elapsed_time:0.09542679786682129
INFO:base_model:epoch 7527 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7528 Train Loss: 7937.74

INFO:base_model:epoch 7544 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7545 Train Loss: 7930.8291015625 elapsed: 0.775346040725708
DEBUG:base_model:epoch 7545 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7545, validation elapsed_time:0.09725809097290039
INFO:base_model:epoch 7545 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7546 Train Loss: 7930.8291015625 elapsed: 0.6932859420776367
DEBUG:base_model:epoch 7546 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7546, validation elapsed_time:0.12389111518859863
INFO:base_model:epoch 7546 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7547 Train Loss: 7944.65

INFO:base_model:epoch 7563 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7564 Train Loss: 7930.8291015625 elapsed: 0.7672879695892334
DEBUG:base_model:epoch 7564 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7564, validation elapsed_time:0.08341407775878906
INFO:base_model:epoch 7564 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7565 Train Loss: 7937.740234375 elapsed: 0.8574390411376953
DEBUG:base_model:epoch 7565 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7565, validation elapsed_time:0.08570098876953125
INFO:base_model:epoch 7565 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7566 Train Loss: 7944.65

INFO:base_model:epoch 7582 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7583 Train Loss: 7903.18505859375 elapsed: 0.8496050834655762
DEBUG:base_model:epoch 7583 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7903.1851, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7583, validation elapsed_time:0.10193419456481934
INFO:base_model:epoch 7583 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7584 Train Loss: 7917.00732421875 elapsed: 0.825157880783081
DEBUG:base_model:epoch 7584 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7584, validation elapsed_time:0.28144407272338867
INFO:base_model:epoch 7584 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7585 Train Loss: 7923.

INFO:base_model:epoch 7601 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7602 Train Loss: 7944.6513671875 elapsed: 0.8425149917602539
DEBUG:base_model:epoch 7602 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7602, validation elapsed_time:0.10814285278320312
INFO:base_model:epoch 7602 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7603 Train Loss: 7930.8291015625 elapsed: 0.8208229541778564
DEBUG:base_model:epoch 7603 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7603, validation elapsed_time:0.11414909362792969
INFO:base_model:epoch 7603 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7604 Train Loss: 7917.0

INFO:base_model:epoch 7620 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7621 Train Loss: 7917.00732421875 elapsed: 0.826840877532959
DEBUG:base_model:epoch 7621 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7621, validation elapsed_time:0.09578299522399902
INFO:base_model:epoch 7621 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7622 Train Loss: 7923.91845703125 elapsed: 0.6713552474975586
DEBUG:base_model:epoch 7622 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7622, validation elapsed_time:0.08604097366333008
INFO:base_model:epoch 7622 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7623 Train Loss: 7917.

INFO:base_model:epoch 7639 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7640 Train Loss: 7937.740234375 elapsed: 0.9935197830200195
DEBUG:base_model:epoch 7640 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7640, validation elapsed_time:0.0846700668334961
INFO:base_model:epoch 7640 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7641 Train Loss: 7937.740234375 elapsed: 0.8633096218109131
DEBUG:base_model:epoch 7641 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7641, validation elapsed_time:0.1082148551940918
INFO:base_model:epoch 7641 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7642 Train Loss: 7923.91845

INFO:base_model:epoch 7658 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7659 Train Loss: 7923.91845703125 elapsed: 0.7434341907501221
DEBUG:base_model:epoch 7659 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7659, validation elapsed_time:0.15958189964294434
INFO:base_model:epoch 7659 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7660 Train Loss: 7923.91845703125 elapsed: 0.9271001815795898
DEBUG:base_model:epoch 7660 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7660, validation elapsed_time:0.13044500350952148
INFO:base_model:epoch 7660 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7661 Train Loss: 7910

INFO:base_model:epoch 7677 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7678 Train Loss: 7923.91845703125 elapsed: 0.7294278144836426
DEBUG:base_model:epoch 7678 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7678, validation elapsed_time:0.08523178100585938
INFO:base_model:epoch 7678 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7679 Train Loss: 7930.8291015625 elapsed: 0.7953150272369385
DEBUG:base_model:epoch 7679 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7679, validation elapsed_time:0.08572983741760254
INFO:base_model:epoch 7679 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7680 Train Loss: 7923.

INFO:base_model:epoch 7696 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7697 Train Loss: 7923.91845703125 elapsed: 0.8376340866088867
DEBUG:base_model:epoch 7697 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7697, validation elapsed_time:0.12960410118103027
INFO:base_model:epoch 7697 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7698 Train Loss: 7937.740234375 elapsed: 0.893345832824707
DEBUG:base_model:epoch 7698 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7698, validation elapsed_time:0.19516801834106445
INFO:base_model:epoch 7698 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7699 Train Loss: 7917.00

INFO:base_model:epoch 7715 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7716 Train Loss: 7930.8291015625 elapsed: 0.8295502662658691
DEBUG:base_model:epoch 7716 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7716, validation elapsed_time:0.14599919319152832
INFO:base_model:epoch 7716 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7717 Train Loss: 7937.740234375 elapsed: 0.8396320343017578
DEBUG:base_model:epoch 7717 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7717, validation elapsed_time:0.10024118423461914
INFO:base_model:epoch 7717 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7718 Train Loss: 7930.82

INFO:base_model:epoch 7734 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7735 Train Loss: 7923.91845703125 elapsed: 0.6709690093994141
DEBUG:base_model:epoch 7735 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7735, validation elapsed_time:0.13531112670898438
INFO:base_model:epoch 7735 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7736 Train Loss: 7917.00732421875 elapsed: 0.6401839256286621
DEBUG:base_model:epoch 7736 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7736, validation elapsed_time:0.07910990715026855
INFO:base_model:epoch 7736 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7737 Train Loss: 7937

INFO:base_model:epoch 7753 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7754 Train Loss: 7937.740234375 elapsed: 0.840609073638916
DEBUG:base_model:epoch 7754 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7754, validation elapsed_time:0.09977602958679199
INFO:base_model:epoch 7754 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7755 Train Loss: 7937.740234375 elapsed: 0.7680971622467041
DEBUG:base_model:epoch 7755 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7755, validation elapsed_time:0.08971285820007324
INFO:base_model:epoch 7755 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7756 Train Loss: 7923.9184

INFO:base_model:epoch 7772 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7773 Train Loss: 7917.00732421875 elapsed: 0.7295501232147217
DEBUG:base_model:epoch 7773 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7773, validation elapsed_time:0.08188796043395996
INFO:base_model:epoch 7773 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7774 Train Loss: 7917.00732421875 elapsed: 0.6399281024932861
DEBUG:base_model:epoch 7774 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7774, validation elapsed_time:0.11736702919006348
INFO:base_model:epoch 7774 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7775 Train Loss: 7944

INFO:base_model:epoch 7791 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7792 Train Loss: 7937.740234375 elapsed: 0.642833948135376
DEBUG:base_model:epoch 7792 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7792, validation elapsed_time:0.07482600212097168
INFO:base_model:epoch 7792 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7793 Train Loss: 7923.91845703125 elapsed: 0.7441539764404297
DEBUG:base_model:epoch 7793 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7793, validation elapsed_time:0.07709288597106934
INFO:base_model:epoch 7793 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7794 Train Loss: 7917.00

INFO:base_model:epoch 7810 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7811 Train Loss: 7917.00732421875 elapsed: 0.6974880695343018
DEBUG:base_model:epoch 7811 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7811, validation elapsed_time:0.08048796653747559
INFO:base_model:epoch 7811 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7812 Train Loss: 7930.8291015625 elapsed: 0.7043418884277344
DEBUG:base_model:epoch 7812 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7812, validation elapsed_time:0.07793903350830078
INFO:base_model:epoch 7812 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7813 Train Loss: 7910.

INFO:base_model:epoch 7829 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7830 Train Loss: 7930.8291015625 elapsed: 0.5976912975311279
DEBUG:base_model:epoch 7830 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7830, validation elapsed_time:0.09422183036804199
INFO:base_model:epoch 7830 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7831 Train Loss: 7917.00732421875 elapsed: 0.7323870658874512
DEBUG:base_model:epoch 7831 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7831, validation elapsed_time:0.15216684341430664
INFO:base_model:epoch 7831 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7832 Train Loss: 7923.

INFO:base_model:epoch 7848 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7849 Train Loss: 7930.8291015625 elapsed: 0.6467118263244629
DEBUG:base_model:epoch 7849 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7849, validation elapsed_time:0.07549715042114258
INFO:base_model:epoch 7849 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7850 Train Loss: 7910.09619140625 elapsed: 0.6986722946166992
DEBUG:base_model:epoch 7850 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7850, validation elapsed_time:0.1163790225982666
INFO:base_model:epoch 7850 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7851 Train Loss: 7917.0

INFO:base_model:epoch 7867 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7868 Train Loss: 7923.91845703125 elapsed: 0.6957399845123291
DEBUG:base_model:epoch 7868 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7868, validation elapsed_time:0.08973479270935059
INFO:base_model:epoch 7868 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7869 Train Loss: 7923.91845703125 elapsed: 0.7731387615203857
DEBUG:base_model:epoch 7869 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7869, validation elapsed_time:0.1348569393157959
INFO:base_model:epoch 7869 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7870 Train Loss: 7944.

INFO:base_model:epoch 7886 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7887 Train Loss: 7917.00732421875 elapsed: 0.7507939338684082
DEBUG:base_model:epoch 7887 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7887, validation elapsed_time:0.1463611125946045
INFO:base_model:epoch 7887 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7888 Train Loss: 7910.09619140625 elapsed: 0.7111480236053467
DEBUG:base_model:epoch 7888 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7888, validation elapsed_time:0.0886690616607666
INFO:base_model:epoch 7888 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7889 Train Loss: 7951.5

INFO:base_model:epoch 7905 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7906 Train Loss: 7917.00732421875 elapsed: 0.9424989223480225
DEBUG:base_model:epoch 7906 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7906, validation elapsed_time:0.1274709701538086
INFO:base_model:epoch 7906 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7907 Train Loss: 7930.8291015625 elapsed: 0.8770699501037598
DEBUG:base_model:epoch 7907 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7907, validation elapsed_time:0.1461319923400879
INFO:base_model:epoch 7907 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7908 Train Loss: 7937.74

INFO:base_model:epoch 7924 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7925 Train Loss: 7910.09619140625 elapsed: 0.8642990589141846
DEBUG:base_model:epoch 7925 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7925, validation elapsed_time:0.12061691284179688
INFO:base_model:epoch 7925 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7926 Train Loss: 7917.00732421875 elapsed: 0.6769828796386719
DEBUG:base_model:epoch 7926 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7926, validation elapsed_time:0.0848538875579834
INFO:base_model:epoch 7926 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7927 Train Loss: 7937.

INFO:base_model:epoch 7943 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7944 Train Loss: 7937.740234375 elapsed: 0.6074619293212891
DEBUG:base_model:epoch 7944 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7944, validation elapsed_time:0.1204221248626709
INFO:base_model:epoch 7944 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7945 Train Loss: 7917.00732421875 elapsed: 0.8118209838867188
DEBUG:base_model:epoch 7945 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7945, validation elapsed_time:0.08921003341674805
INFO:base_model:epoch 7945 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7946 Train Loss: 7937.74

INFO:base_model:epoch 7962 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7963 Train Loss: 7937.740234375 elapsed: 0.7069690227508545
DEBUG:base_model:epoch 7963 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7963, validation elapsed_time:0.1367359161376953
INFO:base_model:epoch 7963 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7964 Train Loss: 7944.6513671875 elapsed: 0.5871720314025879
DEBUG:base_model:epoch 7964 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7964, validation elapsed_time:0.08411717414855957
INFO:base_model:epoch 7964 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7965 Train Loss: 7930.829

INFO:base_model:epoch 7981 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7982 Train Loss: 7930.8291015625 elapsed: 0.6036128997802734
DEBUG:base_model:epoch 7982 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7982, validation elapsed_time:0.18380999565124512
INFO:base_model:epoch 7982 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7983 Train Loss: 7930.8291015625 elapsed: 0.757620096206665
DEBUG:base_model:epoch 7983 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:7983, validation elapsed_time:0.09576296806335449
INFO:base_model:epoch 7983 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 7984 Train Loss: 7944.65

INFO:base_model:epoch 8000 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8001 Train Loss: 7930.8291015625 elapsed: 0.7123022079467773
DEBUG:base_model:epoch 8001 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8001, validation elapsed_time:0.12742996215820312
INFO:base_model:epoch 8001 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8002 Train Loss: 7917.00732421875 elapsed: 0.6518950462341309
DEBUG:base_model:epoch 8002 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8002, validation elapsed_time:0.07980990409851074
INFO:base_model:epoch 8002 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8003 Train Loss: 7903.

INFO:base_model:epoch 8019 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8020 Train Loss: 7930.8291015625 elapsed: 0.6875200271606445
DEBUG:base_model:epoch 8020 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8020, validation elapsed_time:0.1790618896484375
INFO:base_model:epoch 8020 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8021 Train Loss: 7930.8291015625 elapsed: 0.7252130508422852
DEBUG:base_model:epoch 8021 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8021, validation elapsed_time:0.08768510818481445
INFO:base_model:epoch 8021 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8022 Train Loss: 7910.09

INFO:base_model:epoch 8038 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8039 Train Loss: 7917.00732421875 elapsed: 0.833575963973999
DEBUG:base_model:epoch 8039 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8039, validation elapsed_time:0.12979412078857422
INFO:base_model:epoch 8039 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8040 Train Loss: 7917.00732421875 elapsed: 0.7700908184051514
DEBUG:base_model:epoch 8040 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8040, validation elapsed_time:0.08002614974975586
INFO:base_model:epoch 8040 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8041 Train Loss: 7923.

INFO:base_model:epoch 8057 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8058 Train Loss: 7923.91845703125 elapsed: 0.7472920417785645
DEBUG:base_model:epoch 8058 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8058, validation elapsed_time:0.1134331226348877
INFO:base_model:epoch 8058 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8059 Train Loss: 7923.91845703125 elapsed: 0.6218781471252441
DEBUG:base_model:epoch 8059 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8059, validation elapsed_time:0.11261582374572754
INFO:base_model:epoch 8059 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8060 Train Loss: 7937.

INFO:base_model:epoch 8076 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8077 Train Loss: 7923.91845703125 elapsed: 0.7442610263824463
DEBUG:base_model:epoch 8077 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8077, validation elapsed_time:0.0818631649017334
INFO:base_model:epoch 8077 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8078 Train Loss: 7930.8291015625 elapsed: 0.7654280662536621
DEBUG:base_model:epoch 8078 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8078, validation elapsed_time:0.15148401260375977
INFO:base_model:epoch 8078 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8079 Train Loss: 7923.9

INFO:base_model:epoch 8095 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8096 Train Loss: 7951.5625 elapsed: 0.7766242027282715
DEBUG:base_model:epoch 8096 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7951.5625, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8096, validation elapsed_time:0.12832117080688477
INFO:base_model:epoch 8096 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8097 Train Loss: 7944.6513671875 elapsed: 0.757636308670044
DEBUG:base_model:epoch 8097 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8097, validation elapsed_time:0.078765869140625
INFO:base_model:epoch 8097 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8098 Train Loss: 7923.9184570312

INFO:base_model:epoch 8114 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8115 Train Loss: 7937.740234375 elapsed: 0.7974503040313721
DEBUG:base_model:epoch 8115 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8115, validation elapsed_time:0.1238410472869873
INFO:base_model:epoch 8115 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8116 Train Loss: 7903.18505859375 elapsed: 0.7665250301361084
DEBUG:base_model:epoch 8116 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7903.1851, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8116, validation elapsed_time:0.10319709777832031
INFO:base_model:epoch 8116 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8117 Train Loss: 7923.91

INFO:base_model:epoch 8133 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8134 Train Loss: 7937.740234375 elapsed: 0.6875581741333008
DEBUG:base_model:epoch 8134 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8134, validation elapsed_time:0.07948589324951172
INFO:base_model:epoch 8134 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8135 Train Loss: 7930.8291015625 elapsed: 0.6442220211029053
DEBUG:base_model:epoch 8135 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8135, validation elapsed_time:0.07737898826599121
INFO:base_model:epoch 8135 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8136 Train Loss: 7937.74

INFO:base_model:epoch 8152 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8153 Train Loss: 7930.8291015625 elapsed: 0.7076451778411865
DEBUG:base_model:epoch 8153 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8153, validation elapsed_time:0.11185407638549805
INFO:base_model:epoch 8153 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8154 Train Loss: 7930.8291015625 elapsed: 0.8945448398590088
DEBUG:base_model:epoch 8154 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8154, validation elapsed_time:0.13012909889221191
INFO:base_model:epoch 8154 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8155 Train Loss: 7951.5

INFO:base_model:epoch 8171 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8172 Train Loss: 7923.91845703125 elapsed: 0.8590641021728516
DEBUG:base_model:epoch 8172 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8172, validation elapsed_time:0.10946202278137207
INFO:base_model:epoch 8172 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8173 Train Loss: 7917.00732421875 elapsed: 0.8366889953613281
DEBUG:base_model:epoch 8173 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8173, validation elapsed_time:0.0814211368560791
INFO:base_model:epoch 8173 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8174 Train Loss: 7930.

INFO:base_model:epoch 8190 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8191 Train Loss: 7923.91845703125 elapsed: 0.628460168838501
DEBUG:base_model:epoch 8191 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8191, validation elapsed_time:0.07876825332641602
INFO:base_model:epoch 8191 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8192 Train Loss: 7917.00732421875 elapsed: 0.6423957347869873
DEBUG:base_model:epoch 8192 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8192, validation elapsed_time:0.07974100112915039
INFO:base_model:epoch 8192 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8193 Train Loss: 7917.

INFO:base_model:epoch 8209 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8210 Train Loss: 7910.09619140625 elapsed: 0.5829367637634277
DEBUG:base_model:epoch 8210 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8210, validation elapsed_time:0.09865784645080566
INFO:base_model:epoch 8210 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8211 Train Loss: 7910.09619140625 elapsed: 0.7276968955993652
DEBUG:base_model:epoch 8211 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8211, validation elapsed_time:0.14742398262023926
INFO:base_model:epoch 8211 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8212 Train Loss: 7944

INFO:base_model:epoch 8228 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8229 Train Loss: 7923.91845703125 elapsed: 0.7435040473937988
DEBUG:base_model:epoch 8229 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8229, validation elapsed_time:0.11284685134887695
INFO:base_model:epoch 8229 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8230 Train Loss: 7930.8291015625 elapsed: 0.6899950504302979
DEBUG:base_model:epoch 8230 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8230, validation elapsed_time:0.08204269409179688
INFO:base_model:epoch 8230 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8231 Train Loss: 7930.

INFO:base_model:epoch 8247 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8248 Train Loss: 7923.91845703125 elapsed: 0.8055031299591064
DEBUG:base_model:epoch 8248 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8248, validation elapsed_time:0.11377596855163574
INFO:base_model:epoch 8248 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8249 Train Loss: 7923.91845703125 elapsed: 0.7258927822113037
DEBUG:base_model:epoch 8249 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8249, validation elapsed_time:0.08038902282714844
INFO:base_model:epoch 8249 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8250 Train Loss: 7903

INFO:base_model:epoch 8266 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8267 Train Loss: 7930.8291015625 elapsed: 0.7908508777618408
DEBUG:base_model:epoch 8267 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8267, validation elapsed_time:0.09170889854431152
INFO:base_model:epoch 8267 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8268 Train Loss: 7944.6513671875 elapsed: 0.6311700344085693
DEBUG:base_model:epoch 8268 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8268, validation elapsed_time:0.08098006248474121
INFO:base_model:epoch 8268 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8269 Train Loss: 7930.8

INFO:base_model:epoch 8285 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8286 Train Loss: 7930.8291015625 elapsed: 0.7586328983306885
DEBUG:base_model:epoch 8286 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8286, validation elapsed_time:0.0896158218383789
INFO:base_model:epoch 8286 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8287 Train Loss: 7930.8291015625 elapsed: 0.7717177867889404
DEBUG:base_model:epoch 8287 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8287, validation elapsed_time:0.14472222328186035
INFO:base_model:epoch 8287 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8288 Train Loss: 7944.65

INFO:base_model:epoch 8304 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8305 Train Loss: 7930.8291015625 elapsed: 0.928098201751709
DEBUG:base_model:epoch 8305 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8305, validation elapsed_time:0.08348703384399414
INFO:base_model:epoch 8305 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8306 Train Loss: 7930.8291015625 elapsed: 0.6062772274017334
DEBUG:base_model:epoch 8306 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8306, validation elapsed_time:0.07848191261291504
INFO:base_model:epoch 8306 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8307 Train Loss: 7923.91

INFO:base_model:epoch 8323 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8324 Train Loss: 7930.8291015625 elapsed: 0.6202659606933594
DEBUG:base_model:epoch 8324 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8324, validation elapsed_time:0.07907891273498535
INFO:base_model:epoch 8324 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8325 Train Loss: 7923.91845703125 elapsed: 0.7891683578491211
DEBUG:base_model:epoch 8325 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8325, validation elapsed_time:0.10561323165893555
INFO:base_model:epoch 8325 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8326 Train Loss: 7930.

INFO:base_model:epoch 8342 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8343 Train Loss: 7923.91845703125 elapsed: 0.801516056060791
DEBUG:base_model:epoch 8343 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8343, validation elapsed_time:0.09123396873474121
INFO:base_model:epoch 8343 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8344 Train Loss: 7917.00732421875 elapsed: 0.6382768154144287
DEBUG:base_model:epoch 8344 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8344, validation elapsed_time:0.07581686973571777
INFO:base_model:epoch 8344 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8345 Train Loss: 7917.

INFO:base_model:epoch 8361 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8362 Train Loss: 7923.91845703125 elapsed: 0.6476380825042725
DEBUG:base_model:epoch 8362 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8362, validation elapsed_time:0.09427118301391602
INFO:base_model:epoch 8362 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8363 Train Loss: 7923.91845703125 elapsed: 0.6248841285705566
DEBUG:base_model:epoch 8363 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8363, validation elapsed_time:0.18898892402648926
INFO:base_model:epoch 8363 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8364 Train Loss: 7937

INFO:base_model:epoch 8380 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8381 Train Loss: 7917.00732421875 elapsed: 0.7874050140380859
DEBUG:base_model:epoch 8381 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8381, validation elapsed_time:0.1254870891571045
INFO:base_model:epoch 8381 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8382 Train Loss: 7917.00732421875 elapsed: 0.7195720672607422
DEBUG:base_model:epoch 8382 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8382, validation elapsed_time:0.07956290245056152
INFO:base_model:epoch 8382 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8383 Train Loss: 7917.

INFO:base_model:epoch 8399 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8400 Train Loss: 7923.91845703125 elapsed: 0.6473631858825684
DEBUG:base_model:epoch 8400 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8400, validation elapsed_time:0.09569001197814941
INFO:base_model:epoch 8400 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8401 Train Loss: 7930.8291015625 elapsed: 0.7535960674285889
DEBUG:base_model:epoch 8401 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8401, validation elapsed_time:0.12068295478820801
INFO:base_model:epoch 8401 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8402 Train Loss: 7937.

INFO:base_model:epoch 8418 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8419 Train Loss: 7917.00732421875 elapsed: 0.6240570545196533
DEBUG:base_model:epoch 8419 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8419, validation elapsed_time:0.14310503005981445
INFO:base_model:epoch 8419 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8420 Train Loss: 7930.8291015625 elapsed: 0.8528439998626709
DEBUG:base_model:epoch 8420 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8420, validation elapsed_time:0.09662795066833496
INFO:base_model:epoch 8420 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8421 Train Loss: 7937.

INFO:base_model:epoch 8437 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8438 Train Loss: 7944.6513671875 elapsed: 0.6636672019958496
DEBUG:base_model:epoch 8438 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8438, validation elapsed_time:0.11017870903015137
INFO:base_model:epoch 8438 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8439 Train Loss: 7923.91845703125 elapsed: 0.5918610095977783
DEBUG:base_model:epoch 8439 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8439, validation elapsed_time:0.07847881317138672
INFO:base_model:epoch 8439 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8440 Train Loss: 7910.

INFO:base_model:epoch 8456 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8457 Train Loss: 7937.740234375 elapsed: 0.6634109020233154
DEBUG:base_model:epoch 8457 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8457, validation elapsed_time:0.07967901229858398
INFO:base_model:epoch 8457 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8458 Train Loss: 7917.00732421875 elapsed: 0.8254499435424805
DEBUG:base_model:epoch 8458 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8458, validation elapsed_time:0.11040735244750977
INFO:base_model:epoch 8458 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8459 Train Loss: 7910.0

INFO:base_model:epoch 8475 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8476 Train Loss: 7930.8291015625 elapsed: 0.7079689502716064
DEBUG:base_model:epoch 8476 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8476, validation elapsed_time:0.079071044921875
INFO:base_model:epoch 8476 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8477 Train Loss: 7937.740234375 elapsed: 0.6825752258300781
DEBUG:base_model:epoch 8477 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8477, validation elapsed_time:0.07685708999633789
INFO:base_model:epoch 8477 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8478 Train Loss: 7917.0073

INFO:base_model:epoch 8494 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8495 Train Loss: 7923.91845703125 elapsed: 0.7292571067810059
DEBUG:base_model:epoch 8495 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8495, validation elapsed_time:0.1502079963684082
INFO:base_model:epoch 8495 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8496 Train Loss: 7923.91845703125 elapsed: 0.714033842086792
DEBUG:base_model:epoch 8496 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8496, validation elapsed_time:0.13989496231079102
INFO:base_model:epoch 8496 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8497 Train Loss: 7923.9

INFO:base_model:epoch 8513 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8514 Train Loss: 7930.8291015625 elapsed: 0.7281548976898193
DEBUG:base_model:epoch 8514 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8514, validation elapsed_time:0.08747124671936035
INFO:base_model:epoch 8514 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8515 Train Loss: 7917.00732421875 elapsed: 0.6608011722564697
DEBUG:base_model:epoch 8515 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8515, validation elapsed_time:0.11835002899169922
INFO:base_model:epoch 8515 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8516 Train Loss: 7923.

INFO:base_model:epoch 8532 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8533 Train Loss: 7917.00732421875 elapsed: 0.6408870220184326
DEBUG:base_model:epoch 8533 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8533, validation elapsed_time:0.10733222961425781
INFO:base_model:epoch 8533 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8534 Train Loss: 7923.91845703125 elapsed: 0.8653249740600586
DEBUG:base_model:epoch 8534 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8534, validation elapsed_time:0.09208989143371582
INFO:base_model:epoch 8534 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8535 Train Loss: 7930

INFO:base_model:epoch 8551 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8552 Train Loss: 7917.00732421875 elapsed: 0.8738980293273926
DEBUG:base_model:epoch 8552 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8552, validation elapsed_time:0.1250770092010498
INFO:base_model:epoch 8552 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8553 Train Loss: 7944.6513671875 elapsed: 0.6982607841491699
DEBUG:base_model:epoch 8553 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8553, validation elapsed_time:0.08015584945678711
INFO:base_model:epoch 8553 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8554 Train Loss: 7937.7

INFO:base_model:epoch 8570 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8571 Train Loss: 7937.740234375 elapsed: 0.6261708736419678
DEBUG:base_model:epoch 8571 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8571, validation elapsed_time:0.15610408782958984
INFO:base_model:epoch 8571 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8572 Train Loss: 7930.8291015625 elapsed: 0.7874343395233154
DEBUG:base_model:epoch 8572 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8572, validation elapsed_time:0.08413171768188477
INFO:base_model:epoch 8572 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8573 Train Loss: 7930.82

INFO:base_model:epoch 8589 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8590 Train Loss: 7930.8291015625 elapsed: 0.7632479667663574
DEBUG:base_model:epoch 8590 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8590, validation elapsed_time:0.07817697525024414
INFO:base_model:epoch 8590 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8591 Train Loss: 7930.8291015625 elapsed: 0.607125997543335
DEBUG:base_model:epoch 8591 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8591, validation elapsed_time:0.1380748748779297
INFO:base_model:epoch 8591 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8592 Train Loss: 7917.007

INFO:base_model:epoch 8608 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8609 Train Loss: 7923.91845703125 elapsed: 0.6079518795013428
DEBUG:base_model:epoch 8609 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8609, validation elapsed_time:0.07967615127563477
INFO:base_model:epoch 8609 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8610 Train Loss: 7917.00732421875 elapsed: 0.779583215713501
DEBUG:base_model:epoch 8610 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8610, validation elapsed_time:0.11496996879577637
INFO:base_model:epoch 8610 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8611 Train Loss: 7937.

INFO:base_model:epoch 8627 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8628 Train Loss: 7930.8291015625 elapsed: 0.773780107498169
DEBUG:base_model:epoch 8628 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8628, validation elapsed_time:0.10554695129394531
INFO:base_model:epoch 8628 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8629 Train Loss: 7923.91845703125 elapsed: 0.6826510429382324
DEBUG:base_model:epoch 8629 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8629, validation elapsed_time:0.10805988311767578
INFO:base_model:epoch 8629 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8630 Train Loss: 7923.9

INFO:base_model:epoch 8646 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8647 Train Loss: 7937.740234375 elapsed: 0.7791080474853516
DEBUG:base_model:epoch 8647 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8647, validation elapsed_time:0.08147001266479492
INFO:base_model:epoch 8647 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8648 Train Loss: 7930.8291015625 elapsed: 0.6528432369232178
DEBUG:base_model:epoch 8648 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8648, validation elapsed_time:0.08000516891479492
INFO:base_model:epoch 8648 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8649 Train Loss: 7917.00

INFO:base_model:epoch 8665 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8666 Train Loss: 7930.8291015625 elapsed: 0.8114407062530518
DEBUG:base_model:epoch 8666 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8666, validation elapsed_time:0.11698508262634277
INFO:base_model:epoch 8666 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8667 Train Loss: 7930.8291015625 elapsed: 0.8241050243377686
DEBUG:base_model:epoch 8667 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8667, validation elapsed_time:0.12185406684875488
INFO:base_model:epoch 8667 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8668 Train Loss: 7930.8

INFO:base_model:epoch 8684 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8685 Train Loss: 7917.00732421875 elapsed: 0.6988711357116699
DEBUG:base_model:epoch 8685 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8685, validation elapsed_time:0.07864594459533691
INFO:base_model:epoch 8685 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8686 Train Loss: 7923.91845703125 elapsed: 0.6344456672668457
DEBUG:base_model:epoch 8686 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8686, validation elapsed_time:0.0843510627746582
INFO:base_model:epoch 8686 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8687 Train Loss: 7930.

INFO:base_model:epoch 8703 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8704 Train Loss: 7930.8291015625 elapsed: 0.557898998260498
DEBUG:base_model:epoch 8704 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8704, validation elapsed_time:0.09733176231384277
INFO:base_model:epoch 8704 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8705 Train Loss: 7937.740234375 elapsed: 0.7256689071655273
DEBUG:base_model:epoch 8705 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8705, validation elapsed_time:0.16080093383789062
INFO:base_model:epoch 8705 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8706 Train Loss: 7930.829

INFO:base_model:epoch 8722 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8723 Train Loss: 7930.8291015625 elapsed: 0.8012638092041016
DEBUG:base_model:epoch 8723 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8723, validation elapsed_time:0.09939098358154297
INFO:base_model:epoch 8723 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8724 Train Loss: 7917.00732421875 elapsed: 0.6909699440002441
DEBUG:base_model:epoch 8724 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8724, validation elapsed_time:0.08053708076477051
INFO:base_model:epoch 8724 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8725 Train Loss: 7930.

INFO:base_model:epoch 8741 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8742 Train Loss: 7917.00732421875 elapsed: 0.6305761337280273
DEBUG:base_model:epoch 8742 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8742, validation elapsed_time:0.0831608772277832
INFO:base_model:epoch 8742 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8743 Train Loss: 7917.00732421875 elapsed: 0.9051449298858643
DEBUG:base_model:epoch 8743 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8743, validation elapsed_time:0.10435199737548828
INFO:base_model:epoch 8743 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8744 Train Loss: 7937.

INFO:base_model:epoch 8760 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8761 Train Loss: 7917.00732421875 elapsed: 0.7759649753570557
DEBUG:base_model:epoch 8761 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8761, validation elapsed_time:0.08875393867492676
INFO:base_model:epoch 8761 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8762 Train Loss: 7930.8291015625 elapsed: 0.7453911304473877
DEBUG:base_model:epoch 8762 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8762, validation elapsed_time:0.1321241855621338
INFO:base_model:epoch 8762 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8763 Train Loss: 7917.0

INFO:base_model:epoch 8779 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8780 Train Loss: 7937.740234375 elapsed: 0.6668260097503662
DEBUG:base_model:epoch 8780 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8780, validation elapsed_time:0.11142897605895996
INFO:base_model:epoch 8780 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8781 Train Loss: 7951.5625 elapsed: 0.6500749588012695
DEBUG:base_model:epoch 8781 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7951.5625, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8781, validation elapsed_time:0.08536410331726074
INFO:base_model:epoch 8781 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8782 Train Loss: 7923.91845703

INFO:base_model:epoch 8798 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8799 Train Loss: 7910.09619140625 elapsed: 0.6278178691864014
DEBUG:base_model:epoch 8799 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8799, validation elapsed_time:0.09650111198425293
INFO:base_model:epoch 8799 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8800 Train Loss: 7923.91845703125 elapsed: 0.7538387775421143
DEBUG:base_model:epoch 8800 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8800, validation elapsed_time:0.08721399307250977
INFO:base_model:epoch 8800 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8801 Train Loss: 7923

INFO:base_model:epoch 8817 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8818 Train Loss: 7930.8291015625 elapsed: 0.7303333282470703
DEBUG:base_model:epoch 8818 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8818, validation elapsed_time:0.16184496879577637
INFO:base_model:epoch 8818 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8819 Train Loss: 7917.00732421875 elapsed: 0.6374080181121826
DEBUG:base_model:epoch 8819 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8819, validation elapsed_time:0.0890970230102539
INFO:base_model:epoch 8819 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8820 Train Loss: 7944.6

INFO:base_model:epoch 8836 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8837 Train Loss: 7917.00732421875 elapsed: 0.6141130924224854
DEBUG:base_model:epoch 8837 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8837, validation elapsed_time:0.10059809684753418
INFO:base_model:epoch 8837 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8838 Train Loss: 7923.91845703125 elapsed: 0.6934781074523926
DEBUG:base_model:epoch 8838 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8838, validation elapsed_time:0.09911584854125977
INFO:base_model:epoch 8838 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8839 Train Loss: 7937

INFO:base_model:epoch 8855 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8856 Train Loss: 7944.6513671875 elapsed: 0.6937730312347412
DEBUG:base_model:epoch 8856 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8856, validation elapsed_time:0.07920718193054199
INFO:base_model:epoch 8856 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8857 Train Loss: 7937.740234375 elapsed: 0.732485294342041
DEBUG:base_model:epoch 8857 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8857, validation elapsed_time:0.1383962631225586
INFO:base_model:epoch 8857 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8858 Train Loss: 7944.6513

INFO:base_model:epoch 8874 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8875 Train Loss: 7917.00732421875 elapsed: 0.8398919105529785
DEBUG:base_model:epoch 8875 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8875, validation elapsed_time:0.08447813987731934
INFO:base_model:epoch 8875 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8876 Train Loss: 7937.740234375 elapsed: 0.5931556224822998
DEBUG:base_model:epoch 8876 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8876, validation elapsed_time:0.09579014778137207
INFO:base_model:epoch 8876 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8877 Train Loss: 7930.8

INFO:base_model:epoch 8893 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8894 Train Loss: 7937.740234375 elapsed: 0.5871181488037109
DEBUG:base_model:epoch 8894 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8894, validation elapsed_time:0.09800481796264648
INFO:base_model:epoch 8894 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8895 Train Loss: 7923.91845703125 elapsed: 0.6750552654266357
DEBUG:base_model:epoch 8895 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8895, validation elapsed_time:0.0802009105682373
INFO:base_model:epoch 8895 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8896 Train Loss: 7930.82

INFO:base_model:epoch 8912 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8913 Train Loss: 7937.740234375 elapsed: 0.8536398410797119
DEBUG:base_model:epoch 8913 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8913, validation elapsed_time:0.09676194190979004
INFO:base_model:epoch 8913 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8914 Train Loss: 7917.00732421875 elapsed: 0.80548095703125
DEBUG:base_model:epoch 8914 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8914, validation elapsed_time:0.07619976997375488
INFO:base_model:epoch 8914 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8915 Train Loss: 7923.918

INFO:base_model:epoch 8931 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8932 Train Loss: 7917.00732421875 elapsed: 0.7138957977294922
DEBUG:base_model:epoch 8932 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8932, validation elapsed_time:0.2332470417022705
INFO:base_model:epoch 8932 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8933 Train Loss: 7930.8291015625 elapsed: 0.6337900161743164
DEBUG:base_model:epoch 8933 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8933, validation elapsed_time:0.08352303504943848
INFO:base_model:epoch 8933 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8934 Train Loss: 7930.8

INFO:base_model:epoch 8950 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8951 Train Loss: 7937.740234375 elapsed: 0.7246463298797607
DEBUG:base_model:epoch 8951 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8951, validation elapsed_time:0.0944209098815918
INFO:base_model:epoch 8951 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8952 Train Loss: 7917.00732421875 elapsed: 0.6926379203796387
DEBUG:base_model:epoch 8952 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8952, validation elapsed_time:0.08035111427307129
INFO:base_model:epoch 8952 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8953 Train Loss: 7937.74

INFO:base_model:epoch 8969 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8970 Train Loss: 7903.18505859375 elapsed: 0.7837619781494141
DEBUG:base_model:epoch 8970 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7903.1851, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8970, validation elapsed_time:0.07959675788879395
INFO:base_model:epoch 8970 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8971 Train Loss: 7944.6513671875 elapsed: 0.7353470325469971
DEBUG:base_model:epoch 8971 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8971, validation elapsed_time:0.20267009735107422
INFO:base_model:epoch 8971 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8972 Train Loss: 7923.

INFO:base_model:epoch 8988 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8989 Train Loss: 7937.740234375 elapsed: 0.7011499404907227
DEBUG:base_model:epoch 8989 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8989, validation elapsed_time:0.12388110160827637
INFO:base_model:epoch 8989 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8990 Train Loss: 7917.00732421875 elapsed: 0.6750361919403076
DEBUG:base_model:epoch 8990 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:8990, validation elapsed_time:0.09044718742370605
INFO:base_model:epoch 8990 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 8991 Train Loss: 7903.1

INFO:base_model:epoch 9007 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9008 Train Loss: 7910.09619140625 elapsed: 0.7278158664703369
DEBUG:base_model:epoch 9008 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9008, validation elapsed_time:0.07874703407287598
INFO:base_model:epoch 9008 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9009 Train Loss: 7930.8291015625 elapsed: 0.6510088443756104
DEBUG:base_model:epoch 9009 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9009, validation elapsed_time:0.08150768280029297
INFO:base_model:epoch 9009 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9010 Train Loss: 7923.

INFO:base_model:epoch 9026 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9027 Train Loss: 7917.00732421875 elapsed: 0.7287018299102783
DEBUG:base_model:epoch 9027 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9027, validation elapsed_time:0.09603309631347656
INFO:base_model:epoch 9027 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9028 Train Loss: 7923.91845703125 elapsed: 0.8158359527587891
DEBUG:base_model:epoch 9028 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9028, validation elapsed_time:0.10836029052734375
INFO:base_model:epoch 9028 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9029 Train Loss: 7930

INFO:base_model:epoch 9045 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9046 Train Loss: 7923.91845703125 elapsed: 0.6404502391815186
DEBUG:base_model:epoch 9046 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9046, validation elapsed_time:0.0795907974243164
INFO:base_model:epoch 9046 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9047 Train Loss: 7937.740234375 elapsed: 0.830833911895752
DEBUG:base_model:epoch 9047 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9047, validation elapsed_time:0.09059596061706543
INFO:base_model:epoch 9047 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9048 Train Loss: 7937.740

INFO:base_model:epoch 9064 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9065 Train Loss: 7923.91845703125 elapsed: 0.73612380027771
DEBUG:base_model:epoch 9065 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9065, validation elapsed_time:0.0795142650604248
INFO:base_model:epoch 9065 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9066 Train Loss: 7923.91845703125 elapsed: 0.7205679416656494
DEBUG:base_model:epoch 9066 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9066, validation elapsed_time:0.0903620719909668
INFO:base_model:epoch 9066 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9067 Train Loss: 7930.829

INFO:base_model:epoch 9083 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9084 Train Loss: 7903.18505859375 elapsed: 0.7418718338012695
DEBUG:base_model:epoch 9084 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7903.1851, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9084, validation elapsed_time:0.08807015419006348
INFO:base_model:epoch 9084 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9085 Train Loss: 7917.00732421875 elapsed: 0.7714889049530029
DEBUG:base_model:epoch 9085 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9085, validation elapsed_time:0.14242100715637207
INFO:base_model:epoch 9085 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9086 Train Loss: 7930

INFO:base_model:epoch 9102 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9103 Train Loss: 7937.740234375 elapsed: 0.8031327724456787
DEBUG:base_model:epoch 9103 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9103, validation elapsed_time:0.10185885429382324
INFO:base_model:epoch 9103 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9104 Train Loss: 7910.09619140625 elapsed: 0.7604880332946777
DEBUG:base_model:epoch 9104 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9104, validation elapsed_time:0.08026003837585449
INFO:base_model:epoch 9104 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9105 Train Loss: 7923.9

INFO:base_model:epoch 9121 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9122 Train Loss: 7923.91845703125 elapsed: 0.7166540622711182
DEBUG:base_model:epoch 9122 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9122, validation elapsed_time:0.08015894889831543
INFO:base_model:epoch 9122 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9123 Train Loss: 7917.00732421875 elapsed: 0.7133581638336182
DEBUG:base_model:epoch 9123 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9123, validation elapsed_time:0.08101201057434082
INFO:base_model:epoch 9123 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9124 Train Loss: 7930

INFO:base_model:epoch 9140 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9141 Train Loss: 7923.91845703125 elapsed: 0.873046875
DEBUG:base_model:epoch 9141 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9141, validation elapsed_time:0.10744476318359375
INFO:base_model:epoch 9141 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9142 Train Loss: 7951.5625 elapsed: 0.9702310562133789
DEBUG:base_model:epoch 9142 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7951.5625, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9142, validation elapsed_time:0.13228702545166016
INFO:base_model:epoch 9142 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9143 Train Loss: 7923.91845703125 e

INFO:base_model:epoch 9159 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9160 Train Loss: 7923.91845703125 elapsed: 0.7587840557098389
DEBUG:base_model:epoch 9160 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9160, validation elapsed_time:0.11790895462036133
INFO:base_model:epoch 9160 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9161 Train Loss: 7937.740234375 elapsed: 0.6720409393310547
DEBUG:base_model:epoch 9161 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9161, validation elapsed_time:0.07886910438537598
INFO:base_model:epoch 9161 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9162 Train Loss: 7910.0

INFO:base_model:epoch 9178 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9179 Train Loss: 7923.91845703125 elapsed: 0.6668519973754883
DEBUG:base_model:epoch 9179 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9179, validation elapsed_time:0.10596108436584473
INFO:base_model:epoch 9179 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9180 Train Loss: 7944.6513671875 elapsed: 0.6978929042816162
DEBUG:base_model:epoch 9180 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9180, validation elapsed_time:0.08061790466308594
INFO:base_model:epoch 9180 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9181 Train Loss: 7944.

INFO:base_model:epoch 9197 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9198 Train Loss: 7910.09619140625 elapsed: 0.5573136806488037
DEBUG:base_model:epoch 9198 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9198, validation elapsed_time:0.09245109558105469
INFO:base_model:epoch 9198 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9199 Train Loss: 7923.91845703125 elapsed: 0.6370840072631836
DEBUG:base_model:epoch 9199 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9199, validation elapsed_time:0.19353985786437988
INFO:base_model:epoch 9199 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9200 Train Loss: 7937

INFO:base_model:epoch 9216 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9217 Train Loss: 7937.740234375 elapsed: 0.815316915512085
DEBUG:base_model:epoch 9217 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9217, validation elapsed_time:0.09221792221069336
INFO:base_model:epoch 9217 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9218 Train Loss: 7937.740234375 elapsed: 0.711522102355957
DEBUG:base_model:epoch 9218 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9218, validation elapsed_time:0.0801239013671875
INFO:base_model:epoch 9218 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9219 Train Loss: 7923.918457

INFO:base_model:epoch 9235 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9236 Train Loss: 7917.00732421875 elapsed: 0.8285539150238037
DEBUG:base_model:epoch 9236 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9236, validation elapsed_time:0.08147692680358887
INFO:base_model:epoch 9236 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9237 Train Loss: 7923.91845703125 elapsed: 0.7507119178771973
DEBUG:base_model:epoch 9237 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9237, validation elapsed_time:0.08626627922058105
INFO:base_model:epoch 9237 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9238 Train Loss: 7910

INFO:base_model:epoch 9254 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9255 Train Loss: 7937.740234375 elapsed: 0.7299492359161377
DEBUG:base_model:epoch 9255 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9255, validation elapsed_time:0.08491706848144531
INFO:base_model:epoch 9255 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9256 Train Loss: 7930.8291015625 elapsed: 0.8232440948486328
DEBUG:base_model:epoch 9256 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9256, validation elapsed_time:0.10656213760375977
INFO:base_model:epoch 9256 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9257 Train Loss: 7903.18

INFO:base_model:epoch 9273 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9274 Train Loss: 7937.740234375 elapsed: 0.7131180763244629
DEBUG:base_model:epoch 9274 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9274, validation elapsed_time:0.10771012306213379
INFO:base_model:epoch 9274 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9275 Train Loss: 7917.00732421875 elapsed: 0.7345068454742432
DEBUG:base_model:epoch 9275 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9275, validation elapsed_time:0.07857084274291992
INFO:base_model:epoch 9275 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9276 Train Loss: 7937.7

INFO:base_model:epoch 9292 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9293 Train Loss: 7910.09619140625 elapsed: 0.6597628593444824
DEBUG:base_model:epoch 9293 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9293, validation elapsed_time:0.0790259838104248
INFO:base_model:epoch 9293 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9294 Train Loss: 7923.91845703125 elapsed: 0.7517907619476318
DEBUG:base_model:epoch 9294 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9294, validation elapsed_time:0.12628507614135742
INFO:base_model:epoch 9294 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9295 Train Loss: 7930.

INFO:base_model:epoch 9311 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9312 Train Loss: 7944.6513671875 elapsed: 0.8223669528961182
DEBUG:base_model:epoch 9312 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9312, validation elapsed_time:0.11794304847717285
INFO:base_model:epoch 9312 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9313 Train Loss: 7923.91845703125 elapsed: 0.740839958190918
DEBUG:base_model:epoch 9313 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9313, validation elapsed_time:0.08616924285888672
INFO:base_model:epoch 9313 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9314 Train Loss: 7903.1

INFO:base_model:epoch 9330 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9331 Train Loss: 7944.6513671875 elapsed: 0.6477870941162109
DEBUG:base_model:epoch 9331 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9331, validation elapsed_time:0.08031105995178223
INFO:base_model:epoch 9331 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9332 Train Loss: 7930.8291015625 elapsed: 0.7058918476104736
DEBUG:base_model:epoch 9332 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9332, validation elapsed_time:0.22659707069396973
INFO:base_model:epoch 9332 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9333 Train Loss: 7930.8

INFO:base_model:epoch 9349 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9350 Train Loss: 7937.740234375 elapsed: 0.627830982208252
DEBUG:base_model:epoch 9350 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9350, validation elapsed_time:0.08131814002990723
INFO:base_model:epoch 9350 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9351 Train Loss: 7917.00732421875 elapsed: 0.811363935470581
DEBUG:base_model:epoch 9351 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9351, validation elapsed_time:0.1484212875366211
INFO:base_model:epoch 9351 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9352 Train Loss: 7896.2739

INFO:base_model:epoch 9368 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9369 Train Loss: 7930.8291015625 elapsed: 1.9794909954071045
DEBUG:base_model:epoch 9369 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9369, validation elapsed_time:0.1578209400177002
INFO:base_model:epoch 9369 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9370 Train Loss: 7917.00732421875 elapsed: 1.7222590446472168
DEBUG:base_model:epoch 9370 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9370, validation elapsed_time:0.4386310577392578
INFO:base_model:epoch 9370 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9371 Train Loss: 7930.82

INFO:base_model:epoch 9387 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9388 Train Loss: 7917.00732421875 elapsed: 0.8996920585632324
DEBUG:base_model:epoch 9388 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9388, validation elapsed_time:0.10242104530334473
INFO:base_model:epoch 9388 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9389 Train Loss: 7944.6513671875 elapsed: 0.7549078464508057
DEBUG:base_model:epoch 9389 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9389, validation elapsed_time:0.08225321769714355
INFO:base_model:epoch 9389 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9390 Train Loss: 7923.

INFO:base_model:epoch 9406 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9407 Train Loss: 7937.740234375 elapsed: 0.7195219993591309
DEBUG:base_model:epoch 9407 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9407, validation elapsed_time:0.07922697067260742
INFO:base_model:epoch 9407 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9408 Train Loss: 7937.740234375 elapsed: 0.881925106048584
DEBUG:base_model:epoch 9408 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9408, validation elapsed_time:0.11435604095458984
INFO:base_model:epoch 9408 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9409 Train Loss: 7930.8291

INFO:base_model:epoch 9425 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9426 Train Loss: 7917.00732421875 elapsed: 0.6166400909423828
DEBUG:base_model:epoch 9426 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9426, validation elapsed_time:0.11583209037780762
INFO:base_model:epoch 9426 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9427 Train Loss: 7944.6513671875 elapsed: 0.6875019073486328
DEBUG:base_model:epoch 9427 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9427, validation elapsed_time:0.11615729331970215
INFO:base_model:epoch 9427 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9428 Train Loss: 7923.

INFO:base_model:epoch 9444 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9445 Train Loss: 7917.00732421875 elapsed: 0.7863771915435791
DEBUG:base_model:epoch 9445 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9445, validation elapsed_time:0.13649225234985352
INFO:base_model:epoch 9445 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9446 Train Loss: 7917.00732421875 elapsed: 0.7169959545135498
DEBUG:base_model:epoch 9446 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9446, validation elapsed_time:0.09799814224243164
INFO:base_model:epoch 9446 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9447 Train Loss: 7910

INFO:base_model:epoch 9463 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9464 Train Loss: 7917.00732421875 elapsed: 0.6475479602813721
DEBUG:base_model:epoch 9464 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9464, validation elapsed_time:0.09106707572937012
INFO:base_model:epoch 9464 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9465 Train Loss: 7917.00732421875 elapsed: 0.8501150608062744
DEBUG:base_model:epoch 9465 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9465, validation elapsed_time:0.11800599098205566
INFO:base_model:epoch 9465 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9466 Train Loss: 7917

INFO:base_model:epoch 9482 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9483 Train Loss: 7937.740234375 elapsed: 0.8418619632720947
DEBUG:base_model:epoch 9483 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9483, validation elapsed_time:0.1313920021057129
INFO:base_model:epoch 9483 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9484 Train Loss: 7917.00732421875 elapsed: 0.6713521480560303
DEBUG:base_model:epoch 9484 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9484, validation elapsed_time:0.0818338394165039
INFO:base_model:epoch 9484 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9485 Train Loss: 7917.007

INFO:base_model:epoch 9501 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9502 Train Loss: 7910.09619140625 elapsed: 0.7749102115631104
DEBUG:base_model:epoch 9502 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9502, validation elapsed_time:0.0932459831237793
INFO:base_model:epoch 9502 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9503 Train Loss: 7896.27392578125 elapsed: 0.7220950126647949
DEBUG:base_model:epoch 9503 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7896.2739, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9503, validation elapsed_time:0.08127403259277344
INFO:base_model:epoch 9503 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9504 Train Loss: 7917.

INFO:base_model:epoch 9520 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9521 Train Loss: 7923.91845703125 elapsed: 0.807145357131958
DEBUG:base_model:epoch 9521 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9521, validation elapsed_time:0.0868070125579834
INFO:base_model:epoch 9521 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9522 Train Loss: 7923.91845703125 elapsed: 0.7936029434204102
DEBUG:base_model:epoch 9522 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9522, validation elapsed_time:0.09850692749023438
INFO:base_model:epoch 9522 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9523 Train Loss: 7923.9

INFO:base_model:epoch 9539 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9540 Train Loss: 7910.09619140625 elapsed: 0.7278330326080322
DEBUG:base_model:epoch 9540 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9540, validation elapsed_time:0.08240318298339844
INFO:base_model:epoch 9540 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9541 Train Loss: 7910.09619140625 elapsed: 0.6886940002441406
DEBUG:base_model:epoch 9541 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9541, validation elapsed_time:0.0820460319519043
INFO:base_model:epoch 9541 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9542 Train Loss: 7923.

INFO:base_model:epoch 9558 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9559 Train Loss: 7944.6513671875 elapsed: 0.7513148784637451
DEBUG:base_model:epoch 9559 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9559, validation elapsed_time:0.08075404167175293
INFO:base_model:epoch 9559 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9560 Train Loss: 7917.00732421875 elapsed: 0.7156758308410645
DEBUG:base_model:epoch 9560 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9560, validation elapsed_time:0.08304286003112793
INFO:base_model:epoch 9560 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9561 Train Loss: 7910.

INFO:base_model:epoch 9577 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9578 Train Loss: 7930.8291015625 elapsed: 0.7790169715881348
DEBUG:base_model:epoch 9578 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9578, validation elapsed_time:0.1683061122894287
INFO:base_model:epoch 9578 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9579 Train Loss: 7930.8291015625 elapsed: 0.78615403175354
DEBUG:base_model:epoch 9579 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9579, validation elapsed_time:0.08802199363708496
INFO:base_model:epoch 9579 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9580 Train Loss: 7917.0073

INFO:base_model:epoch 9596 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9597 Train Loss: 7923.91845703125 elapsed: 0.740217924118042
DEBUG:base_model:epoch 9597 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9597, validation elapsed_time:0.08503508567810059
INFO:base_model:epoch 9597 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9598 Train Loss: 7917.00732421875 elapsed: 0.92047119140625
DEBUG:base_model:epoch 9598 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9598, validation elapsed_time:0.11652398109436035
INFO:base_model:epoch 9598 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9599 Train Loss: 7937.74

INFO:base_model:epoch 9615 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9616 Train Loss: 7917.00732421875 elapsed: 0.6748130321502686
DEBUG:base_model:epoch 9616 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9616, validation elapsed_time:0.10230112075805664
INFO:base_model:epoch 9616 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9617 Train Loss: 7930.8291015625 elapsed: 0.8615691661834717
DEBUG:base_model:epoch 9617 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9617, validation elapsed_time:0.12919306755065918
INFO:base_model:epoch 9617 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9618 Train Loss: 7910.

INFO:base_model:epoch 9634 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9635 Train Loss: 7930.8291015625 elapsed: 0.8104488849639893
DEBUG:base_model:epoch 9635 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9635, validation elapsed_time:0.12255215644836426
INFO:base_model:epoch 9635 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9636 Train Loss: 7930.8291015625 elapsed: 0.6462340354919434
DEBUG:base_model:epoch 9636 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9636, validation elapsed_time:0.12019801139831543
INFO:base_model:epoch 9636 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9637 Train Loss: 7930.8

INFO:base_model:epoch 9653 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9654 Train Loss: 7937.740234375 elapsed: 0.7518792152404785
DEBUG:base_model:epoch 9654 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9654, validation elapsed_time:0.1545548439025879
INFO:base_model:epoch 9654 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9655 Train Loss: 7923.91845703125 elapsed: 0.7530770301818848
DEBUG:base_model:epoch 9655 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9655, validation elapsed_time:0.1211860179901123
INFO:base_model:epoch 9655 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9656 Train Loss: 7917.007

INFO:base_model:epoch 9672 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9673 Train Loss: 7923.91845703125 elapsed: 0.8150908946990967
DEBUG:base_model:epoch 9673 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9673, validation elapsed_time:0.08457183837890625
INFO:base_model:epoch 9673 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9674 Train Loss: 7923.91845703125 elapsed: 0.7090771198272705
DEBUG:base_model:epoch 9674 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9674, validation elapsed_time:0.0878598690032959
INFO:base_model:epoch 9674 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9675 Train Loss: 7944.

INFO:base_model:epoch 9691 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9692 Train Loss: 7923.91845703125 elapsed: 0.8705160617828369
DEBUG:base_model:epoch 9692 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9692, validation elapsed_time:0.09315323829650879
INFO:base_model:epoch 9692 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9693 Train Loss: 7917.00732421875 elapsed: 0.6319911479949951
DEBUG:base_model:epoch 9693 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9693, validation elapsed_time:0.08326888084411621
INFO:base_model:epoch 9693 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9694 Train Loss: 7923

INFO:base_model:epoch 9710 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9711 Train Loss: 7923.91845703125 elapsed: 0.859468936920166
DEBUG:base_model:epoch 9711 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9711, validation elapsed_time:0.10196924209594727
INFO:base_model:epoch 9711 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9712 Train Loss: 7944.6513671875 elapsed: 0.9929330348968506
DEBUG:base_model:epoch 9712 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9712, validation elapsed_time:0.13009905815124512
INFO:base_model:epoch 9712 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9713 Train Loss: 7910.0

INFO:base_model:epoch 9729 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9730 Train Loss: 7930.8291015625 elapsed: 0.6688089370727539
DEBUG:base_model:epoch 9730 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9730, validation elapsed_time:0.08184337615966797
INFO:base_model:epoch 9730 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9731 Train Loss: 7944.6513671875 elapsed: 0.6576151847839355
DEBUG:base_model:epoch 9731 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9731, validation elapsed_time:0.08063888549804688
INFO:base_model:epoch 9731 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9732 Train Loss: 7930.8

INFO:base_model:epoch 9748 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9749 Train Loss: 7923.91845703125 elapsed: 0.7857401371002197
DEBUG:base_model:epoch 9749 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9749, validation elapsed_time:0.08285713195800781
INFO:base_model:epoch 9749 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9750 Train Loss: 7917.00732421875 elapsed: 0.715752124786377
DEBUG:base_model:epoch 9750 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9750, validation elapsed_time:0.08065581321716309
INFO:base_model:epoch 9750 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9751 Train Loss: 7930.

INFO:base_model:epoch 9767 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9768 Train Loss: 7923.91845703125 elapsed: 0.8030960559844971
DEBUG:base_model:epoch 9768 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9768, validation elapsed_time:0.14171791076660156
INFO:base_model:epoch 9768 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9769 Train Loss: 7944.6513671875 elapsed: 0.8572628498077393
DEBUG:base_model:epoch 9769 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9769, validation elapsed_time:0.08224821090698242
INFO:base_model:epoch 9769 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9770 Train Loss: 7930.

INFO:base_model:epoch 9786 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9787 Train Loss: 7937.740234375 elapsed: 0.7012579441070557
DEBUG:base_model:epoch 9787 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9787, validation elapsed_time:0.0810861587524414
INFO:base_model:epoch 9787 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9788 Train Loss: 7923.91845703125 elapsed: 0.9310736656188965
DEBUG:base_model:epoch 9788 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9788, validation elapsed_time:0.10555124282836914
INFO:base_model:epoch 9788 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9789 Train Loss: 7910.09

INFO:base_model:epoch 9805 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9806 Train Loss: 7930.8291015625 elapsed: 0.8825621604919434
DEBUG:base_model:epoch 9806 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9806, validation elapsed_time:0.0937337875366211
INFO:base_model:epoch 9806 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9807 Train Loss: 7910.09619140625 elapsed: 0.6682939529418945
DEBUG:base_model:epoch 9807 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9807, validation elapsed_time:0.08250975608825684
INFO:base_model:epoch 9807 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9808 Train Loss: 7944.6

INFO:base_model:epoch 9824 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9825 Train Loss: 7910.09619140625 elapsed: 0.7855422496795654
DEBUG:base_model:epoch 9825 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9825, validation elapsed_time:0.09442496299743652
INFO:base_model:epoch 9825 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9826 Train Loss: 7937.740234375 elapsed: 0.7078180313110352
DEBUG:base_model:epoch 9826 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9826, validation elapsed_time:0.08221721649169922
INFO:base_model:epoch 9826 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9827 Train Loss: 7930.8

INFO:base_model:epoch 9843 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9844 Train Loss: 7930.8291015625 elapsed: 0.805743932723999
DEBUG:base_model:epoch 9844 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9844, validation elapsed_time:0.08068418502807617
INFO:base_model:epoch 9844 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9845 Train Loss: 7930.8291015625 elapsed: 0.8467910289764404
DEBUG:base_model:epoch 9845 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9845, validation elapsed_time:0.12900590896606445
INFO:base_model:epoch 9845 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9846 Train Loss: 7930.82

INFO:base_model:epoch 9862 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9863 Train Loss: 7917.00732421875 elapsed: 0.7788801193237305
DEBUG:base_model:epoch 9863 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9863, validation elapsed_time:0.13036370277404785
INFO:base_model:epoch 9863 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9864 Train Loss: 7930.8291015625 elapsed: 0.7780678272247314
DEBUG:base_model:epoch 9864 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9864, validation elapsed_time:0.08796501159667969
INFO:base_model:epoch 9864 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9865 Train Loss: 7937.

INFO:base_model:epoch 9881 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9882 Train Loss: 7930.8291015625 elapsed: 0.6416592597961426
DEBUG:base_model:epoch 9882 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9882, validation elapsed_time:0.09767484664916992
INFO:base_model:epoch 9882 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9883 Train Loss: 7944.6513671875 elapsed: 0.8643572330474854
DEBUG:base_model:epoch 9883 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9883, validation elapsed_time:0.09580397605895996
INFO:base_model:epoch 9883 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9884 Train Loss: 7930.8

INFO:base_model:epoch 9900 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9901 Train Loss: 7937.740234375 elapsed: 0.8982222080230713
DEBUG:base_model:epoch 9901 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9901, validation elapsed_time:0.20461177825927734
INFO:base_model:epoch 9901 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9902 Train Loss: 7917.00732421875 elapsed: 0.847121000289917
DEBUG:base_model:epoch 9902 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9902, validation elapsed_time:0.20844221115112305
INFO:base_model:epoch 9902 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9903 Train Loss: 7944.65

INFO:base_model:epoch 9919 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9920 Train Loss: 7944.6513671875 elapsed: 0.7830028533935547
DEBUG:base_model:epoch 9920 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9920, validation elapsed_time:0.1220552921295166
INFO:base_model:epoch 9920 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9921 Train Loss: 7917.00732421875 elapsed: 0.6476991176605225
DEBUG:base_model:epoch 9921 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9921, validation elapsed_time:0.10007381439208984
INFO:base_model:epoch 9921 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9922 Train Loss: 7923.9

INFO:base_model:epoch 9938 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9939 Train Loss: 7923.91845703125 elapsed: 0.7906548976898193
DEBUG:base_model:epoch 9939 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9939, validation elapsed_time:0.08268904685974121
INFO:base_model:epoch 9939 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9940 Train Loss: 7917.00732421875 elapsed: 0.7777643203735352
DEBUG:base_model:epoch 9940 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9940, validation elapsed_time:0.14606904983520508
INFO:base_model:epoch 9940 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9941 Train Loss: 7937

INFO:base_model:epoch 9957 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9958 Train Loss: 7903.18505859375 elapsed: 0.8104608058929443
DEBUG:base_model:epoch 9958 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7903.1851, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9958, validation elapsed_time:0.11471009254455566
INFO:base_model:epoch 9958 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9959 Train Loss: 7930.8291015625 elapsed: 0.6261019706726074
DEBUG:base_model:epoch 9959 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9959, validation elapsed_time:0.12588167190551758
INFO:base_model:epoch 9959 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9960 Train Loss: 7937.

INFO:base_model:epoch 9976 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9977 Train Loss: 7937.740234375 elapsed: 0.8175899982452393
DEBUG:base_model:epoch 9977 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9977, validation elapsed_time:0.12156009674072266
INFO:base_model:epoch 9977 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9978 Train Loss: 7930.8291015625 elapsed: 0.7882180213928223
DEBUG:base_model:epoch 9978 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9978, validation elapsed_time:0.07751870155334473
INFO:base_model:epoch 9978 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9979 Train Loss: 7930.82

INFO:base_model:epoch 9995 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9996 Train Loss: 7944.6513671875 elapsed: 0.7934877872467041
DEBUG:base_model:epoch 9996 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9996, validation elapsed_time:0.1261909008026123
INFO:base_model:epoch 9996 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9997 Train Loss: 7917.00732421875 elapsed: 0.8310079574584961
DEBUG:base_model:epoch 9997 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:9997, validation elapsed_time:0.17406320571899414
INFO:base_model:epoch 9997 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 9998 Train Loss: 7930.8

INFO:model.base_model:cur_epoch:10014, validation elapsed_time:0.17743396759033203
INFO:base_model:epoch 10014 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10015 Train Loss: 7930.8291015625 elapsed: 0.7097110748291016
DEBUG:base_model:epoch 10015 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10015, validation elapsed_time:0.113037109375
INFO:base_model:epoch 10015 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10016 Train Loss: 7917.00732421875 elapsed: 0.6979236602783203
DEBUG:base_model:epoch 10016 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10016, validation elapsed_time:0.11985087394714355
INFO:base_model:epoch 10016 val result: {'loss'

INFO:model.base_model:cur_epoch:10033, validation elapsed_time:0.08337998390197754
INFO:base_model:epoch 10033 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10034 Train Loss: 7937.740234375 elapsed: 0.7626361846923828
DEBUG:base_model:epoch 10034 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10034, validation elapsed_time:0.10962820053100586
INFO:base_model:epoch 10034 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10035 Train Loss: 7944.6513671875 elapsed: 0.7741570472717285
DEBUG:base_model:epoch 10035 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10035, validation elapsed_time:0.12055110931396484
INFO:base_model:epoch 10035 val result: {'lo

INFO:model.base_model:cur_epoch:10052, validation elapsed_time:0.17509722709655762
INFO:base_model:epoch 10052 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10053 Train Loss: 7937.740234375 elapsed: 0.714846134185791
DEBUG:base_model:epoch 10053 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10053, validation elapsed_time:0.11665487289428711
INFO:base_model:epoch 10053 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10054 Train Loss: 7903.18505859375 elapsed: 0.6938958168029785
DEBUG:base_model:epoch 10054 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7903.1851, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10054, validation elapsed_time:0.1156473159790039
INFO:base_model:epoch 10054 val result: {'los

INFO:model.base_model:cur_epoch:10071, validation elapsed_time:0.08342194557189941
INFO:base_model:epoch 10071 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10072 Train Loss: 7910.09619140625 elapsed: 0.834115743637085
DEBUG:base_model:epoch 10072 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10072, validation elapsed_time:0.143388032913208
INFO:base_model:epoch 10072 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10073 Train Loss: 7930.8291015625 elapsed: 0.8193817138671875
DEBUG:base_model:epoch 10073 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10073, validation elapsed_time:0.10261917114257812
INFO:base_model:epoch 10073 val result: {'los

INFO:model.base_model:cur_epoch:10090, validation elapsed_time:0.08071088790893555
INFO:base_model:epoch 10090 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10091 Train Loss: 7923.91845703125 elapsed: 0.6628017425537109
DEBUG:base_model:epoch 10091 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10091, validation elapsed_time:0.10285162925720215
INFO:base_model:epoch 10091 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10092 Train Loss: 7930.8291015625 elapsed: 0.7756657600402832
DEBUG:base_model:epoch 10092 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10092, validation elapsed_time:0.12870001792907715
INFO:base_model:epoch 10092 val result: {'

INFO:model.base_model:cur_epoch:10109, validation elapsed_time:0.10583710670471191
INFO:base_model:epoch 10109 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10110 Train Loss: 7923.91845703125 elapsed: 0.8179600238800049
DEBUG:base_model:epoch 10110 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10110, validation elapsed_time:0.14029717445373535
INFO:base_model:epoch 10110 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10111 Train Loss: 7917.00732421875 elapsed: 0.6736471652984619
DEBUG:base_model:epoch 10111 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10111, validation elapsed_time:0.0795891284942627
INFO:base_model:epoch 10111 val result: {'

INFO:model.base_model:cur_epoch:10128, validation elapsed_time:0.09815120697021484
INFO:base_model:epoch 10128 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10129 Train Loss: 7917.00732421875 elapsed: 0.7725679874420166
DEBUG:base_model:epoch 10129 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10129, validation elapsed_time:0.11359000205993652
INFO:base_model:epoch 10129 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10130 Train Loss: 7923.91845703125 elapsed: 0.8485560417175293
DEBUG:base_model:epoch 10130 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10130, validation elapsed_time:0.11005616188049316
INFO:base_model:epoch 10130 val result: {

INFO:model.base_model:cur_epoch:10147, validation elapsed_time:0.26900291442871094
INFO:base_model:epoch 10147 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10148 Train Loss: 7930.8291015625 elapsed: 0.7241530418395996
DEBUG:base_model:epoch 10148 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10148, validation elapsed_time:0.08698511123657227
INFO:base_model:epoch 10148 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10149 Train Loss: 7930.8291015625 elapsed: 0.702347993850708
DEBUG:base_model:epoch 10149 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10149, validation elapsed_time:0.07962608337402344
INFO:base_model:epoch 10149 val result: {'lo

INFO:model.base_model:cur_epoch:10166, validation elapsed_time:0.0962362289428711
INFO:base_model:epoch 10166 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10167 Train Loss: 7917.00732421875 elapsed: 0.8196401596069336
DEBUG:base_model:epoch 10167 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10167, validation elapsed_time:0.09645891189575195
INFO:base_model:epoch 10167 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10168 Train Loss: 7930.8291015625 elapsed: 0.7328310012817383
DEBUG:base_model:epoch 10168 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10168, validation elapsed_time:0.1111898422241211
INFO:base_model:epoch 10168 val result: {'lo

INFO:model.base_model:cur_epoch:10185, validation elapsed_time:0.11078286170959473
INFO:base_model:epoch 10185 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10186 Train Loss: 7937.740234375 elapsed: 0.6724429130554199
DEBUG:base_model:epoch 10186 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10186, validation elapsed_time:0.09649801254272461
INFO:base_model:epoch 10186 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10187 Train Loss: 7930.8291015625 elapsed: 0.7847309112548828
DEBUG:base_model:epoch 10187 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10187, validation elapsed_time:0.0881352424621582
INFO:base_model:epoch 10187 val result: {'los

INFO:model.base_model:cur_epoch:10204, validation elapsed_time:0.07943511009216309
INFO:base_model:epoch 10204 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10205 Train Loss: 7930.8291015625 elapsed: 0.6866703033447266
DEBUG:base_model:epoch 10205 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10205, validation elapsed_time:0.08179306983947754
INFO:base_model:epoch 10205 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10206 Train Loss: 7930.8291015625 elapsed: 0.9042568206787109
DEBUG:base_model:epoch 10206 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10206, validation elapsed_time:0.11984777450561523
INFO:base_model:epoch 10206 val result: {'l

INFO:model.base_model:cur_epoch:10223, validation elapsed_time:0.11809587478637695
INFO:base_model:epoch 10223 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10224 Train Loss: 7944.6513671875 elapsed: 0.6239290237426758
DEBUG:base_model:epoch 10224 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10224, validation elapsed_time:0.09864687919616699
INFO:base_model:epoch 10224 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10225 Train Loss: 7910.09619140625 elapsed: 0.7467091083526611
DEBUG:base_model:epoch 10225 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10225, validation elapsed_time:0.13168787956237793
INFO:base_model:epoch 10225 val result: {'

INFO:model.base_model:cur_epoch:10242, validation elapsed_time:0.0781090259552002
INFO:base_model:epoch 10242 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10243 Train Loss: 7937.740234375 elapsed: 0.8051259517669678
DEBUG:base_model:epoch 10243 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10243, validation elapsed_time:0.21201515197753906
INFO:base_model:epoch 10243 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10244 Train Loss: 7910.09619140625 elapsed: 0.7677507400512695
DEBUG:base_model:epoch 10244 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10244, validation elapsed_time:0.0971078872680664
INFO:base_model:epoch 10244 val result: {'los

INFO:model.base_model:cur_epoch:10261, validation elapsed_time:0.09441804885864258
INFO:base_model:epoch 10261 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10262 Train Loss: 7930.8291015625 elapsed: 0.8003818988800049
DEBUG:base_model:epoch 10262 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10262, validation elapsed_time:0.15623998641967773
INFO:base_model:epoch 10262 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10263 Train Loss: 7910.09619140625 elapsed: 0.7554020881652832
DEBUG:base_model:epoch 10263 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7910.0962, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10263, validation elapsed_time:0.08711385726928711
INFO:base_model:epoch 10263 val result: {'

INFO:model.base_model:cur_epoch:10280, validation elapsed_time:0.10961627960205078
INFO:base_model:epoch 10280 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10281 Train Loss: 7917.00732421875 elapsed: 0.7750670909881592
DEBUG:base_model:epoch 10281 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10281, validation elapsed_time:0.12019705772399902
INFO:base_model:epoch 10281 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10282 Train Loss: 7917.00732421875 elapsed: 0.9300401210784912
DEBUG:base_model:epoch 10282 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10282, validation elapsed_time:0.16623711585998535
INFO:base_model:epoch 10282 val result: {

INFO:model.base_model:cur_epoch:10299, validation elapsed_time:0.10006022453308105
INFO:base_model:epoch 10299 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10300 Train Loss: 7923.91845703125 elapsed: 0.7326042652130127
DEBUG:base_model:epoch 10300 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10300, validation elapsed_time:0.09449577331542969
INFO:base_model:epoch 10300 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10301 Train Loss: 7937.740234375 elapsed: 0.7015559673309326
DEBUG:base_model:epoch 10301 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10301, validation elapsed_time:0.08228898048400879
INFO:base_model:epoch 10301 val result: {'l

INFO:model.base_model:cur_epoch:10318, validation elapsed_time:0.08166790008544922
INFO:base_model:epoch 10318 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10319 Train Loss: 7951.5625 elapsed: 0.7689247131347656
DEBUG:base_model:epoch 10319 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7951.5625, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10319, validation elapsed_time:0.21542072296142578
INFO:base_model:epoch 10319 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10320 Train Loss: 7937.740234375 elapsed: 0.753096342086792
DEBUG:base_model:epoch 10320 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7937.7402, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10320, validation elapsed_time:0.1045689582824707
INFO:base_model:epoch 10320 val result: {'loss': ten

INFO:model.base_model:cur_epoch:10337, validation elapsed_time:0.09502720832824707
INFO:base_model:epoch 10337 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10338 Train Loss: 7923.91845703125 elapsed: 0.7544591426849365
DEBUG:base_model:epoch 10338 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10338, validation elapsed_time:0.08027529716491699
INFO:base_model:epoch 10338 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10339 Train Loss: 7923.91845703125 elapsed: 0.7145998477935791
DEBUG:base_model:epoch 10339 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10339, validation elapsed_time:0.08184194564819336
INFO:base_model:epoch 10339 val result: {

INFO:model.base_model:cur_epoch:10356, validation elapsed_time:0.0829308032989502
INFO:base_model:epoch 10356 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10357 Train Loss: 7923.91845703125 elapsed: 0.8498270511627197
DEBUG:base_model:epoch 10357 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10357, validation elapsed_time:0.10839700698852539
INFO:base_model:epoch 10357 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10358 Train Loss: 7917.00732421875 elapsed: 0.7561101913452148
DEBUG:base_model:epoch 10358 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10358, validation elapsed_time:0.08163189888000488
INFO:base_model:epoch 10358 val result: {'

INFO:model.base_model:cur_epoch:10375, validation elapsed_time:0.08276629447937012
INFO:base_model:epoch 10375 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10376 Train Loss: 7896.27392578125 elapsed: 0.7929761409759521
DEBUG:base_model:epoch 10376 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7896.2739, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10376, validation elapsed_time:0.10935235023498535
INFO:base_model:epoch 10376 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10377 Train Loss: 7923.91845703125 elapsed: 0.9111018180847168
DEBUG:base_model:epoch 10377 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7923.9185, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10377, validation elapsed_time:0.11764788627624512
INFO:base_model:epoch 10377 val result: {

INFO:model.base_model:cur_epoch:10394, validation elapsed_time:0.13025712966918945
INFO:base_model:epoch 10394 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10395 Train Loss: 7930.8291015625 elapsed: 0.760991096496582
DEBUG:base_model:epoch 10395 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10395, validation elapsed_time:0.08033585548400879
INFO:base_model:epoch 10395 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10396 Train Loss: 7944.6513671875 elapsed: 0.6888470649719238
DEBUG:base_model:epoch 10396 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7944.6514, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10396, validation elapsed_time:0.08901691436767578
INFO:base_model:epoch 10396 val result: {'lo

INFO:model.base_model:cur_epoch:10413, validation elapsed_time:0.1301712989807129
INFO:base_model:epoch 10413 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10414 Train Loss: 7917.00732421875 elapsed: 0.7522292137145996
DEBUG:base_model:epoch 10414 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10414, validation elapsed_time:0.0785210132598877
INFO:base_model:epoch 10414 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10415 Train Loss: 7917.00732421875 elapsed: 0.6406867504119873
DEBUG:base_model:epoch 10415 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10415, validation elapsed_time:0.1834549903869629
INFO:base_model:epoch 10415 val result: {'lo

INFO:model.base_model:cur_epoch:10432, validation elapsed_time:0.08118033409118652
INFO:base_model:epoch 10432 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10433 Train Loss: 7930.8291015625 elapsed: 0.7019479274749756
DEBUG:base_model:epoch 10433 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7930.8291, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10433, validation elapsed_time:0.12673306465148926
INFO:base_model:epoch 10433 val result: {'loss': tensor(73.4883), 'auc': 0.5}
INFO:model.base_model:Epoch Step: 10434 Train Loss: 7917.00732421875 elapsed: 0.8388841152191162
DEBUG:base_model:epoch 10434 train finished. train_result_info:{'total_n_batch': 110, 'total_train_loss_by_token': tensor(7917.0073, grad_fn=<AddBackward0>), 'total_tokens': 2553}
INFO:model.base_model:cur_epoch:10434, validation elapsed_time:0.15594005584716797
INFO:base_model:epoch 10434 val result: {'

## Transformer encoder
3층 (3 encoder layers) — note: the configuration cell below currently sets `n_layer = 1`.

In [122]:
from model import transformer_encoder_model


In [117]:


# Run configuration for the transformer-encoder classifier experiment.
batch_size = 128
epochs = 30000

# Only the training batches are shuffled; validation keeps the dataset order.
train_loader = IsraelDataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = IsraelDataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Sequence length is read from axis 1 of the first element of `train_input`.
len_seq = train_input[0].shape[1]
print("len_seq:", len_seq)

# Encoder width, attention heads, encoder depth, and optimiser step size.
d_model, n_head, n_layer = 8, 1, 1
lr = 1e-4

# `dim_feedforward` and `batch_first` are left at the PyTorch defaults
# (2048 and False respectively).
# NOTE(review): a 2048-wide feed-forward on d_model=8 looks unintentionally
# large, and batch_first=False expects (seq, batch, dim) input — confirm the
# model permutes its input accordingly.
encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=n_head, dropout=0.1)
transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=n_layer)
model = transformer_encoder_model.MuliplyIntensityUsingCls(
    transformer_encoder=transformer_encoder,
    dict_category=dict_category,
    len_seq=len_seq,
)

# Switches the module to evaluation mode (dropout disabled).
# NOTE(review): presumably `train_on_epoch` switches back to train mode — confirm.
model.eval()

print("d_model:", model.d_model, "n_head:", model.n_head, 'len_seq:', model.len_seq)
loss_func = nn.BCELoss(reduction='mean')
optimizer = optim.RAdam(model.parameters(), lr=lr)
# Identifier later handed to `save_model` together with the model snapshot.
config_name = f"big_layer{n_layer}_dmodel{d_model}_nhead{n_head}_lr{lr}"

# Per-epoch result stores, keyed by 'epoch_train_<n>' / 'epoch_val_<n>'.
train_log, val_log = {}, {}

# Metrics evaluated on validation/test data; each callable receives (pred, gt).
val_metrics = dict(
    loss=loss_func,
    auc=lambda pred, gt: roc_auc_score(gt.numpy(), pred.numpy()),
)


    

len_seq: 23
d_model: 8 n_head: 1 len_seq: 23


In [118]:
# Display the module tree of the freshly built model (layers and their sizes).
model

MuliplyIntensityUsingCls(
  (cat_embeding): Embedding(46, 8)
  (transformer_encoder): TransformerEncoder(
    (layers): ModuleList(
      (0): TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=8, out_features=8, bias=True)
        )
        (linear1): Linear(in_features=8, out_features=2048, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=2048, out_features=8, bias=True)
        (norm1): LayerNorm((8,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((8,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
      )
    )
  )
  (global_pooling): AvgPool1d(kernel_size=(23,), stride=(23,), padding=(0,))
  (final_fc): Linear(in_features=8, out_features=1, bias=True)
  (final_activation): Sigmoid()
)

In [70]:

# Train and validate once per epoch, logging both results, and snapshot the
# model whenever the validation AUC matches or beats every AUC logged so far.
for epoch in range(epochs):
    # **************** Train ****************
    train_result_info = transformer_encoder_model.MuliplyIntensityUsingCls.train_on_epoch(epoch, train_loader, model, loss_func, optimizer, verbose=True)
    train_log[f'epoch_train_{epoch}'] = train_result_info
    logger.debug(f"epoch {epoch} train finished. train_result_info:{train_result_info}")

    # ************** validation *********************
    val_result = transformer_encoder_model.MuliplyIntensityUsingCls.validation_on_epoch(epoch, model, val_loader, val_metrics)
    val_log[f'epoch_val_{epoch}'] = val_result
    logger.info(f"epoch {epoch} val result: {val_result}")

    # `val_log` already holds this epoch's entry, so it is never empty here and
    # a separate "first epoch" branch would be unreachable. Comparing against
    # the max over all entries (current one included) is true exactly when this
    # epoch is the best so far; `>=` means a tie re-saves, so the latest of
    # equally good epochs wins.
    # NOTE(review): `copy()` is not a built-in `nn.Module` method; presumably the
    # project model class provides it — confirm (else use `copy.deepcopy(model)`).
    if val_result['auc'] >= max(result['auc'] for result in val_log.values()):
        best_model = save_model(config_name, model.copy(), epoch)


Before final top:  torch.Size([128, 23, 16])
first token vector:  torch.Size([128, 16])
after final top out:  torch.Size([128, 1])
after activation out:  torch.Size([128, 1])
Before final top:  torch.Size([128, 23, 16])
first token vector:  torch.Size([128, 16])
after final top out:  torch.Size([128, 1])
after activation out:  torch.Size([128, 1])
Before final top:  torch.Size([128, 23, 16])
first token vector:  torch.Size([128, 16])
after final top out:  torch.Size([128, 1])
after activation out:  torch.Size([128, 1])
Before final top:  torch.Size([128, 23, 16])
first token vector:  torch.Size([128, 16])
after final top out:  torch.Size([128, 1])
after activation out:  torch.Size([128, 1])
Before final top:  torch.Size([128, 23, 16])
first token vector:  torch.Size([128, 16])
after final top out:  torch.Size([128, 1])
after activation out:  torch.Size([128, 1])
Before final top:  torch.Size([128, 23, 16])
first token vector:  torch.Size([128, 16])
after final top out:  torch.Size([128

Before final top:  torch.Size([128, 23, 16])
first token vector:  torch.Size([128, 16])
after final top out:  torch.Size([128, 1])
after activation out:  torch.Size([128, 1])
Before final top:  torch.Size([128, 23, 16])
first token vector:  torch.Size([128, 16])
after final top out:  torch.Size([128, 1])
after activation out:  torch.Size([128, 1])
Before final top:  torch.Size([128, 23, 16])
first token vector:  torch.Size([128, 16])
after final top out:  torch.Size([128, 1])
after activation out:  torch.Size([128, 1])
Before final top:  torch.Size([128, 23, 16])
first token vector:  torch.Size([128, 16])
after final top out:  torch.Size([128, 1])
after activation out:  torch.Size([128, 1])
Before final top:  torch.Size([128, 23, 16])
first token vector:  torch.Size([128, 16])
after final top out:  torch.Size([128, 1])
after activation out:  torch.Size([128, 1])
Before final top:  torch.Size([128, 23, 16])
first token vector:  torch.Size([128, 16])
after final top out:  torch.Size([128

Before final top:  torch.Size([128, 23, 16])
first token vector:  torch.Size([128, 16])
after final top out:  torch.Size([128, 1])
after activation out:  torch.Size([128, 1])
Before final top:  torch.Size([128, 23, 16])
first token vector:  torch.Size([128, 16])
after final top out:  torch.Size([128, 1])
after activation out:  torch.Size([128, 1])


KeyboardInterrupt: 

In [71]:
# Inspect the most recent validation metrics assigned by the training loop.
val_result

{'loss': tensor(0.5794), 'auc': 0.5}

In [27]:
# --- Validation recap ---
# Show every logged validation entry, then print the one with the highest AUC.
display(val_log)
print(
    "Best validation performance:",
    max(val_log.values(), key=lambda entry: entry['auc']),
)

# --- Test-set evaluation of the saved best model ---
test_loader = IsraelDataLoader(test_dataset, batch_size=64, shuffle=False)

best_model.eval()
# NOTE(review): `epoch` is not assigned in this cell; presumably it is the loop
# variable left over from the training cell — confirm before a fresh-kernel run.
test_result = best_model.validation_on_epoch(epoch, best_model, test_loader, val_metrics)
test_result

{'epoch_val_0': {'loss': tensor(0.6084), 'auc': 0.5},
 'epoch_val_1': {'loss': tensor(0.6134), 'auc': 0.5},
 'epoch_val_2': {'loss': tensor(0.5807), 'auc': 0.5},
 'epoch_val_3': {'loss': tensor(0.5815), 'auc': 0.5},
 'epoch_val_4': {'loss': tensor(0.5784), 'auc': 0.5},
 'epoch_val_5': {'loss': tensor(0.5784), 'auc': 0.5},
 'epoch_val_6': {'loss': tensor(0.5786), 'auc': 0.5},
 'epoch_val_7': {'loss': tensor(0.5957), 'auc': 0.5},
 'epoch_val_8': {'loss': tensor(0.6658), 'auc': 0.5},
 'epoch_val_9': {'loss': tensor(0.7118), 'auc': 0.5},
 'epoch_val_10': {'loss': tensor(0.5811), 'auc': 0.5},
 'epoch_val_11': {'loss': tensor(0.6707), 'auc': 0.5},
 'epoch_val_12': {'loss': tensor(0.6391), 'auc': 0.5},
 'epoch_val_13': {'loss': tensor(0.6086), 'auc': 0.5},
 'epoch_val_14': {'loss': tensor(0.8430), 'auc': 0.5},
 'epoch_val_15': {'loss': tensor(0.6344), 'auc': 0.5},
 'epoch_val_16': {'loss': tensor(0.7059), 'auc': 0.5},
 'epoch_val_17': {'loss': tensor(0.6741), 'auc': 0.5},
 'epoch_val_18': {'l

Best validation performance: {'loss': tensor(0.6546), 'auc': 0.5064395523948965}


INFO:model.base_model:cur_epoch:299, validation elapsed_time:0.8442840576171875


{'loss': tensor(2.1221), 'auc': 0.5074170816652019}

### 6층 짜리로 한번 가보자