In [1]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import jieba
import re
import os
import time
import gc

from sklearn.model_selection import train_test_split, KFold
from torch.utils.data import Dataset, DataLoader
from torch.optim import Adam, AdamW
from torch.nn.utils.rnn import pad_sequence
from transformers import BertTokenizer, BertModel, BertForSequenceClassification, BertConfig, get_linear_schedule_with_warmup
from tqdm import tqdm
from sklearn.metrics import f1_score, auc, roc_curve, classification_report

In [2]:
# Expose only the GPU with physical index 1 to this process (set before any CUDA work is done in this notebook).
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

In [3]:
import sys
# Add the parent directory to the import path; the `config` module imported below
# presumably lives there (NOTE(review): confirm against the repository layout).
sys.path.append('..')

In [4]:
import config
from config import device, is_cuda

## dataset

In [5]:
class QAMatchDataset(Dataset):
    """Lazily tokenized (question, reply) pair dataset for a BERT-style matcher.

    Each item is encoded on access with ``tokenizer.encode_plus`` and
    right-padded with zeros to ``max_seq_len``.

    :param df: DataFrame with ``question`` and ``reply_content`` columns, plus
               a ``label`` column when ``mode`` is ``'train'`` or ``'dev'``.
    :param tokenizer: object exposing ``encode_plus(text_a, text_b, ...)`` that
                      returns ``input_ids`` and ``token_type_ids``.
    :param max_seq_len: length every encoded pair is truncated / padded to.
    :param mode: one of ``'train'``, ``'dev'``, ``'test'``; labels are only
                 produced for the first two.
    """

    def __init__(self, df, tokenizer, max_seq_len, mode):
        assert mode in ['train', 'dev', 'test']

        self.mode = mode
        self.tokenizer = tokenizer
        self.df = df
        self.max_seq_len = max_seq_len

    def __getitem__(self, idx):
        """Return ``(input_ids, token_type_ids, label)`` for row ``idx``.

        The two id tensors are ``torch.long`` of length ``max_seq_len``;
        ``label`` is a 0-d tensor in train/dev mode and ``None`` in test mode.
        """
        # Fetch the row once instead of re-indexing the frame for every column.
        row = self.df.iloc[idx]
        if self.mode in ['train', 'dev']:
            label_tensor = torch.tensor(row['label'])
        else:
            label_tensor = None

        # Adds [CLS]/[SEP] and truncates to max_seq_len; the tokenizer does not pad here.
        inputs = self.tokenizer.encode_plus(row['question'], row['reply_content'], add_special_tokens=True,
                                            max_length=self.max_seq_len, truncation='longest_first')

        # Build new lists so the tokenizer's own output is left unmodified.
        # A non-positive pad length yields an empty padding list.
        padding = [0] * (self.max_seq_len - len(inputs['input_ids']))
        seq = list(inputs['input_ids']) + padding
        seq_segment = list(inputs['token_type_ids']) + padding

        # Create integer tensors directly: going through torch.Tensor (float32)
        # would silently round ids larger than 2**24.
        return (torch.tensor(seq, dtype=torch.long),
                torch.tensor(seq_segment, dtype=torch.long),
                label_tensor)

    def collate_fn(self, samples):
        """Stack items into ``(input_ids, attention_mask, token_type_ids, labels)``.

        ``labels`` is ``None`` in test mode. The attention mask is 1 wherever
        the input id is non-zero, i.e. id 0 is treated as padding.
        """
        seqs = torch.stack([s[0] for s in samples])
        seq_segments = torch.stack([s[1] for s in samples])

        if self.mode in ['train', 'dev']:
            labels = torch.stack([s[2] for s in samples])
        else:
            labels = None

        # Attention mask: 1 for real tokens, 0 for padding (id 0).
        seq_masks = torch.zeros(seqs.shape, dtype=torch.long)
        seq_masks = seq_masks.masked_fill(seqs != 0, 1)

        return seqs, seq_masks, seq_segments, labels

    def __len__(self):
        """Number of rows in the underlying DataFrame."""
        return len(self.df)

## model

In [6]:
class BertModelTrain(nn.Module):
    """BERT encoder followed by a small binary classification head.

    The head concatenates four sentence-level features (the encoder's pooled
    output, mean-pooled and max-pooled token states, and the first-token
    state), passes them through ``Linear -> Tanh -> Linear`` with dropout, and
    applies a sigmoid to obtain the probability of class 1.

    :param params: dict providing ``'pretrained_model_path'`` (directory with
                   the pretrained weights and ``config.json``) and
                   ``'dropout_rate'``.
    """

    def __init__(self, params):
        super(BertModelTrain, self).__init__()
        self.bert_config = BertConfig.from_pretrained(os.path.join(params['pretrained_model_path'], 'config.json'))
        self.bert_config.output_hidden_states = False
        self.bert = BertModel.from_pretrained(params['pretrained_model_path'], output_hidden_states=False)
        # Four pooled features of size hidden_size are concatenated before the head.
        self.linear1 = nn.Linear(4*self.bert_config.hidden_size, self.bert_config.hidden_size)
        self.linear2 = nn.Linear(self.bert_config.hidden_size, 1)
        self.dropout = nn.Dropout(p=params['dropout_rate'])
        self.tanh = nn.Tanh()
        self.loss_fn = nn.BCELoss()
        for param in self.bert.parameters():
            param.requires_grad = True     # full fine-tuning: every encoder parameter is updated

    def forward(self, batch_seqs, batch_seq_masks, batch_seq_segments, labels=None):
        """
        :param batch_seqs: input_ids
        :param batch_seq_masks: attention_mask
        :param batch_seq_segments: token_type_ids
        :param labels: optional 0/1 targets, one per sample
        :return: ``(loss, logits, probabilities)`` when ``labels`` is given,
                 otherwise ``(logits, probabilities)``. ``logits`` is
                 (batch_size, 1); ``probabilities`` is (batch_size, 2) holding
                 ``[P(class 0), P(class 1)]``.
        """
        # First two encoder outputs: per-token states (batch, seq_len, hidden) and the pooled output.
        q_embedding, pooler_output = self.bert(input_ids=batch_seqs,
                                attention_mask=batch_seq_masks,
                                token_type_ids=batch_seq_segments)[:2]
        # NOTE(review): mean/max pooling below run over every position, padding
        # included; the attention mask is not applied here. Left as-is so that
        # existing checkpoints keep producing the same predictions.
        # Avg pooling -> (batch_size, hidden_size)
        avg_pooled = torch.mean(q_embedding, dim=1)

        # Max pooling -> (batch_size, hidden_size)
        max_pooled = torch.max(q_embedding, dim=1)[0]

        # First-token last hidden state -> (batch_size, hidden_size)
        cls_hidden_state = q_embedding[:, 0]

        # Concatenate the four features -> (batch_size, 4*hidden_size)
        x = torch.cat([pooler_output, avg_pooled, max_pooled, cls_hidden_state], dim=1)

        x = self.dropout(x)
        x = self.linear1(x)    # (batch, hidden_size)
        x = self.tanh(x)
        x = self.dropout(x)
        x = self.linear2(x)    # (batch, 1)
        # Probability of class 1 for each sample, shape (batch_size, 1)
        output = torch.sigmoid(x)

        logits = x
        proba_0 = 1.0 - output     # (batch_size, 1)
        probabilities = torch.cat((proba_0, output), dim=1)   # (batch_size, 2)
        if labels is not None:
            # Flatten both sides to (batch,). A bare squeeze() would turn a
            # batch of one into a 0-d tensor and make BCELoss raise on the
            # shape mismatch with the (1,) target.
            loss = self.loss_fn(output.reshape(-1), labels.type(torch.float).reshape(-1))
            outputs = (loss, logits, probabilities)
        else:
            outputs = (logits, probabilities)

        return outputs

In [7]:
def writeToLog(path, content):
    """Append ``content`` plus a trailing newline to the text file at ``path``.

    The file is opened in append mode, so it is created if it does not exist.
    """
    with open(path, mode='a') as log_file:
        log_file.writelines((content, '\n'))

## train

In [8]:
def train(train_dataloader, dev_dataloader, params, bert_tokenizer, best_model_path, output_path, fold,
          version, checkpoint=None):
    """Fine-tune a fresh ``BertModelTrain`` for one fold with early stopping on validation AUC.

    :param train_dataloader: batches used for optimisation.
    :param dev_dataloader: batches used for validation after every epoch.
    :param params: dict; reads ``'weight_decay'``, ``'lr'``, ``'epochs'``,
                   ``'max_gradient_norm'``, ``'early_stoping'`` and, optionally,
                   ``'num_warmup_steps'`` (defaults to 100). It is also passed
                   to ``BertModelTrain``.
    :param bert_tokenizer: accepted for interface compatibility; not used here.
    :param best_model_path: directory in which the best checkpoint is written.
    :param output_path: text log file that per-epoch metrics are appended to.
    :param fold: fold number, used in the checkpoint file name.
    :param version: model version string, used in the checkpoint file name.
    :param checkpoint: accepted for interface compatibility; not used here.
    :return: ``(model, best_score)`` where ``model`` carries the weights of the
             best epoch (by validation AUC) and ``best_score`` is that AUC as a
             Python float.
    """
    # ---------------------- Model definition ---------------------- #
    print("\t* Building model...")
    bulid_time = time.time()
    model = BertModelTrain(params).to(device)
    print("\t* Building model time:{:.4f}s".format(time.time()-bulid_time))
    # ---------------------- Preparation for training -------------- #
    # Biases and LayerNorm parameters are excluded from weight decay.
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': params['weight_decay']},
        {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
    ]
    optimizer = AdamW(optimizer_grouped_parameters, lr=params['lr'])
    num_training_steps = len(train_dataloader) * params['epochs']
    # Warm-up length is configurable; 100 is the value that used to be hard-coded.
    num_warmup_steps = params.get('num_warmup_steps', 100)
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=num_warmup_steps,
                                                num_training_steps=num_training_steps)

    best_score = 0.0    # best validation AUC seen so far
    best_thres = 0.0
    start_epoch = 1
    # History stored inside the checkpoint (for loss curves).
    epoch_count = []
    train_losses = []
    valid_losses = []
    best_model_saved_path = os.path.join(best_model_path, 'best-fine-tune-'+version+'-k'+str(fold)+'.bin')
    # Tracks whether THIS run wrote a checkpoint, so a stale or missing file is never reloaded at the end.
    best_model_saved = False

    # Validation before any update: shows how the pretrained encoder with an
    # untrained head performs on the downstream task.
    valid_loss, valid_accuracy, valid_f1, valid_auc, thres = validate(model, dev_dataloader)
    print("\t* Validation loss before training: {:.4f}, accuracy:{:.4f}, "
          "f1_score: {:.4f}, best_thres: {:.4f}, auc: {:.4f}".
          format(valid_loss, (valid_accuracy * 100), valid_f1, thres, valid_auc))
    print("\n", 20 * "=", "Training Bert model o device: {}".format(device), 20 * "=")

    patience_counter = 0
    for epoch in range(start_epoch, params['epochs']+1):
        print("-> Start epoch {}".format(epoch))
        writeToLog(output_path, "-> Start epoch {}".format(epoch))
        epoch_count.append(epoch)
        # train
        epoch_time, epoch_loss, epoch_accuracy, epoch_f1, epoch_auc = train_for_one_epoch(model,
                                                                                          train_dataloader,
                                                                                          optimizer,
                                                                                          scheduler,
                                                                                          params['max_gradient_norm'])
        train_losses.append(epoch_loss)
        print("-> Training time:{:.4f}s, loss: {:.4f}, accuracy: {:.4f}%, f1_score: {:.4f}, auc: {:.4f}".
              format(epoch_time, epoch_loss, epoch_accuracy*100, epoch_f1, epoch_auc))
        writeToLog(output_path, "-> Training time:{:.4f}s, loss: {:.4f}, accuracy: {:.4f}%, f1_score: {:.4f}, auc: {:.4f}".
              format(epoch_time, epoch_loss, epoch_accuracy*100, epoch_f1, epoch_auc))

        # validation
        valid_loss, valid_accuracy, valid_f1, valid_auc, thres = validate(model, dev_dataloader)
        print("-> Validation loss: {:.4f}, accuracy: {:.4f}%, f1_score: {:.4f}, best_thres: {:.4f}, auc: {:.4f}".
              format(valid_loss, valid_accuracy * 100, valid_f1, thres, valid_auc))
        writeToLog(output_path, "-> Validation loss: {:.4f}, accuracy: {:.4f}%, f1_score: {:.4f}, best_thres: {:.4f}, auc: {:.4f}".
              format(valid_loss, valid_accuracy * 100, valid_f1, thres, valid_auc))

        valid_losses.append(valid_loss)

        if valid_auc <= best_score:
            patience_counter += 1
        else:
            # Store a plain Python float so the checkpoint does not embed a NumPy scalar object.
            best_score = float(valid_auc)
            best_thres = thres
            patience_counter = 0
            torch.save({
                "epoch": epoch,
                "model": model.state_dict(),
                "best_score": best_score,    # used later to weight each fold's predictions
                "best_thres": best_thres,
                "epochs_count": epoch_count,
                "train_losses": train_losses,
                "valid_losses": valid_losses
            }, best_model_saved_path)
            best_model_saved = True

        if patience_counter >= params['early_stoping']:
            print("-> Early stopping: patience limit reached, stopping...")
            break

    if best_model_saved and patience_counter != 0:
        # The last epoch was not the best one: restore the best weights saved by this run.
        best_checkpoint = torch.load(best_model_saved_path)
        model.load_state_dict(best_checkpoint['model'])
    return model, best_score


def train_for_one_epoch(model, dataloader, optimizer, scheduler, max_gradient_norm):
    """Run one optimisation pass over ``dataloader``.

    For every batch: forward, backward, gradient-norm clipping, optimizer step
    and scheduler step. Predictions are the argmax over the two-column
    probability output.

    :return: ``(epoch_time, epoch_loss, epoch_accuracy, epoch_f1, epoch_auc)``
             where the loss is the mean of the per-batch losses and the
             accuracy is taken over ``len(dataloader.dataset)``.
    """
    model.train()

    started_at = time.time()
    loss_sum = 0.0          # sum of per-batch losses for this epoch
    n_correct = 0.0
    batch_time_sum = 0.0    # total time spent processing batches
    pred_chunks = []
    proba_chunks = []
    label_chunks = []

    progress = tqdm(dataloader)
    for step, batch in enumerate(progress, start=1):
        tick = time.time()
        if is_cuda:
            batch = [t.to(device) for t in batch if t is not None]
        # Reset accumulated gradients before the forward pass.
        optimizer.zero_grad()
        seqs, seq_masks, seq_segments, labels = batch
        outputs = model(seqs, seq_masks, seq_segments, labels)
        loss, probabilities = outputs[0], outputs[2]

        loss.backward()
        # Clip the global gradient norm before the parameter update.
        nn.utils.clip_grad_norm_(model.parameters(), max_gradient_norm)
        optimizer.step()
        scheduler.step()

        loss_sum += loss.item()
        pred = torch.argmax(probabilities, dim=1)
        n_correct = n_correct + (pred == labels).sum().item()
        batch_time_sum += time.time() - tick
        pred_chunks.append(pred.cpu())
        label_chunks.append(labels.cpu())
        proba_chunks.append(probabilities.detach().cpu())

        progress.set_description(
            "Batch num: {}. Avg. batch proc. time: {:.4f}s, loss: {:.4f}".
            format(step, batch_time_sum/step, loss_sum/step))

    # Flatten the per-batch pieces into epoch-level tensors.
    all_labels = torch.cat(label_chunks)         # (samples,)
    all_preds = torch.cat(pred_chunks)           # (samples,)
    all_pred_probas = torch.cat(proba_chunks)    # (samples, 2)

    fpr, tpr, _ = roc_curve(all_labels, all_pred_probas[:, 1], pos_label=1)

    epoch_loss = loss_sum / len(dataloader)
    epoch_accuracy = n_correct / len(dataloader.dataset)
    epoch_time = time.time() - started_at
    epoch_auc = auc(fpr, tpr)
    epoch_f1 = f1_score(all_labels, all_preds)

    return epoch_time, epoch_loss, epoch_accuracy, epoch_f1, epoch_auc


def validate(model, dataloader):
    """Evaluate ``model`` on ``dataloader`` without tracking gradients.

    Predictions are the argmax over the two-column probability output.

    :return: ``(valid_loss, valid_acc, valid_f1, valid_auc, best_thres)``.
             The loss is the mean of the per-batch losses; ``best_thres`` is
             always 0 because no threshold search is performed here.
    """
    model.eval()
    loss_sum = 0.0    # sum of per-batch losses
    label_chunks = []
    proba_chunks = []
    progress = tqdm(dataloader)

    # No autograd bookkeeping during evaluation; this keeps GPU memory use down.
    with torch.no_grad():
        for batch in progress:
            if is_cuda:
                batch = [t.to(device) for t in batch if t is not None]

            seqs, seq_masks, seq_segments, labels = batch
            outputs = model(seqs, seq_masks, seq_segments, labels)

            loss_sum += outputs[0].item()
            label_chunks.append(labels.cpu())
            proba_chunks.append(outputs[2].cpu())

    all_labels = torch.cat(label_chunks)         # (samples,)
    all_pred_probas = torch.cat(proba_chunks)    # (samples, 2)

    all_preds = torch.argmax(all_pred_probas, dim=1)
    n_correct = (all_preds == all_labels).sum().item()

    fpr, tpr, _ = roc_curve(all_labels, all_pred_probas[:, 1], pos_label=1)

    valid_loss = loss_sum / len(dataloader)
    valid_acc = n_correct / len(dataloader.dataset)
    valid_f1 = f1_score(all_labels, all_preds)
    valid_auc = auc(fpr, tpr)
    return valid_loss, valid_acc, valid_f1, valid_auc, 0
    
def search_f1(y_true, y_pred):
    """Scan decision thresholds 0.30, 0.31, ..., 0.69 for the best F1 score.

    :param y_true: 1-D tensor of ground-truth labels
    :param y_pred: 1-D tensor; y_pred[i] is the predicted probability that
                   sample i has label 1
    :return: ``(best_f1, best_threshold)``; ``(0.0, 0.0)`` if no threshold
             yields a score above zero. On ties the lowest threshold wins.
    """
    top_score, top_thres = 0.0, 0.0
    for thres in (step / 100 for step in range(30, 70)):
        # Strictly above the threshold counts as a positive prediction.
        candidate = f1_score(y_true, y_pred > thres)
        if candidate > top_score:
            top_score, top_thres = candidate, thres

    return top_score, top_thres
    
def get_pred_probas(model, dataloader, is_test=False):
    """Collect the model's two-column class probabilities for every sample in ``dataloader``.

    :param model: model whose label-free forward call returns ``(logits, probabilities)``.
    :param dataloader: yields ``(input_ids, attention_mask, token_type_ids, labels)``
                       batches; ``labels`` may be ``None`` or absent when ``is_test`` is true.
    :param is_test: when true only the probabilities are returned.
    :return: CPU tensor of probabilities ``(samples, 2)`` if ``is_test``,
             otherwise ``(probabilities, labels)``, both on CPU.
    :raises ValueError: if ``dataloader`` yields no batches.
    """
    model.eval()
    # Per-batch results are moved to the CPU immediately and concatenated once
    # at the end. This avoids re-copying the growing result on every batch and
    # keeps the accumulated predictions out of GPU memory.
    proba_chunks = []
    label_chunks = []
    with torch.no_grad():
        for data in dataloader:
            # Move every tensor of the batch to the GPU (a None label is dropped).
            if is_cuda:
                data = [t.to(device) for t in data if t is not None]

            seqs, seq_masks, seq_segments = data[:3]
            if not is_test:
                label_chunks.append(data[3].cpu())

            outputs = model(seqs,
                            seq_masks,
                            seq_segments)
            proba_chunks.append(outputs[1].cpu())   # (batch, 2)

    if not proba_chunks:
        raise ValueError("dataloader yielded no batches; nothing to predict")

    probas = torch.cat(proba_chunks)
    if is_test:
        return probas
    return probas, torch.cat(label_chunks)   # labels: (samples,)

## KFold

In [9]:
def k_fold_cross_val(train_df, test_df, params, k, bert_tokenizer, best_model_path, output_path, version):
    """Train (or reload) one model per fold and blend their test predictions.

    For every fold the model is trained unless its checkpoint file already
    exists in ``best_model_path``, in which case the checkpoint is loaded
    instead. Out-of-fold probabilities are written into ``train_df`` as the
    columns ``proba_0`` / ``proba_1`` (``train_df`` is modified in place).
    Test probabilities of the folds are combined with weights proportional to
    each fold's best validation score, and the decision threshold is chosen by
    ``search_f1`` on the out-of-fold predictions.

    :param train_df: labelled data that is split into ``k`` folds (unshuffled).
    :param test_df: unlabelled data to predict.
    :param params: hyper-parameter dict; reads ``'max_seq_len'`` and
                   ``'batch_size'`` here and is forwarded to ``train`` /
                   ``BertModelTrain``.
    :param k: number of folds.
    :param bert_tokenizer: tokenizer handed to the datasets.
    :param best_model_path: directory holding the per-fold checkpoints.
    :param output_path: text log file for progress messages.
    :param version: model version string used in checkpoint file names.
    :return: ``(test_preds, k_test_probas, dev_probas, dev_labels, best_f1, best_thres)``.
             NOTE: ``k_test_probas`` holds the per-fold test probabilities
             AFTER multiplication by the fold weights, shape (k, n_test, 2).
    """
    kf = KFold(n_splits=k)
    test_dataset = QAMatchDataset(test_df, bert_tokenizer, params['max_seq_len'], mode='test')
    test_dataloader = DataLoader(test_dataset, batch_size=512, num_workers=3, collate_fn=test_dataset.collate_fn)
    dev_labels = []
    dev_probas = []
    k_test_probas = []
    k_best_scores = []
    for fold, (train_idxs, dev_idxs) in enumerate(kf.split(train_df)):
        print("\t* Start "+str(fold)+" fold")
        writeToLog(output_path, "\t* Start "+str(fold)+" fold")
        # ---------------------- Data loading -------------------------- #
        print("\t* Building dataset...")
        train_dataset = QAMatchDataset(train_df.iloc[train_idxs], bert_tokenizer, params['max_seq_len'], 'train')
        dev_dataset = QAMatchDataset(train_df.iloc[dev_idxs], bert_tokenizer, params['max_seq_len'], 'dev')

        train_dataloader = DataLoader(train_dataset, batch_size=params['batch_size'], num_workers=3,
                                      collate_fn=train_dataset.collate_fn)
        dev_dataloader = DataLoader(dev_dataset, batch_size=512, num_workers=3,
                                    collate_fn=dev_dataset.collate_fn)
        best_model_fold_path = os.path.join(best_model_path, 'best-fine-tune-'+version+'-k'+str(fold)+'.bin')
        checkpoint = None
        if not(os.path.exists(best_model_fold_path)):
            # No checkpoint for this fold yet: train from the pretrained weights.
            model, best_score = train(train_dataloader, dev_dataloader, params, bert_tokenizer, best_model_path, output_path,
                                      fold, version, checkpoint=None)
        else:
            # Reuse the checkpoint produced by an earlier run.
            checkpoint = torch.load(best_model_fold_path)
            model = BertModelTrain(params).to(device)
            model.load_state_dict(checkpoint['model'])
            best_score = checkpoint['best_score']
        k_best_scores.append(best_score)

        fold_dev_proba, dev_label = get_pred_probas(model, dev_dataloader)
        # Write the out-of-fold probabilities back in one vectorised step.
        # KFold yields POSITIONS, so the write is positional (iloc) rather than
        # label-based (loc); it therefore stays correct for a non-default index.
        fold_dev_proba_np = fold_dev_proba.double().numpy()
        for col_pos, col_name in enumerate(('proba_0', 'proba_1')):
            if col_name not in train_df.columns:
                train_df[col_name] = np.nan
            train_df.iloc[dev_idxs, train_df.columns.get_loc(col_name)] = fold_dev_proba_np[:, col_pos]
        fold_test_proba = get_pred_probas(model, test_dataloader, is_test=True)

        dev_labels.append(dev_label)
        dev_probas.append(fold_dev_proba)       # one (len(dev_idxs), 2) tensor per fold
        k_test_probas.append(fold_test_proba)   # one (len(test_dataset), 2) tensor per fold
        # Release the fold's model before building the next one.
        del model, train_dataloader, dev_dataloader, checkpoint
        torch.cuda.empty_cache()
        time.sleep(5)

    dev_labels = torch.cat(dev_labels)  # (len(train_df),)   labels of all folds' dev parts
    dev_probas = torch.cat(dev_probas)  # (len(train_df), 2) out-of-fold predictions

    k_test_probas = torch.stack(k_test_probas)  # (k, len(test_dataset), 2)

    # Weighted blend of the k models: weights are the normalised best validation scores.
    k_best_scores = np.array(k_best_scores)
    k_weights = k_best_scores / k_best_scores.sum()             # (k,)
    k_weights = np.expand_dims(np.expand_dims(k_weights,1),1)   # (k, 1, 1)
    print('k_best_score :', k_best_scores)
    print('k weights :', k_weights)
    # Convert the weights to a tensor explicitly rather than relying on implicit
    # torch/numpy mixing; float64 keeps the precision of the NumPy weights.
    k_test_probas = k_test_probas.double() * torch.as_tensor(k_weights, dtype=torch.float64)
    test_probas = torch.sum(k_test_probas, dim=0)               # (len(test_dataset), 2)
    # Pick the decision threshold that maximises F1 on the out-of-fold predictions.
    best_f1, best_thres = search_f1(dev_labels, dev_probas[:, 1])
    print(best_f1, best_thres)
    test_preds = (test_probas[:, 1] > best_thres).type(torch.long)

    return test_preds, k_test_probas, dev_probas, dev_labels, best_f1, best_thres

## 操作

In [10]:
# End-to-end run: K-fold fine-tuning, out-of-fold evaluation, and test-set submission.
model_version = 'FFTPD-5fold-V2.9'     # model version (used in checkpoint file names)
scheme_version = 'FFTPD-5fold-V2.9'    # scheme version (used in log/report/result file names)
train_df = pd.read_csv(config.augmented_V0204_path)
test_df = pd.read_csv(config.test_V0_path)
k = 5

params = {
    'batch_size': 32,
    'epochs': 20,
    'lr': 2e-05,
    'l2_weight':0,
    'weight_decay': 0.01,
    'dropout_rate': 0.5,
    'momentum': 0.8,
    'early_stoping':5,
    'patience': 3,
    'lstm_hidden_size': 512,
    'num_directions': 2,
    'max_seq_len': config.max_seq_len,
    'max_gradient_norm': 10.0,
    'pretrained_model_path': config.pretrained_roberta_wwm_ext_large_path, 
}

# Load the tokenizer from the model directory: passing the path of a single
# vocab file is deprecated (see the warning this cell used to emit).
bert_tokenizer = BertTokenizer.from_pretrained(params['pretrained_model_path'])
output_path = os.path.join(config.root_path, 'output/'+scheme_version+'.txt')

# Create every output location up front so a multi-hour run cannot fail at the
# very end because a directory is missing.
for sub_dir in ('output', 'report', 'result', 'submission'):
    os.makedirs(os.path.join(config.root_path, sub_dir), exist_ok=True)
os.makedirs(config.best_model_path, exist_ok=True)

print("\t* K fold training and validating...")
test_preds, k_test_probas, dev_probas, dev_labels, best_f1, best_thres = k_fold_cross_val(train_df, test_df, params, k, 
                                                                                          bert_tokenizer, config.best_model_path, 
                                                                                          output_path, model_version)
# Out-of-fold evaluation using the threshold selected on the same predictions.
dev_preds = (dev_probas[:, 1] > best_thres).type(torch.long)
fpr, tpr, thresholds = roc_curve(dev_labels, dev_probas[:, 1], pos_label=1)
dev_auc = auc(fpr, tpr)
print('dev auc: ',dev_auc)

print("\t* Saving dev result...")
with open(os.path.join(config.root_path, 'report/'+scheme_version+'_'+'classification_report.txt'), 'w') as fp:
    fp.write(classification_report(dev_labels, dev_preds))
    fp.write('\n')
    fp.write('f1-score: {:.4f}'.format(f1_score(dev_labels, dev_preds)))
    fp.write(' auc: {:.4f}'.format(dev_auc))

# train_df now carries the out-of-fold proba_0 / proba_1 columns added by k_fold_cross_val.
train_df.to_csv(os.path.join(config.root_path, 'result/'+scheme_version+'_pred_result.csv'), index=False)

print("\t* Predicting...")
test_df['pred'] = test_preds.cpu().numpy()
k_test_probas = k_test_probas.cpu().numpy()

print("\t* Saving test result...")
# Save the submission file (tab-separated, no header, no index).
time_str = time.strftime("%Y%m%d%H%M", time.localtime())
test_df[['dialog_id', 'reply_id', 'pred']].to_csv(os.path.join(config.root_path,'submission/'+scheme_version+'_'+time_str+'.csv'),
                                                  sep='\t',
                                                  index=False,
                                                  header=False)
# Save the per-fold (weighted) test probabilities. np.save writes the .npy
# format and appends ".npy" to any other suffix, so the former ".npz" name
# ended up on disk as "..._test_probas.npz.npy"; use the matching extension.
k_test_probas_path = os.path.join(config.root_path, 'result/'+scheme_version+'_'+str(k)+'_test_probas.npy')
np.save(k_test_probas_path, k_test_probas)

Calling BertTokenizer.from_pretrained() with the path to a single file or url is deprecated


	* K fold training and validating...
	* Start 0 fold
	* Building dataset...
	* Building model...


  0%|          | 0/17 [00:00<?, ?it/s]

	* Building model time:10.9124s


100%|██████████| 17/17 [00:32<00:00,  1.78s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

	* Validation loss before training: 0.7356, accuracy:25.3301, f1_score: 0.3908, best_thres: 0.0000, auc: 0.4542

-> Start epoch 1


Batch num: 1080. Avg. batch proc. time: 0.4778s, loss: 0.3594: 100%|██████████| 1080/1080 [08:40<00:00,  2.51it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:520.3491s, loss: 0.3594, accuracy: 84.6181%, f1_score: 0.6632, auc: 0.8812


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]


-> Validation loss: 0.2675, accuracy: 88.7653%, f1_score: 0.7621, best_thres: 0.0000, auc: 0.9377


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 2


Batch num: 1080. Avg. batch proc. time: 0.4796s, loss: 0.2048: 100%|██████████| 1080/1080 [08:42<00:00,  2.56it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:522.3156s, loss: 0.2048, accuracy: 92.0281%, f1_score: 0.8406, auc: 0.9639


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]


-> Validation loss: 0.2797, accuracy: 89.4950%, f1_score: 0.7931, best_thres: 0.0000, auc: 0.9483


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 3


Batch num: 1080. Avg. batch proc. time: 0.4812s, loss: 0.1195: 100%|██████████| 1080/1080 [08:43<00:00,  2.51it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:524.0511s, loss: 0.1195, accuracy: 95.5522%, f1_score: 0.9120, auc: 0.9873


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]


-> Validation loss: 0.2968, accuracy: 89.9236%, f1_score: 0.8044, best_thres: 0.0000, auc: 0.9508


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 4


Batch num: 1080. Avg. batch proc. time: 0.4803s, loss: 0.0777: 100%|██████████| 1080/1080 [08:42<00:00,  2.59it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:523.0206s, loss: 0.0777, accuracy: 97.2433%, f1_score: 0.9454, auc: 0.9943


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]


-> Validation loss: 0.4124, accuracy: 90.4795%, f1_score: 0.8170, best_thres: 0.0000, auc: 0.9512


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 5


Batch num: 1080. Avg. batch proc. time: 0.4792s, loss: 0.0577: 100%|██████████| 1080/1080 [08:41<00:00,  2.50it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:521.7467s, loss: 0.0577, accuracy: 98.0859%, f1_score: 0.9620, auc: 0.9964


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]


-> Validation loss: 0.4155, accuracy: 91.0239%, f1_score: 0.8216, best_thres: 0.0000, auc: 0.9546


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 6


Batch num: 1080. Avg. batch proc. time: 0.4798s, loss: 0.0410: 100%|██████████| 1080/1080 [08:42<00:00,  2.53it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:522.6096s, loss: 0.0410, accuracy: 98.6245%, f1_score: 0.9727, auc: 0.9981


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.3848, accuracy: 91.0934%, f1_score: 0.8229, best_thres: 0.0000, auc: 0.9537
-> Start epoch 7


Batch num: 1080. Avg. batch proc. time: 0.4809s, loss: 0.0355: 100%|██████████| 1080/1080 [08:43<00:00,  2.48it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:523.4656s, loss: 0.0355, accuracy: 98.8012%, f1_score: 0.9762, auc: 0.9985


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.4622, accuracy: 90.8501%, f1_score: 0.8171, best_thres: 0.0000, auc: 0.9513
-> Start epoch 8


Batch num: 1080. Avg. batch proc. time: 0.4806s, loss: 0.0277: 100%|██████████| 1080/1080 [08:43<00:00,  2.59it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:523.2082s, loss: 0.0277, accuracy: 99.0908%, f1_score: 0.9819, auc: 0.9991


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]


-> Validation loss: 0.4437, accuracy: 91.5335%, f1_score: 0.8281, best_thres: 0.0000, auc: 0.9551


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 9


Batch num: 1080. Avg. batch proc. time: 0.4807s, loss: 0.0250: 100%|██████████| 1080/1080 [08:43<00:00,  2.54it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:523.3669s, loss: 0.0250, accuracy: 99.1805%, f1_score: 0.9837, auc: 0.9991


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]


-> Validation loss: 0.4323, accuracy: 91.6377%, f1_score: 0.8331, best_thres: 0.0000, auc: 0.9578


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 10


Batch num: 1080. Avg. batch proc. time: 0.4800s, loss: 0.0182: 100%|██████████| 1080/1080 [08:42<00:00,  2.58it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:522.5630s, loss: 0.0182, accuracy: 99.3398%, f1_score: 0.9869, auc: 0.9996


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.5468, accuracy: 91.8578%, f1_score: 0.8336, best_thres: 0.0000, auc: 0.9551
-> Start epoch 11


Batch num: 1080. Avg. batch proc. time: 0.4789s, loss: 0.0177: 100%|██████████| 1080/1080 [08:41<00:00,  2.57it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:521.3409s, loss: 0.0177, accuracy: 99.3774%, f1_score: 0.9876, auc: 0.9996


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.5039, accuracy: 91.8694%, f1_score: 0.8284, best_thres: 0.0000, auc: 0.9568
-> Start epoch 12


Batch num: 1080. Avg. batch proc. time: 0.4803s, loss: 0.0141: 100%|██████████| 1080/1080 [08:42<00:00,  2.55it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:522.8370s, loss: 0.0141, accuracy: 99.5019%, f1_score: 0.9901, auc: 0.9998


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.5692, accuracy: 91.9388%, f1_score: 0.8384, best_thres: 0.0000, auc: 0.9567
-> Start epoch 13


Batch num: 1080. Avg. batch proc. time: 0.4796s, loss: 0.0113: 100%|██████████| 1080/1080 [08:41<00:00,  2.60it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:522.0811s, loss: 0.0113, accuracy: 99.5367%, f1_score: 0.9908, auc: 0.9999


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.5685, accuracy: 92.0083%, f1_score: 0.8376, best_thres: 0.0000, auc: 0.9566
-> Start epoch 14


Batch num: 1080. Avg. batch proc. time: 0.4811s, loss: 0.0106: 100%|██████████| 1080/1080 [08:43<00:00,  2.56it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:523.6811s, loss: 0.0106, accuracy: 99.6149%, f1_score: 0.9923, auc: 0.9999


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]


-> Validation loss: 0.5828, accuracy: 91.9157%, f1_score: 0.8368, best_thres: 0.0000, auc: 0.9565
-> Early stopping: patience limit reached, stopping...
	* Start 1 fold
	* Building dataset...
	* Building model...


  0%|          | 0/17 [00:00<?, ?it/s]

	* Building model time:7.3997s


100%|██████████| 17/17 [00:33<00:00,  1.83s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

	* Validation loss before training: 0.6727, accuracy:71.1026, f1_score: 0.0963, best_thres: 0.0000, auc: 0.4615

-> Start epoch 1


Batch num: 1080. Avg. batch proc. time: 0.4788s, loss: 0.3486: 100%|██████████| 1080/1080 [08:41<00:00,  2.51it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:521.2841s, loss: 0.3486, accuracy: 85.1248%, f1_score: 0.6760, auc: 0.8898


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]


-> Validation loss: 0.2754, accuracy: 88.8696%, f1_score: 0.7703, best_thres: 0.0000, auc: 0.9329


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 2


Batch num: 1080. Avg. batch proc. time: 0.4803s, loss: 0.2043: 100%|██████████| 1080/1080 [08:42<00:00,  2.55it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:523.1400s, loss: 0.2043, accuracy: 91.9615%, f1_score: 0.8388, auc: 0.9639


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]


-> Validation loss: 0.2681, accuracy: 89.2750%, f1_score: 0.7987, best_thres: 0.0000, auc: 0.9489


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 3


Batch num: 1080. Avg. batch proc. time: 0.4811s, loss: 0.1206: 100%|██████████| 1080/1080 [08:43<00:00,  2.56it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:523.9505s, loss: 0.1206, accuracy: 95.6130%, f1_score: 0.9132, auc: 0.9866


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]


-> Validation loss: 0.2778, accuracy: 90.3174%, f1_score: 0.8109, best_thres: 0.0000, auc: 0.9520


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 4


Batch num: 1080. Avg. batch proc. time: 0.4813s, loss: 0.0834: 100%|██████████| 1080/1080 [08:44<00:00,  2.55it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:524.7547s, loss: 0.0834, accuracy: 97.1014%, f1_score: 0.9426, auc: 0.9933


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]


-> Validation loss: 0.3230, accuracy: 90.9080%, f1_score: 0.8207, best_thres: 0.0000, auc: 0.9553


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 5


Batch num: 1080. Avg. batch proc. time: 0.4835s, loss: 0.0569: 100%|██████████| 1080/1080 [08:47<00:00,  2.56it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:527.2159s, loss: 0.0569, accuracy: 98.0425%, f1_score: 0.9612, auc: 0.9966


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.3507, accuracy: 91.6261%, f1_score: 0.8268, best_thres: 0.0000, auc: 0.9512
-> Start epoch 6


Batch num: 1080. Avg. batch proc. time: 0.4819s, loss: 0.0428: 100%|██████████| 1080/1080 [08:44<00:00,  2.53it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:524.8827s, loss: 0.0428, accuracy: 98.5493%, f1_score: 0.9712, auc: 0.9978


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.3977, accuracy: 90.5258%, f1_score: 0.8193, best_thres: 0.0000, auc: 0.9543
-> Start epoch 7


Batch num: 1080. Avg. batch proc. time: 0.4832s, loss: 0.0371: 100%|██████████| 1080/1080 [08:46<00:00,  2.59it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:526.3246s, loss: 0.0371, accuracy: 98.7780%, f1_score: 0.9757, auc: 0.9982


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.4214, accuracy: 91.6725%, f1_score: 0.8319, best_thres: 0.0000, auc: 0.9538
-> Start epoch 8


Batch num: 1080. Avg. batch proc. time: 0.4806s, loss: 0.0299: 100%|██████████| 1080/1080 [08:43<00:00,  2.58it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:523.7028s, loss: 0.0299, accuracy: 99.0502%, f1_score: 0.9811, auc: 0.9986


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.4334, accuracy: 90.6069%, f1_score: 0.8219, best_thres: 0.0000, auc: 0.9536
-> Start epoch 9


Batch num: 1080. Avg. batch proc. time: 0.4808s, loss: 0.0245: 100%|██████████| 1080/1080 [08:43<00:00,  2.57it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:523.7680s, loss: 0.0245, accuracy: 99.1545%, f1_score: 0.9832, auc: 0.9993


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]


-> Validation loss: 0.4601, accuracy: 91.6840%, f1_score: 0.8352, best_thres: 0.0000, auc: 0.9532
-> Early stopping: patience limit reached, stopping...
	* Start 2 fold
	* Building dataset...
	* Building model...


  0%|          | 0/17 [00:00<?, ?it/s]

	* Building model time:7.2862s


100%|██████████| 17/17 [00:33<00:00,  1.83s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

	* Validation loss before training: 0.6641, accuracy:72.1682, f1_score: 0.0935, best_thres: 0.0000, auc: 0.5245

-> Start epoch 1


Batch num: 1080. Avg. batch proc. time: 0.4809s, loss: 0.3501: 100%|██████████| 1080/1080 [08:44<00:00,  2.54it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:524.4625s, loss: 0.3501, accuracy: 85.2290%, f1_score: 0.6756, auc: 0.8875


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]


-> Validation loss: 0.2643, accuracy: 88.8001%, f1_score: 0.7770, best_thres: 0.0000, auc: 0.9405


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 2


Batch num: 1080. Avg. batch proc. time: 0.4812s, loss: 0.2057: 100%|██████████| 1080/1080 [08:44<00:00,  2.50it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:524.2930s, loss: 0.2057, accuracy: 91.9500%, f1_score: 0.8370, auc: 0.9635


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]


-> Validation loss: 0.2502, accuracy: 90.2363%, f1_score: 0.8082, best_thres: 0.0000, auc: 0.9492


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 3


Batch num: 1080. Avg. batch proc. time: 0.4791s, loss: 0.1268: 100%|██████████| 1080/1080 [08:41<00:00,  2.52it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:521.8004s, loss: 0.1268, accuracy: 95.3987%, f1_score: 0.9079, auc: 0.9855


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]


-> Validation loss: 0.2910, accuracy: 90.3984%, f1_score: 0.8152, best_thres: 0.0000, auc: 0.9506


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 4


Batch num: 1080. Avg. batch proc. time: 0.4797s, loss: 0.0857: 100%|██████████| 1080/1080 [08:42<00:00,  2.57it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:522.5707s, loss: 0.0857, accuracy: 97.0551%, f1_score: 0.9412, auc: 0.9929


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]


-> Validation loss: 0.3079, accuracy: 90.9544%, f1_score: 0.8226, best_thres: 0.0000, auc: 0.9529


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 5


Batch num: 1080. Avg. batch proc. time: 0.4809s, loss: 0.0640: 100%|██████████| 1080/1080 [08:43<00:00,  2.59it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:523.8719s, loss: 0.0640, accuracy: 97.7095%, f1_score: 0.9543, auc: 0.9958


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.3796, accuracy: 91.1513%, f1_score: 0.8165, best_thres: 0.0000, auc: 0.9502
-> Start epoch 6


Batch num: 1080. Avg. batch proc. time: 0.4822s, loss: 0.0486: 100%|██████████| 1080/1080 [08:45<00:00,  2.53it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:525.1907s, loss: 0.0486, accuracy: 98.3668%, f1_score: 0.9673, auc: 0.9975


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]


-> Validation loss: 0.4814, accuracy: 90.5606%, f1_score: 0.8268, best_thres: 0.0000, auc: 0.9543


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 7


Batch num: 1080. Avg. batch proc. time: 0.4806s, loss: 0.0368: 100%|██████████| 1080/1080 [08:43<00:00,  2.54it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:523.4487s, loss: 0.0368, accuracy: 98.8157%, f1_score: 0.9763, auc: 0.9982


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]


-> Validation loss: 0.4688, accuracy: 91.0934%, f1_score: 0.8319, best_thres: 0.0000, auc: 0.9550


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 8


Batch num: 1080. Avg. batch proc. time: 0.4795s, loss: 0.0296: 100%|██████████| 1080/1080 [08:42<00:00,  2.51it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:522.3597s, loss: 0.0296, accuracy: 98.9865%, f1_score: 0.9797, auc: 0.9991


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.4954, accuracy: 91.7304%, f1_score: 0.8395, best_thres: 0.0000, auc: 0.9500
-> Start epoch 9


Batch num: 1080. Avg. batch proc. time: 0.4805s, loss: 0.0267: 100%|██████████| 1080/1080 [08:43<00:00,  2.60it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:523.3446s, loss: 0.0267, accuracy: 99.0821%, f1_score: 0.9816, auc: 0.9991


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.4944, accuracy: 91.4061%, f1_score: 0.8340, best_thres: 0.0000, auc: 0.9520
-> Start epoch 10


Batch num: 1080. Avg. batch proc. time: 0.4811s, loss: 0.0242: 100%|██████████| 1080/1080 [08:43<00:00,  2.58it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:524.1678s, loss: 0.0242, accuracy: 99.1834%, f1_score: 0.9836, auc: 0.9993


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.4487, accuracy: 92.2284%, f1_score: 0.8448, best_thres: 0.0000, auc: 0.9550
-> Start epoch 11


Batch num: 1080. Avg. batch proc. time: 0.4806s, loss: 0.0206: 100%|██████████| 1080/1080 [08:43<00:00,  2.60it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:523.4206s, loss: 0.0206, accuracy: 99.2761%, f1_score: 0.9855, auc: 0.9996


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]


-> Validation loss: 0.4837, accuracy: 91.9968%, f1_score: 0.8399, best_thres: 0.0000, auc: 0.9561


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 12


Batch num: 1080. Avg. batch proc. time: 0.4813s, loss: 0.0153: 100%|██████████| 1080/1080 [08:44<00:00,  2.52it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:524.2098s, loss: 0.0153, accuracy: 99.4382%, f1_score: 0.9887, auc: 0.9997


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.5332, accuracy: 92.0199%, f1_score: 0.8416, best_thres: 0.0000, auc: 0.9541
-> Start epoch 13


Batch num: 1080. Avg. batch proc. time: 0.4788s, loss: 0.0123: 100%|██████████| 1080/1080 [08:41<00:00,  2.59it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:521.4843s, loss: 0.0123, accuracy: 99.5396%, f1_score: 0.9908, auc: 0.9998


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]


-> Validation loss: 0.5014, accuracy: 91.8694%, f1_score: 0.8372, best_thres: 0.0000, auc: 0.9561


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 14


Batch num: 1080. Avg. batch proc. time: 0.4799s, loss: 0.0110: 100%|██████████| 1080/1080 [08:42<00:00,  2.58it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:522.7104s, loss: 0.0110, accuracy: 99.5656%, f1_score: 0.9913, auc: 0.9999


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.5618, accuracy: 92.1126%, f1_score: 0.8421, best_thres: 0.0000, auc: 0.9560
-> Start epoch 15


Batch num: 1080. Avg. batch proc. time: 0.4803s, loss: 0.0093: 100%|██████████| 1080/1080 [08:43<00:00,  2.54it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:523.2498s, loss: 0.0093, accuracy: 99.6178%, f1_score: 0.9923, auc: 0.9999


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.5709, accuracy: 92.3674%, f1_score: 0.8424, best_thres: 0.0000, auc: 0.9529
-> Start epoch 16


Batch num: 1080. Avg. batch proc. time: 0.4802s, loss: 0.0083: 100%|██████████| 1080/1080 [08:42<00:00,  2.59it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:523.0903s, loss: 0.0083, accuracy: 99.6699%, f1_score: 0.9934, auc: 0.9999


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.5786, accuracy: 92.0894%, f1_score: 0.8379, best_thres: 0.0000, auc: 0.9495
-> Start epoch 17


Batch num: 1080. Avg. batch proc. time: 0.4798s, loss: 0.0071: 100%|██████████| 1080/1080 [08:42<00:00,  2.59it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:522.6817s, loss: 0.0071, accuracy: 99.6670%, f1_score: 0.9933, auc: 0.9999


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.6062, accuracy: 92.2516%, f1_score: 0.8467, best_thres: 0.0000, auc: 0.9507
-> Start epoch 18


Batch num: 1080. Avg. batch proc. time: 0.4786s, loss: 0.0052: 100%|██████████| 1080/1080 [08:41<00:00,  2.55it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:521.3721s, loss: 0.0052, accuracy: 99.7133%, f1_score: 0.9942, auc: 1.0000


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]


-> Validation loss: 0.6230, accuracy: 92.4021%, f1_score: 0.8459, best_thres: 0.0000, auc: 0.9471
-> Early stopping: patience limit reached, stopping...
	* Start 3 fold
	* Building dataset...
	* Building model...


  0%|          | 0/17 [00:00<?, ?it/s]

	* Building model time:7.9310s


100%|██████████| 17/17 [00:33<00:00,  1.83s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

	* Validation loss before training: 0.7148, accuracy:31.4259, f1_score: 0.3994, best_thres: 0.0000, auc: 0.5393

-> Start epoch 1


Batch num: 1080. Avg. batch proc. time: 0.4808s, loss: 0.3619: 100%|██████████| 1080/1080 [08:43<00:00,  2.57it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:523.6235s, loss: 0.3619, accuracy: 84.6011%, f1_score: 0.6589, auc: 0.8785


100%|██████████| 17/17 [00:33<00:00,  1.83s/it]


-> Validation loss: 0.2733, accuracy: 88.7061%, f1_score: 0.7815, best_thres: 0.0000, auc: 0.9390


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 2


Batch num: 1080. Avg. batch proc. time: 0.4798s, loss: 0.2208: 100%|██████████| 1080/1080 [08:42<00:00,  2.57it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:522.5520s, loss: 0.2208, accuracy: 91.4521%, f1_score: 0.8263, auc: 0.9578


100%|██████████| 17/17 [00:33<00:00,  1.83s/it]


-> Validation loss: 0.2698, accuracy: 89.8181%, f1_score: 0.8104, best_thres: 0.0000, auc: 0.9522


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 3


Batch num: 1080. Avg. batch proc. time: 0.4796s, loss: 0.1310: 100%|██████████| 1080/1080 [08:42<00:00,  2.56it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:522.4445s, loss: 0.1310, accuracy: 95.0832%, f1_score: 0.9017, auc: 0.9848


100%|██████████| 17/17 [00:33<00:00,  1.83s/it]


-> Validation loss: 0.2866, accuracy: 90.9997%, f1_score: 0.8242, best_thres: 0.0000, auc: 0.9524


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 4


Batch num: 1080. Avg. batch proc. time: 0.4799s, loss: 0.0845: 100%|██████████| 1080/1080 [08:42<00:00,  2.52it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:522.8434s, loss: 0.0845, accuracy: 97.0001%, f1_score: 0.9402, auc: 0.9932


100%|██████████| 17/17 [00:33<00:00,  1.83s/it]


-> Validation loss: 0.3854, accuracy: 90.7564%, f1_score: 0.8133, best_thres: 0.0000, auc: 0.9525


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 5


Batch num: 1080. Avg. batch proc. time: 0.4811s, loss: 0.0642: 100%|██████████| 1080/1080 [08:43<00:00,  2.58it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:524.1038s, loss: 0.0642, accuracy: 97.7414%, f1_score: 0.9549, auc: 0.9954


100%|██████████| 17/17 [00:32<00:00,  1.83s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.4422, accuracy: 90.5942%, f1_score: 0.8242, best_thres: 0.0000, auc: 0.9483
-> Start epoch 6


Batch num: 1080. Avg. batch proc. time: 0.4795s, loss: 0.0479: 100%|██████████| 1080/1080 [08:42<00:00,  2.51it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:522.3452s, loss: 0.0479, accuracy: 98.3929%, f1_score: 0.9679, auc: 0.9975


100%|██████████| 17/17 [00:32<00:00,  1.83s/it]


-> Validation loss: 0.3939, accuracy: 91.1734%, f1_score: 0.8331, best_thres: 0.0000, auc: 0.9533


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 7


Batch num: 1080. Avg. batch proc. time: 0.4818s, loss: 0.0388: 100%|██████████| 1080/1080 [08:44<00:00,  2.52it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:524.8701s, loss: 0.0388, accuracy: 98.6825%, f1_score: 0.9736, auc: 0.9982


100%|██████████| 17/17 [00:33<00:00,  1.83s/it]


-> Validation loss: 0.4359, accuracy: 91.5325%, f1_score: 0.8347, best_thres: 0.0000, auc: 0.9535


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 8


Batch num: 1080. Avg. batch proc. time: 0.4803s, loss: 0.0324: 100%|██████████| 1080/1080 [08:42<00:00,  2.55it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:523.1608s, loss: 0.0324, accuracy: 98.9692%, f1_score: 0.9793, auc: 0.9986


100%|██████████| 17/17 [00:33<00:00,  1.83s/it]


-> Validation loss: 0.4361, accuracy: 91.5788%, f1_score: 0.8375, best_thres: 0.0000, auc: 0.9553


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 9


Batch num: 1080. Avg. batch proc. time: 0.4790s, loss: 0.0256: 100%|██████████| 1080/1080 [08:41<00:00,  2.49it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:521.6531s, loss: 0.0256, accuracy: 99.1053%, f1_score: 0.9821, auc: 0.9990


100%|██████████| 17/17 [00:33<00:00,  1.83s/it]


-> Validation loss: 0.4229, accuracy: 91.6367%, f1_score: 0.8386, best_thres: 0.0000, auc: 0.9555


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 10


Batch num: 1080. Avg. batch proc. time: 0.4809s, loss: 0.0234: 100%|██████████| 1080/1080 [08:43<00:00,  2.51it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:523.7195s, loss: 0.0234, accuracy: 99.2269%, f1_score: 0.9845, auc: 0.9992


100%|██████████| 17/17 [00:33<00:00,  1.83s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.4695, accuracy: 91.9032%, f1_score: 0.8433, best_thres: 0.0000, auc: 0.9530
-> Start epoch 11


Batch num: 1080. Avg. batch proc. time: 0.4803s, loss: 0.0194: 100%|██████████| 1080/1080 [08:42<00:00,  2.53it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:523.0887s, loss: 0.0194, accuracy: 99.3688%, f1_score: 0.9874, auc: 0.9992


100%|██████████| 17/17 [00:33<00:00,  1.83s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.4981, accuracy: 91.8568%, f1_score: 0.8368, best_thres: 0.0000, auc: 0.9548
-> Start epoch 12


Batch num: 1080. Avg. batch proc. time: 0.4807s, loss: 0.0171: 100%|██████████| 1080/1080 [08:43<00:00,  2.49it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:523.6366s, loss: 0.0171, accuracy: 99.4006%, f1_score: 0.9880, auc: 0.9996


100%|██████████| 17/17 [00:33<00:00,  1.83s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.5627, accuracy: 91.5788%, f1_score: 0.8334, best_thres: 0.0000, auc: 0.9516
-> Start epoch 13


Batch num: 1080. Avg. batch proc. time: 0.4804s, loss: 0.0130: 100%|██████████| 1080/1080 [08:42<00:00,  2.55it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:523.2194s, loss: 0.0130, accuracy: 99.5454%, f1_score: 0.9909, auc: 0.9996


100%|██████████| 17/17 [00:33<00:00,  1.83s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.5696, accuracy: 92.3318%, f1_score: 0.8491, best_thres: 0.0000, auc: 0.9470
-> Start epoch 14


Batch num: 1080. Avg. batch proc. time: 0.4813s, loss: 0.0119: 100%|██████████| 1080/1080 [08:43<00:00,  2.58it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:524.1833s, loss: 0.0119, accuracy: 99.5946%, f1_score: 0.9919, auc: 0.9996


100%|██████████| 17/17 [00:33<00:00,  1.83s/it]


-> Validation loss: 0.5417, accuracy: 92.0422%, f1_score: 0.8399, best_thres: 0.0000, auc: 0.9499
-> Early stopping: patience limit reached, stopping...
	* Start 4 fold
	* Building dataset...
	* Building model...


  0%|          | 0/17 [00:00<?, ?it/s]

	* Building model time:7.4618s


100%|██████████| 17/17 [00:33<00:00,  1.83s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

	* Validation loss before training: 0.7805, accuracy:25.5415, f1_score: 0.4069, best_thres: 0.0000, auc: 0.4883

-> Start epoch 1


Batch num: 1080. Avg. batch proc. time: 0.4805s, loss: 0.3540: 100%|██████████| 1080/1080 [08:43<00:00,  2.51it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:523.2629s, loss: 0.3540, accuracy: 84.7285%, f1_score: 0.6636, auc: 0.8841


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]


-> Validation loss: 0.3016, accuracy: 88.0806%, f1_score: 0.7357, best_thres: 0.0000, auc: 0.9393


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 2


Batch num: 1080. Avg. batch proc. time: 0.4808s, loss: 0.2109: 100%|██████████| 1080/1080 [08:43<00:00,  2.50it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:523.6582s, loss: 0.2109, accuracy: 91.5159%, f1_score: 0.8268, auc: 0.9618


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]


-> Validation loss: 0.3172, accuracy: 89.5865%, f1_score: 0.7804, best_thres: 0.0000, auc: 0.9469


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 3


Batch num: 1080. Avg. batch proc. time: 0.4800s, loss: 0.1312: 100%|██████████| 1080/1080 [08:42<00:00,  2.51it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:522.7810s, loss: 0.1312, accuracy: 95.0804%, f1_score: 0.9010, auc: 0.9849


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]


-> Validation loss: 0.3060, accuracy: 90.5942%, f1_score: 0.8225, best_thres: 0.0000, auc: 0.9529


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 4


Batch num: 1080. Avg. batch proc. time: 0.4808s, loss: 0.0853: 100%|██████████| 1080/1080 [08:43<00:00,  2.58it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:523.7074s, loss: 0.0853, accuracy: 96.8756%, f1_score: 0.9374, auc: 0.9933


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.3655, accuracy: 89.9571%, f1_score: 0.8173, best_thres: 0.0000, auc: 0.9522
-> Start epoch 5


Batch num: 1080. Avg. batch proc. time: 0.4805s, loss: 0.0625: 100%|██████████| 1080/1080 [08:43<00:00,  2.57it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:523.2749s, loss: 0.0625, accuracy: 97.8109%, f1_score: 0.9560, auc: 0.9963


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.4092, accuracy: 89.7602%, f1_score: 0.8157, best_thres: 0.0000, auc: 0.9451
-> Start epoch 6


Batch num: 1080. Avg. batch proc. time: 0.4798s, loss: 0.0489: 100%|██████████| 1080/1080 [08:42<00:00,  2.58it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:522.6432s, loss: 0.0489, accuracy: 98.3061%, f1_score: 0.9660, auc: 0.9975


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.4462, accuracy: 91.1502%, f1_score: 0.8295, best_thres: 0.0000, auc: 0.9504
-> Start epoch 7


Batch num: 1080. Avg. batch proc. time: 0.4802s, loss: 0.0373: 100%|██████████| 1080/1080 [08:42<00:00,  2.53it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:523.1041s, loss: 0.0373, accuracy: 98.7781%, f1_score: 0.9755, auc: 0.9985


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]


-> Validation loss: 0.3952, accuracy: 91.3356%, f1_score: 0.8327, best_thres: 0.0000, auc: 0.9529


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 8


Batch num: 1080. Avg. batch proc. time: 0.4821s, loss: 0.0327: 100%|██████████| 1080/1080 [08:45<00:00,  2.50it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:525.1544s, loss: 0.0327, accuracy: 98.9373%, f1_score: 0.9786, auc: 0.9987


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]


-> Validation loss: 0.3742, accuracy: 91.4051%, f1_score: 0.8329, best_thres: 0.0000, auc: 0.9550


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 9


Batch num: 1080. Avg. batch proc. time: 0.4804s, loss: 0.0270: 100%|██████████| 1080/1080 [08:43<00:00,  2.48it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:523.2507s, loss: 0.0270, accuracy: 99.0589%, f1_score: 0.9811, auc: 0.9991


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]


-> Validation loss: 0.4172, accuracy: 91.7989%, f1_score: 0.8370, best_thres: 0.0000, auc: 0.9551


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 10


Batch num: 1080. Avg. batch proc. time: 0.4801s, loss: 0.0225: 100%|██████████| 1080/1080 [08:42<00:00,  2.49it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:522.9479s, loss: 0.0225, accuracy: 99.2298%, f1_score: 0.9845, auc: 0.9994


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.4653, accuracy: 91.8105%, f1_score: 0.8401, best_thres: 0.0000, auc: 0.9547
-> Start epoch 11


Batch num: 1080. Avg. batch proc. time: 0.4804s, loss: 0.0181: 100%|██████████| 1080/1080 [08:43<00:00,  2.53it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:523.2141s, loss: 0.0181, accuracy: 99.3195%, f1_score: 0.9863, auc: 0.9996


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.5528, accuracy: 91.6947%, f1_score: 0.8341, best_thres: 0.0000, auc: 0.9547
-> Start epoch 12


Batch num: 1080. Avg. batch proc. time: 0.4800s, loss: 0.0151: 100%|██████████| 1080/1080 [08:42<00:00,  2.56it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:522.7440s, loss: 0.0151, accuracy: 99.4585%, f1_score: 0.9891, auc: 0.9997


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.5585, accuracy: 91.9842%, f1_score: 0.8378, best_thres: 0.0000, auc: 0.9537
-> Start epoch 13


Batch num: 1080. Avg. batch proc. time: 0.4798s, loss: 0.0136: 100%|██████████| 1080/1080 [08:42<00:00,  2.49it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:522.4786s, loss: 0.0136, accuracy: 99.4643%, f1_score: 0.9892, auc: 0.9998


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]


-> Validation loss: 0.5004, accuracy: 92.2043%, f1_score: 0.8450, best_thres: 0.0000, auc: 0.9564


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 14


Batch num: 1080. Avg. batch proc. time: 0.4797s, loss: 0.0119: 100%|██████████| 1080/1080 [08:42<00:00,  2.55it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:522.3629s, loss: 0.0119, accuracy: 99.5280%, f1_score: 0.9905, auc: 0.9999


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]


-> Validation loss: 0.5396, accuracy: 92.0306%, f1_score: 0.8435, best_thres: 0.0000, auc: 0.9571


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 15


Batch num: 1080. Avg. batch proc. time: 0.4786s, loss: 0.0095: 100%|██████████| 1080/1080 [08:41<00:00,  2.56it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:521.2491s, loss: 0.0095, accuracy: 99.6236%, f1_score: 0.9924, auc: 0.9999


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.5847, accuracy: 92.1927%, f1_score: 0.8416, best_thres: 0.0000, auc: 0.9560
-> Start epoch 16


Batch num: 1080. Avg. batch proc. time: 0.4785s, loss: 0.0083: 100%|██████████| 1080/1080 [08:40<00:00,  2.52it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:521.2021s, loss: 0.0083, accuracy: 99.6438%, f1_score: 0.9928, auc: 0.9999


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]


-> Validation loss: 0.5915, accuracy: 92.1927%, f1_score: 0.8424, best_thres: 0.0000, auc: 0.9572


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 17


Batch num: 1080. Avg. batch proc. time: 0.4791s, loss: 0.0067: 100%|██████████| 1080/1080 [08:41<00:00,  2.49it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:521.7755s, loss: 0.0067, accuracy: 99.6873%, f1_score: 0.9937, auc: 1.0000


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]


-> Validation loss: 0.6066, accuracy: 92.1348%, f1_score: 0.8442, best_thres: 0.0000, auc: 0.9581


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 18


Batch num: 1080. Avg. batch proc. time: 0.4794s, loss: 0.0061: 100%|██████████| 1080/1080 [08:41<00:00,  2.56it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:522.1663s, loss: 0.0061, accuracy: 99.7018%, f1_score: 0.9940, auc: 1.0000


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.6414, accuracy: 92.1580%, f1_score: 0.8428, best_thres: 0.0000, auc: 0.9551
-> Start epoch 19


Batch num: 1080. Avg. batch proc. time: 0.4790s, loss: 0.0046: 100%|██████████| 1080/1080 [08:41<00:00,  2.52it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:521.7684s, loss: 0.0046, accuracy: 99.7626%, f1_score: 0.9952, auc: 1.0000


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.6586, accuracy: 92.2622%, f1_score: 0.8452, best_thres: 0.0000, auc: 0.9545
-> Start epoch 20


Batch num: 1080. Avg. batch proc. time: 0.4791s, loss: 0.0037: 100%|██████████| 1080/1080 [08:41<00:00,  2.54it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:521.8352s, loss: 0.0037, accuracy: 99.8002%, f1_score: 0.9960, auc: 1.0000


100%|██████████| 17/17 [00:33<00:00,  1.84s/it]


-> Validation loss: 0.6698, accuracy: 92.3086%, f1_score: 0.8458, best_thres: 0.0000, auc: 0.9536
k_best_score : [0.95775376 0.95528151 0.95614805 0.95550344 0.95811628]
k weights : [[[0.20024947]]

 [[0.19973256]]

 [[0.19991374]]

 [[0.19977897]]

 [[0.20032526]]]
0.8352458638646942 0.58
dev auc:  0.9435412692817055
	* Saving dev result...
	* Predicting...
	* Saving test result...
