In [2]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import jieba
import re
import os
import time
import gc

from sklearn.model_selection import train_test_split, KFold
from torch.utils.data import Dataset, DataLoader
from torch.optim import Adam, AdamW
from torch.nn.utils.rnn import pad_sequence
from transformers import BertTokenizer, BertModel, BertForSequenceClassification, BertConfig, get_linear_schedule_with_warmup
from tqdm import tqdm
from sklearn.metrics import f1_score, auc, roc_curve, classification_report

In [3]:
# Default this process to GPU 3, but respect a device selection that the launcher
# (shell, scheduler, container) already exported instead of overriding it.
os.environ.setdefault('CUDA_VISIBLE_DEVICES', '3')

In [4]:
import sys
sys.path.append('..')

In [5]:
import config
from config import device, is_cuda

## dataset

In [6]:
class QAMatchDataset(Dataset):
    """Question/reply pair dataset producing fixed-length BERT inputs.

    Each row of ``df`` must provide the columns ``question`` and
    ``reply_content``; in ``train``/``dev`` mode it must also provide ``label``.

    :param df: pandas DataFrame holding the samples
    :param tokenizer: object exposing ``encode_plus`` (e.g. a BERT tokenizer)
    :param max_seq_len: every sample is truncated/padded to this many token ids
    :param mode: one of ``'train'``, ``'dev'``, ``'test'``; ``'test'`` yields no labels
    """

    def __init__(self, df, tokenizer, max_seq_len, mode):
        assert mode in ['train', 'dev', 'test']

        self.mode = mode
        self.tokenizer = tokenizer
        self.df = df
        self.max_seq_len = max_seq_len

    def __getitem__(self, idx):
        """Return ``(input_ids, token_type_ids, label)`` for the row at position ``idx``.

        The first two items are ``torch.long`` tensors of length ``max_seq_len``;
        ``label`` is a 0-d tensor in train/dev mode and ``None`` in test mode.
        """
        row = self.df.iloc[idx]   # look the row up once instead of once per column
        question = row['question']
        reply = row['reply_content']
        if self.mode in ['train', 'dev']:
            label_tensor = torch.tensor(row['label'])
        else:
            label_tensor = None

        # Adds [CLS]/[SEP] and truncates to max_seq_len; the tokenizer does not pad here.
        inputs = self.tokenizer.encode_plus(question, reply, add_special_tokens=True,
                                            max_length=self.max_seq_len, truncation='longest_first')

        # Right-pad with id 0 up to max_seq_len. New lists are built so the
        # tokenizer's own output is not mutated.
        padding = [0] * (self.max_seq_len - len(inputs['input_ids']))
        seq = inputs['input_ids'] + padding
        seq_segment = inputs['token_type_ids'] + padding

        # Build integer tensors directly rather than via a float32 round-trip.
        return (torch.tensor(seq, dtype=torch.long),
                torch.tensor(seq_segment, dtype=torch.long),
                label_tensor)

    def collate_fn(self, samples):
        """Stack samples into ``(input_ids, attention_mask, token_type_ids, labels)``.

        ``labels`` is ``None`` in test mode. The attention mask is 1 wherever the
        token id is non-zero, i.e. it relies on 0 being used only for padding
        (assumes the tokenizer's pad id is 0 -- TODO confirm for other vocabularies).
        """
        seqs = torch.stack([sample[0] for sample in samples])
        seq_segments = torch.stack([sample[1] for sample in samples])

        if self.mode in ['train', 'dev']:
            labels = torch.stack([sample[2] for sample in samples])
        else:
            labels = None

        seq_masks = (seqs != 0).long()

        return seqs, seq_masks, seq_segments, labels

    def __len__(self):
        """Number of rows in the underlying DataFrame."""
        return len(self.df)

## model

In [7]:
class TextCNN(nn.Module):
    """Text-CNN head: token embeddings in, one logit per sample out.

    For every window size ``h`` a ``Conv1d`` with ``feature_size`` output channels
    slides over the sequence, followed by ReLU and a global max-pool over the
    sequence axis. The pooled features of all windows are concatenated
    (``len(window_sizes) * feature_size`` values), passed through dropout and
    projected to a single logit.

    Shape flow per window:
        (batch, seq_len, embedding_size) --permute--> (batch, embedding_size, seq_len)
        --conv--> (batch, feature_size, seq_len-h+1) --global max--> (batch, feature_size, 1)

    :param embedding_size: size of each token embedding (Conv1d ``in_channels``)
    :param feature_size: number of filters per window size
    :param window_sizes: iterable of convolution kernel sizes
    :param max_seq_len: kept for interface compatibility; pooling no longer depends
        on it because an adaptive global max-pool is used
    :param dropout_rate: dropout probability applied before the linear layer
    """

    def __init__(self, embedding_size, feature_size, window_sizes, max_seq_len, dropout_rate=0.5):
        super(TextCNN, self).__init__()
        # AdaptiveMaxPool1d(1) always reduces the whole feature map to one value per
        # filter. A fixed MaxPool1d(max_seq_len-h+1) only does that when the input
        # length equals max_seq_len; longer inputs would yield extra pooled columns
        # that forward() would then fold into the batch dimension.
        # The pooling layer has no parameters, so state_dict keys are unchanged.
        self.convs = nn.ModuleList([
            nn.Sequential(nn.Conv1d(in_channels=embedding_size, out_channels=feature_size, kernel_size=h),
                          nn.ReLU(),
                          nn.AdaptiveMaxPool1d(1))
            for h in window_sizes
        ])
        self.dropout = nn.Dropout(p=dropout_rate)
        self.linear = nn.Linear(feature_size * len(window_sizes), 1)

    def forward(self, x):
        """
        :param x: (batch, seq_len, embedding_size); seq_len must be at least max(window_sizes)

        :return: (batch, 1) unnormalised logit per sample
        """
        x = x.permute(0, 2, 1)   # (batch, embedding_size, seq_len): Conv1d scans the last axis
        pooled = [conv(x) for conv in self.convs]   # each: (batch, feature_size, 1)
        out = torch.cat(pooled, dim=1)              # (batch, len(window_sizes)*feature_size, 1)
        out = out.view(out.size(0), -1)             # (batch, len(window_sizes)*feature_size)

        out = self.dropout(out)
        out = self.linear(out)

        return out       # (batch, 1)
    
class BertModelWithTextCNN(nn.Module):
    """BERT encoder followed by a TextCNN head for binary classification.

    ``params`` is a dict that must contain ``pretrained_model_path``,
    ``max_seq_len``, ``feature_size``, ``window_sizes`` and ``dropout_rate``.
    """

    def __init__(self, params):
        super(BertModelWithTextCNN, self).__init__()
        # The config is loaded separately only to read hidden_size for the CNN head.
        self.bert_config = BertConfig.from_pretrained(os.path.join(params['pretrained_model_path'], 'config.json'))
        self.max_seq_len = params['max_seq_len']
        self.bert = BertModel.from_pretrained(params['pretrained_model_path'], output_hidden_states=False)
        self.textcnn = TextCNN(self.bert_config.hidden_size, params['feature_size'], params['window_sizes'],
                               params['max_seq_len'], dropout_rate=params['dropout_rate'])

        self.loss_fn = nn.BCELoss()
        for param in self.bert.parameters():
            param.requires_grad = True     # full fine-tuning: every BERT weight is trainable

    def forward(self, batch_seqs, batch_seq_masks, batch_seq_segments, labels=None):
        """
        :param batch_seqs: input_ids
        :param batch_seq_masks: attention_mask
        :param batch_seq_segments: token_type_ids
        :param labels: optional 0/1 labels, one per sample
        :return: ``(loss, logits, probabilities)`` when ``labels`` is given,
                 otherwise ``(logits, probabilities)``.
                 ``logits`` is (batch, 1); ``probabilities`` is (batch, 2) with
                 column 0 = P(label 0) and column 1 = P(label 1).
        """
        # Element 0 of the BERT output is last_hidden_state: (batch, seq_len, hidden_size)
        last_hidden_state = self.bert(input_ids=batch_seqs,
                                      attention_mask=batch_seq_masks,
                                      token_type_ids=batch_seq_segments)[0]
        logits = self.textcnn(last_hidden_state)   # (batch, 1)
        output = torch.sigmoid(logits)             # (batch, 1): probability of label 1

        proba_0 = 1.0 - output                                  # (batch, 1)
        probabilities = torch.cat((proba_0, output), dim=1)     # (batch, 2)
        if labels is not None:
            # Flatten explicitly: a bare squeeze() turns a batch of one into a 0-d
            # tensor, which no longer matches the (1,) label tensor.
            loss = self.loss_fn(output.reshape(-1), labels.reshape(-1).type(torch.float))
            outputs = (loss, logits, probabilities)
        else:
            outputs = (logits, probabilities)

        return outputs

In [8]:
def writeToLog(path, content):
    """Append ``content`` followed by a newline to the text file at ``path``.

    The file is opened in append mode, so it is created if it does not exist.
    """
    log_file = open(path, 'a')
    try:
        log_file.writelines((content, '\n'))
    finally:
        log_file.close()

## train

In [9]:
def train(train_dataloader, dev_dataloader, params, bert_tokenizer, best_model_path, output_path, fold,
          version, checkpoint=None):
    """Fine-tune a fresh ``BertModelWithTextCNN`` on one fold and keep the best-AUC weights.

    After every epoch the model is validated; whenever validation AUC improves, a
    checkpoint is written to ``<best_model_path>/best-fine-tune-<version>-k<fold>.bin``.
    Training stops early after ``params['early_stoping']`` consecutive epochs
    without improvement.

    :param train_dataloader: batches used for optimisation
    :param dev_dataloader: batches used for validation / model selection
    :param params: hyper-parameter dict; reads ``weight_decay``, ``lr``, ``epochs``,
        ``max_gradient_norm``, ``early_stoping`` and, optionally,
        ``num_warmup_steps`` (default 100), plus the keys the model needs
    :param bert_tokenizer: unused here; kept for interface compatibility
    :param best_model_path: directory receiving the checkpoint
    :param output_path: text log file that mirrors the per-epoch status lines
    :param fold: fold index, used in the checkpoint file name
    :param version: model version string, used in the checkpoint file name
    :param checkpoint: unused here; kept for interface compatibility
    :return: ``(model, best_score)`` -- the model carrying the best weights seen and
        its validation AUC
    """
    def report(message):
        # Status lines go to both stdout and the run log.
        print(message)
        writeToLog(output_path, message)

    # ---------------------- Model definition ---------------------- #
    print("\t* Building model...")
    build_start = time.time()
    model = BertModelWithTextCNN(params).to(device)
    print("\t* Building model time:{:.4f}s".format(time.time()-build_start))
    # ---------------------- Preparation for training -------------- #
    # Parameters whose name contains one of these fragments are excluded from weight decay.
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': params['weight_decay']},
        {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
    ]
    optimizer = AdamW(optimizer_grouped_parameters, lr=params['lr'])
    # The scheduler is stepped once per batch, so the horizon is batches * epochs.
    num_training_steps = len(train_dataloader) * params['epochs']
    scheduler = get_linear_schedule_with_warmup(optimizer,
                                                num_warmup_steps=params.get('num_warmup_steps', 100),
                                                num_training_steps=num_training_steps)

    best_score = 0.0    # best validation AUC so far
    best_thres = 0.0
    start_epoch = 1
    # History stored inside the checkpoint (for loss-curve plots)
    epoch_count = []
    train_losses = []
    valid_losses = []
    best_model_saved_path = os.path.join(best_model_path, 'best-fine-tune-'+version+'-k'+str(fold)+'.bin')

    # Baseline: validation metrics of the model before any fine-tuning step.
    valid_loss, valid_accuracy, valid_f1, valid_auc, thres = validate(model, dev_dataloader)
    print("\t* Validation loss before training: {:.4f}, accuracy:{:.4f}, "
          "f1_score: {:.4f}, best_thres: {:.4f}, auc: {:.4f}".
          format(valid_loss, (valid_accuracy * 100), valid_f1, thres, valid_auc))
    print("\n", 20 * "=", "Training Bert model o device: {}".format(device), 20 * "=")

    patience_counter = 0
    for epoch in range(start_epoch, params['epochs']+1):
        report("-> Start epoch {}".format(epoch))
        epoch_count.append(epoch)
        # train
        epoch_time, epoch_loss, epoch_accuracy, epoch_f1, epoch_auc = train_for_one_epoch(model,
                                                                                          train_dataloader,
                                                                                          optimizer,
                                                                                          scheduler,
                                                                                          params['max_gradient_norm'])
        train_losses.append(epoch_loss)
        report("-> Training time:{:.4f}s, loss: {:.4f}, accuracy: {:.4f}%, f1_score: {:.4f}, auc: {:.4f}".
               format(epoch_time, epoch_loss, epoch_accuracy*100, epoch_f1, epoch_auc))

        # validation
        valid_loss, valid_accuracy, valid_f1, valid_auc, thres = validate(model, dev_dataloader)
        report("-> Validation loss: {:.4f}, accuracy: {:.4f}%, f1_score: {:.4f}, best_thres: {:.4f}, auc: {:.4f}".
               format(valid_loss, valid_accuracy * 100, valid_f1, thres, valid_auc))
        valid_losses.append(valid_loss)

        if valid_auc <= best_score:
            patience_counter += 1
        else:
            best_score = valid_auc
            best_thres = thres
            patience_counter = 0
            torch.save({
                "epoch": epoch,
                "model": model.state_dict(),
                # Validation AUC; the k-fold driver uses it to weight this fold's predictions.
                # Saved as a plain Python float.
                "best_score": float(best_score),
                "best_thres": best_thres,
                "epochs_count": epoch_count,
                "train_losses": train_losses,
                "valid_losses": valid_losses
            }, best_model_saved_path)

        if patience_counter >= params['early_stoping']:
            report("-> Early stopping: patience limit reached, stopping...")
            break

    if patience_counter != 0 and os.path.exists(best_model_saved_path):
        # The last epoch was not the best one: restore the best weights onto the
        # device the model lives on.
        best_checkpoint = torch.load(best_model_saved_path, map_location=device)
        model.load_state_dict(best_checkpoint['model'])
    return model, best_score


def train_for_one_epoch(model, dataloader, optimizer, scheduler, max_gradient_norm):
    """Run one optimisation pass over ``dataloader`` and report training metrics.

    The model is called as ``model(seqs, seq_masks, seq_segments, labels)`` and must
    return a tuple whose element 0 is the loss and element 2 the (batch, 2) class
    probabilities. Gradients are clipped to ``max_gradient_norm``, and both the
    optimizer and the scheduler are stepped after every batch.

    :return: ``(epoch_time, epoch_loss, epoch_accuracy, epoch_f1, epoch_auc)`` where
        the loss is averaged over batches, accuracy is over ``len(dataloader.dataset)``
        and predictions are the argmax of the probabilities.
    """
    model.train()

    started_at = time.time()
    loss_sum = 0.0        # loss accumulated over the whole epoch
    n_correct = 0.0
    batch_seconds = 0.0   # total time spent processing batches
    pred_chunks, proba_chunks, label_chunks = [], [], []

    progress = tqdm(dataloader)
    for step, batch in enumerate(progress, start=1):
        tick = time.time()
        if is_cuda:
            # Entries that are None (missing labels) are dropped while moving to the device.
            batch = [t.to(device) for t in batch if t is not None]
        optimizer.zero_grad()
        seqs, seq_masks, seq_segments, labels = batch
        outputs = model(seqs, seq_masks, seq_segments, labels)
        loss, probabilities = outputs[0], outputs[2]

        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_gradient_norm)
        optimizer.step()
        scheduler.step()

        loss_sum += loss.item()
        pred = torch.argmax(probabilities, dim=1)
        n_correct += (pred == labels).sum().item()
        batch_seconds += time.time() - tick
        pred_chunks.append(pred.cpu())
        label_chunks.append(labels.cpu())
        proba_chunks.append(probabilities.detach().cpu())

        progress.set_description(
            "Batch num: {}. Avg. batch proc. time: {:.4f}s, loss: {:.4f}".format(
                step, batch_seconds / step, loss_sum / step))

    # Flatten the per-batch pieces into epoch-level tensors.
    all_labels = torch.cat(label_chunks)         # (samples,)
    all_preds = torch.cat(pred_chunks)           # (samples,)
    all_pred_probas = torch.cat(proba_chunks)    # (samples, 2)

    fpr, tpr, _ = roc_curve(all_labels, all_pred_probas[:, 1], pos_label=1)

    epoch_loss = loss_sum / len(dataloader)
    epoch_accuracy = n_correct / len(dataloader.dataset)
    epoch_time = time.time() - started_at
    epoch_auc = auc(fpr, tpr)
    epoch_f1 = f1_score(all_labels, all_preds)

    return epoch_time, epoch_loss, epoch_accuracy, epoch_f1, epoch_auc


def validate(model, dataloader):
    """Evaluate ``model`` on ``dataloader`` without computing gradients.

    The model is called with labels and must return a tuple whose element 0 is the
    loss and element 2 the (batch, 2) class probabilities. Predictions are the
    argmax of the probabilities.

    :return: ``(valid_loss, valid_acc, valid_f1, valid_auc, best_thres)``; the loss
        is averaged over batches and ``best_thres`` is always 0 because no
        threshold search is performed here.
    """
    model.eval()
    loss_sum = 0.0   # loss accumulated over all batches
    label_chunks, proba_chunks = [], []

    # no_grad keeps autograd from storing activations, which lowers memory use.
    with torch.no_grad():
        for batch in tqdm(dataloader):
            if is_cuda:
                batch = [t.to(device) for t in batch if t is not None]

            seqs, seq_masks, seq_segments, labels = batch
            outputs = model(seqs, seq_masks, seq_segments, labels)

            loss_sum += outputs[0].item()
            label_chunks.append(labels.cpu())
            proba_chunks.append(outputs[2].cpu())

    all_labels = torch.cat(label_chunks)         # (samples,)
    all_pred_probas = torch.cat(proba_chunks)    # (samples, 2)

    all_preds = torch.argmax(all_pred_probas, dim=1)
    n_correct = (all_preds == all_labels).sum().item()

    fpr, tpr, _ = roc_curve(all_labels, all_pred_probas[:, 1], pos_label=1)

    valid_loss = loss_sum / len(dataloader)
    valid_acc = n_correct / len(dataloader.dataset)
    valid_f1 = f1_score(all_labels, all_preds)
    valid_auc = auc(fpr, tpr)
    return valid_loss, valid_acc, valid_f1, valid_auc, 0
    
def search_f1(y_true, y_pred):
    """Grid-search the decision threshold that maximises F1.

    Thresholds 0.30, 0.31, ..., 0.69 are tried in ascending order; a sample is
    predicted positive when its score is strictly greater than the threshold.
    On ties the lowest threshold wins.

    :param y_true: 1-D tensor of ground-truth labels
    :param y_pred: 1-D tensor; ``y_pred[i]`` is the predicted probability of label 1
        for sample ``i``
    :return: ``(best_f1, best_threshold)``; ``(0.0, 0.0)`` if no threshold scores above 0
    """
    top_f1, top_cut = 0.0, 0.0
    for cut in (pct / 100 for pct in range(30, 70)):
        current = f1_score(y_true, y_pred > cut)
        if current > top_f1:
            top_f1, top_cut = current, cut

    return top_f1, top_cut
    
def get_pred_probas(model, dataloader, is_test=False):
    """Predict class probabilities for every sample yielded by ``dataloader``.

    The model is called without labels and must return a tuple whose element 1 is
    the (batch, 2) probability tensor.

    :param model: the trained model
    :param dataloader: yields ``(seqs, seq_masks, seq_segments, labels)`` batches;
        ``labels`` may be ``None`` when ``is_test`` is true
    :param is_test: when true only the probabilities are returned
    :return: CPU tensor of probabilities (samples, 2), plus a CPU tensor of labels
        (samples,) when ``is_test`` is false
    :raises ValueError: if the dataloader yields no batches
    """
    model.eval()
    proba_chunks = []
    label_chunks = []
    with torch.no_grad():
        for data in dataloader:
            # Move all tensors to the GPU; a None labels entry is dropped here.
            if is_cuda:
                data = [t.to(device) for t in data if t is not None]

            if is_test:
                seqs, seq_masks, seq_segments = data[:3]
            else:
                seqs, seq_masks, seq_segments, labels = data
                label_chunks.append(labels.cpu())
            outputs = model(seqs,
                            seq_masks,
                            seq_segments)
            # Keep each batch on the CPU and concatenate once at the end, instead
            # of re-concatenating a growing device tensor on every iteration.
            proba_chunks.append(outputs[1].cpu())   # (batch, 2)

    if not proba_chunks:
        raise ValueError("dataloader yielded no batches; nothing to predict")

    probas = torch.cat(proba_chunks)
    if is_test:
        return probas
    return probas, torch.cat(label_chunks)   # labels: (samples,)

## KFold

In [10]:
def k_fold_cross_val(train_df, test_df, params, k, bert_tokenizer, best_model_path, output_path, version):
    """Train (or reload) one model per fold, collect out-of-fold predictions and
    ensemble the test predictions.

    For each of the ``k`` folds the model is trained unless a checkpoint
    ``best-fine-tune-<version>-k<fold>.bin`` already exists in ``best_model_path``,
    in which case it is loaded instead. Out-of-fold probabilities are written
    into ``train_df`` (columns ``proba_0`` / ``proba_1``; the caller's frame is
    modified in place). Test probabilities of the folds are combined with
    weights proportional to each fold's best validation score, and the decision
    threshold is chosen by ``search_f1`` on the out-of-fold predictions.

    :return: ``(test_preds, k_test_probas, dev_probas, dev_labels, best_f1, best_thres)``;
        ``k_test_probas`` holds the per-fold test probabilities *after* weighting,
        shape (k, len(test_df), 2)
    """
    kf = KFold(n_splits=k)
    test_dataset = QAMatchDataset(test_df, bert_tokenizer, params['max_seq_len'], mode='test')
    test_dataloader = DataLoader(test_dataset, batch_size=512, num_workers=3, collate_fn=test_dataset.collate_fn)

    # KFold yields *positions*, so the out-of-fold columns are created up front and
    # filled through iloc; label-based .loc is only equivalent for a default RangeIndex.
    for proba_col in ('proba_0', 'proba_1'):
        if proba_col not in train_df.columns:
            train_df[proba_col] = np.nan
    proba_col_positions = [train_df.columns.get_loc('proba_0'), train_df.columns.get_loc('proba_1')]

    dev_labels = []
    dev_probas = []
    k_test_probas = []
    k_best_scores = []
    for fold, (train_idxs, dev_idxs) in enumerate(kf.split(train_df)):
        print("\t* Start "+str(fold)+" fold")
        writeToLog(output_path, "\t* Start "+str(fold)+" fold")
        # ---------------------- Data loading -------------------------- #
        print("\t* Building dataset...")
        train_dataset = QAMatchDataset(train_df.iloc[train_idxs], bert_tokenizer, params['max_seq_len'], 'train')
        dev_dataset = QAMatchDataset(train_df.iloc[dev_idxs], bert_tokenizer, params['max_seq_len'], 'dev')

        train_dataloader = DataLoader(train_dataset, batch_size=params['batch_size'], num_workers=3,
                                      collate_fn=train_dataset.collate_fn)
        dev_dataloader = DataLoader(dev_dataset, batch_size=512, num_workers=3,
                                    collate_fn=dev_dataset.collate_fn)
        best_model_fold_path = os.path.join(best_model_path, 'best-fine-tune-'+version+'-k'+str(fold)+'.bin')
        checkpoint = None
        if not(os.path.exists(best_model_fold_path)):
            # No checkpoint for this fold yet: train from the pretrained weights.
            model, best_score = train(train_dataloader, dev_dataloader, params, bert_tokenizer, best_model_path, output_path,
                                      fold, version, checkpoint=None)
        else:
            # Reuse the fold's saved model and its recorded validation score.
            checkpoint = torch.load(best_model_fold_path)
            model = BertModelWithTextCNN(params).to(device)
            model.load_state_dict(checkpoint['model'])
            best_score = checkpoint['best_score']
        k_best_scores.append(best_score)

        # dev_dataloader does not shuffle, so row i of the predictions belongs to dev_idxs[i].
        fold_dev_proba, dev_label = get_pred_probas(model, dev_dataloader)
        fold_dev_np = fold_dev_proba.numpy().astype(np.float64)
        for class_idx, col_pos in enumerate(proba_col_positions):
            train_df.iloc[dev_idxs, col_pos] = fold_dev_np[:, class_idx]
        fold_test_proba = get_pred_probas(model, test_dataloader, is_test=True)

        dev_labels.append(dev_label)
        dev_probas.append(fold_dev_proba)       # per fold: (len(dev_idxs), 2)
        k_test_probas.append(fold_test_proba)   # per fold: (len(test_dataset), 2)
        # Release the fold's model and loaders before building the next one.
        del model, train_dataloader, dev_dataloader, checkpoint
        torch.cuda.empty_cache()
        time.sleep(5)

    # Concatenating the folds' dev sets covers every training row exactly once.
    dev_labels = torch.cat(dev_labels)  # (len(train_df),)
    dev_probas = torch.cat(dev_probas)  # (len(train_df), 2)

    k_test_probas = torch.stack(k_test_probas)  # (k, len(test_dataset), 2)

    # Weighted fusion of the k models: weight = fold score / sum of fold scores.
    k_best_scores = np.array(k_best_scores)
    score_sum = k_best_scores.sum()
    if score_sum > 0:
        k_weights = k_best_scores / score_sum                           # (k,)
    else:
        # Degenerate scores would give NaN weights; fall back to a plain average.
        k_weights = np.full(len(k_best_scores), 1.0 / len(k_best_scores))
    k_weights = np.expand_dims(np.expand_dims(k_weights, 1), 1)         # (k, 1, 1)
    print('k_best_score :', k_best_scores)
    print('k weights :', k_weights)
    # Convert the weights to a tensor explicitly rather than relying on implicit
    # Tensor * ndarray handling; broadcasting scales each fold's probabilities.
    k_test_probas = k_test_probas * torch.as_tensor(k_weights)          # (k, len(test_dataset), 2)
    test_probas = torch.sum(k_test_probas, dim=0)                       # (len(test_dataset), 2)
    # Pick the threshold that maximises F1 on the out-of-fold predictions.
    best_f1, best_thres = search_f1(dev_labels, dev_probas[:, 1])
    print(best_f1, best_thres)
    test_preds = (test_probas[:, 1] > best_thres).type(torch.long)

    return test_preds, k_test_probas, dev_probas, dev_labels, best_f1, best_thres

## 操作

In [12]:
model_version = 'FFTPD-5fold-V9.0'     # model version: part of the per-fold checkpoint file names
scheme_version = 'FFTPD-5fold-V9.0'    # scheme version: part of the log / report / result file names
train_df = pd.read_csv(config.augmented_V0204_path)
test_df = pd.read_csv(config.test_V0_path)
k = 5

params = {
    'batch_size': 32,
    'epochs': 20,
    'lr': 3e-05,
    'l2_weight':0,
    'weight_decay': 0.01,
    'dropout_rate': 0.4,
    'momentum': 0.8,
    'early_stoping':5,
    'patience': 3,
    'window_sizes': [3, 4, 5],
    'feature_size': 100,
    'max_seq_len': config.max_seq_len,
    'max_gradient_norm': 10.0,
    'pretrained_model_path': config.pretrained_roberta_wwm_ext_large_path
}

# NOTE(review): transformers warns that passing a single vocab file to
# from_pretrained() is deprecated; consider passing the model directory after
# confirming it yields the same tokenizer settings.
bert_tokenizer = BertTokenizer.from_pretrained(os.path.join(params['pretrained_model_path'], 'vocab.txt'))
output_path = os.path.join(config.root_path, 'output/'+scheme_version+'.txt')

print("\t* K fold training and validating...")
test_preds, k_test_probas, dev_probas, dev_labels, best_f1, best_thres = k_fold_cross_val(train_df, test_df, params, k,
                                                                                          bert_tokenizer, config.best_model_path,
                                                                                          output_path, model_version)
# Out-of-fold evaluation over the whole training set, using the searched threshold.
dev_preds = (dev_probas[:, 1] > best_thres).type(torch.long)
fpr, tpr, thresholds = roc_curve(dev_labels, dev_probas[:, 1], pos_label=1)
dev_auc = auc(fpr, tpr)
print('dev auc: ',dev_auc)

print("\t* Saving dev result...")
with open(os.path.join(config.root_path, 'report/'+scheme_version+'_'+'classification_report.txt'), 'w') as fp:
    fp.write(classification_report(dev_labels, dev_preds))
    fp.write('\n')
    fp.write('f1-score: {:.4f}'.format(f1_score(dev_labels, dev_preds)))
    fp.write(' auc: {:.4f}'.format(dev_auc))

# train_df now carries the out-of-fold proba_0 / proba_1 columns added by k_fold_cross_val.
train_df.to_csv(os.path.join(config.root_path, 'result/'+scheme_version+'_pred_result.csv'), index=False)

print("\t* Predicting...")
test_df['pred'] = test_preds.cpu().numpy()
k_test_probas = k_test_probas.cpu().numpy()

print("\t* Saving test result...")
# Save the submission file: tab-separated, without index or header row.
time_str = '' + time.strftime("%Y%m%d%H%M", time.localtime())
test_df[['dialog_id', 'reply_id', 'pred']].to_csv(os.path.join(config.root_path,'submission/'+scheme_version+'_'+time_str+'.csv'),
                                                  sep='\t',
                                                  index=False,
                                                  header=False)
# Save the per-fold (weighted) test probabilities.
k_test_probas_path = os.path.join(config.root_path, 'result/'+scheme_version+'_'+str(k)+'_test_probas.npz')
# np.save appends ".npy" to names that do not already end with it, so the array
# is stored at "<path>.npy". Check and write that exact file so the
# do-not-overwrite guard looks at the file that really exists.
k_test_probas_file = k_test_probas_path + '.npy'
if not os.path.exists(k_test_probas_file):
    np.save(k_test_probas_file, k_test_probas)

Calling BertTokenizer.from_pretrained() with the path to a single file or url is deprecated


	* K fold training and validating...
	* Start 0 fold
	* Building dataset...
	* Building model...


  0%|          | 0/17 [00:00<?, ?it/s]

	* Building model time:10.8905s


100%|██████████| 17/17 [00:32<00:00,  1.81s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

	* Validation loss before training: 0.5986, accuracy:75.5733, f1_score: 0.0000, best_thres: 0.0000, auc: 0.5190

-> Start epoch 1


Batch num: 1080. Avg. batch proc. time: 0.4824s, loss: 0.3425: 100%|██████████| 1080/1080 [08:45<00:00,  2.58it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:525.3917s, loss: 0.3425, accuracy: 85.5997%, f1_score: 0.6866, auc: 0.8931


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]


-> Validation loss: 0.2608, accuracy: 89.3097%, f1_score: 0.7781, best_thres: 0.0000, auc: 0.9423


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 2


Batch num: 1080. Avg. batch proc. time: 0.4815s, loss: 0.2009: 100%|██████████| 1080/1080 [08:44<00:00,  2.53it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:524.3752s, loss: 0.2009, accuracy: 92.2222%, f1_score: 0.8452, auc: 0.9648


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]


-> Validation loss: 0.2634, accuracy: 89.7962%, f1_score: 0.7988, best_thres: 0.0000, auc: 0.9456


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 3


Batch num: 1080. Avg. batch proc. time: 0.4842s, loss: 0.1262: 100%|██████████| 1080/1080 [08:47<00:00,  2.58it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:527.2765s, loss: 0.1262, accuracy: 95.4480%, f1_score: 0.9098, auc: 0.9855


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]


-> Validation loss: 0.2843, accuracy: 90.6764%, f1_score: 0.8134, best_thres: 0.0000, auc: 0.9500


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 4


Batch num: 1080. Avg. batch proc. time: 0.4846s, loss: 0.0867: 100%|██████████| 1080/1080 [08:47<00:00,  2.57it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:527.7468s, loss: 0.0867, accuracy: 96.9392%, f1_score: 0.9393, auc: 0.9930


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]


-> Validation loss: 0.3039, accuracy: 90.4679%, f1_score: 0.8101, best_thres: 0.0000, auc: 0.9509


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 5


Batch num: 1080. Avg. batch proc. time: 0.4824s, loss: 0.0651: 100%|██████████| 1080/1080 [08:45<00:00,  2.50it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:525.2815s, loss: 0.0651, accuracy: 97.8109%, f1_score: 0.9567, auc: 0.9958


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]


-> Validation loss: 0.3525, accuracy: 90.9775%, f1_score: 0.8139, best_thres: 0.0000, auc: 0.9539


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 6


Batch num: 1080. Avg. batch proc. time: 0.4826s, loss: 0.0457: 100%|██████████| 1080/1080 [08:45<00:00,  2.56it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:525.4894s, loss: 0.0457, accuracy: 98.4363%, f1_score: 0.9690, auc: 0.9978


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]


-> Validation loss: 0.3648, accuracy: 91.5566%, f1_score: 0.8279, best_thres: 0.0000, auc: 0.9546


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 7


Batch num: 1080. Avg. batch proc. time: 0.4839s, loss: 0.0394: 100%|██████████| 1080/1080 [08:46<00:00,  2.51it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:526.7390s, loss: 0.0394, accuracy: 98.6593%, f1_score: 0.9734, auc: 0.9982


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]


-> Validation loss: 0.3427, accuracy: 91.9504%, f1_score: 0.8389, best_thres: 0.0000, auc: 0.9562


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 8


Batch num: 1080. Avg. batch proc. time: 0.4826s, loss: 0.0314: 100%|██████████| 1080/1080 [08:45<00:00,  2.50it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:525.3036s, loss: 0.0314, accuracy: 98.9836%, f1_score: 0.9798, auc: 0.9989


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.4149, accuracy: 91.7420%, f1_score: 0.8354, best_thres: 0.0000, auc: 0.9547
-> Start epoch 9


Batch num: 1080. Avg. batch proc. time: 0.4833s, loss: 0.0272: 100%|██████████| 1080/1080 [08:45<00:00,  2.58it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:526.0750s, loss: 0.0272, accuracy: 99.1342%, f1_score: 0.9828, auc: 0.9991


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.4196, accuracy: 91.4524%, f1_score: 0.8317, best_thres: 0.0000, auc: 0.9528
-> Start epoch 10


Batch num: 1080. Avg. batch proc. time: 0.4833s, loss: 0.0223: 100%|██████████| 1080/1080 [08:45<00:00,  2.53it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:526.0625s, loss: 0.0223, accuracy: 99.2326%, f1_score: 0.9848, auc: 0.9993


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.4847, accuracy: 91.6956%, f1_score: 0.8317, best_thres: 0.0000, auc: 0.9526
-> Start epoch 11


Batch num: 1080. Avg. batch proc. time: 0.4836s, loss: 0.0214: 100%|██████████| 1080/1080 [08:46<00:00,  2.58it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:526.4214s, loss: 0.0214, accuracy: 99.3253%, f1_score: 0.9866, auc: 0.9994


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]


-> Validation loss: 0.4505, accuracy: 92.1589%, f1_score: 0.8407, best_thres: 0.0000, auc: 0.9567


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 12


Batch num: 1080. Avg. batch proc. time: 0.4824s, loss: 0.0162: 100%|██████████| 1080/1080 [08:45<00:00,  2.55it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:525.1282s, loss: 0.0162, accuracy: 99.4382%, f1_score: 0.9888, auc: 0.9997


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]


-> Validation loss: 0.4386, accuracy: 91.9273%, f1_score: 0.8357, best_thres: 0.0000, auc: 0.9567


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 13


Batch num: 1080. Avg. batch proc. time: 0.4834s, loss: 0.0153: 100%|██████████| 1080/1080 [08:46<00:00,  2.56it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:526.1710s, loss: 0.0153, accuracy: 99.4933%, f1_score: 0.9899, auc: 0.9997


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]


-> Validation loss: 0.4744, accuracy: 91.7188%, f1_score: 0.8375, best_thres: 0.0000, auc: 0.9569


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 14


Batch num: 1080. Avg. batch proc. time: 0.4830s, loss: 0.0109: 100%|██████████| 1080/1080 [08:45<00:00,  2.57it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:525.6843s, loss: 0.0109, accuracy: 99.5859%, f1_score: 0.9918, auc: 0.9999


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.5387, accuracy: 92.1473%, f1_score: 0.8409, best_thres: 0.0000, auc: 0.9551
-> Start epoch 15


Batch num: 1080. Avg. batch proc. time: 0.4846s, loss: 0.0097: 100%|██████████| 1080/1080 [08:47<00:00,  2.58it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:527.4483s, loss: 0.0097, accuracy: 99.6178%, f1_score: 0.9924, auc: 0.9999


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.5228, accuracy: 92.1589%, f1_score: 0.8416, best_thres: 0.0000, auc: 0.9542
-> Start epoch 16


Batch num: 1080. Avg. batch proc. time: 0.4827s, loss: 0.0091: 100%|██████████| 1080/1080 [08:45<00:00,  2.52it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:525.3131s, loss: 0.0091, accuracy: 99.6467%, f1_score: 0.9930, auc: 0.9998


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.5582, accuracy: 92.0547%, f1_score: 0.8408, best_thres: 0.0000, auc: 0.9534
-> Start epoch 17


Batch num: 1080. Avg. batch proc. time: 0.4836s, loss: 0.0063: 100%|██████████| 1080/1080 [08:46<00:00,  2.50it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:526.3590s, loss: 0.0063, accuracy: 99.6931%, f1_score: 0.9939, auc: 1.0000


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.6066, accuracy: 92.2979%, f1_score: 0.8418, best_thres: 0.0000, auc: 0.9519
-> Start epoch 18


Batch num: 1080. Avg. batch proc. time: 0.4826s, loss: 0.0056: 100%|██████████| 1080/1080 [08:45<00:00,  2.58it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:525.3214s, loss: 0.0056, accuracy: 99.7626%, f1_score: 0.9953, auc: 1.0000


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]


-> Validation loss: 0.5949, accuracy: 92.4369%, f1_score: 0.8462, best_thres: 0.0000, auc: 0.9535
-> Early stopping: patience limit reached, stopping...
	* Start 1 fold
	* Building dataset...
	* Building model...


  0%|          | 0/17 [00:00<?, ?it/s]

	* Building model time:7.6327s


100%|██████████| 17/17 [00:33<00:00,  1.86s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

	* Validation loss before training: 0.7977, accuracy:24.5425, f1_score: 0.3933, best_thres: 0.0000, auc: 0.4642

-> Start epoch 1


Batch num: 1080. Avg. batch proc. time: 0.4839s, loss: 0.3488: 100%|██████████| 1080/1080 [08:46<00:00,  2.53it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:527.1714s, loss: 0.3488, accuracy: 85.1798%, f1_score: 0.6809, auc: 0.8894


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]


-> Validation loss: 0.2799, accuracy: 88.9854%, f1_score: 0.7703, best_thres: 0.0000, auc: 0.9331


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 2


Batch num: 1080. Avg. batch proc. time: 0.4823s, loss: 0.2133: 100%|██████████| 1080/1080 [08:45<00:00,  2.56it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:525.3408s, loss: 0.2133, accuracy: 91.6401%, f1_score: 0.8316, auc: 0.9610


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]


-> Validation loss: 0.2519, accuracy: 89.8541%, f1_score: 0.7954, best_thres: 0.0000, auc: 0.9464


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 3


Batch num: 1080. Avg. batch proc. time: 0.4837s, loss: 0.1381: 100%|██████████| 1080/1080 [08:46<00:00,  2.56it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:526.8070s, loss: 0.1381, accuracy: 94.8746%, f1_score: 0.8983, auc: 0.9830


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]


-> Validation loss: 0.2581, accuracy: 89.8888%, f1_score: 0.8049, best_thres: 0.0000, auc: 0.9517


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 4


Batch num: 1080. Avg. batch proc. time: 0.4824s, loss: 0.0939: 100%|██████████| 1080/1080 [08:45<00:00,  2.57it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:525.4224s, loss: 0.0939, accuracy: 96.5483%, f1_score: 0.9317, auc: 0.9920


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.3055, accuracy: 91.0470%, f1_score: 0.8127, best_thres: 0.0000, auc: 0.9483
-> Start epoch 5


Batch num: 1080. Avg. batch proc. time: 0.4839s, loss: 0.0734: 100%|██████████| 1080/1080 [08:46<00:00,  2.57it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:527.0545s, loss: 0.0734, accuracy: 97.3852%, f1_score: 0.9483, auc: 0.9948


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]


-> Validation loss: 0.3292, accuracy: 91.2092%, f1_score: 0.8208, best_thres: 0.0000, auc: 0.9518


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 6


Batch num: 1080. Avg. batch proc. time: 0.4835s, loss: 0.0549: 100%|██████████| 1080/1080 [08:46<00:00,  2.54it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:526.5630s, loss: 0.0549, accuracy: 98.1844%, f1_score: 0.9640, auc: 0.9969


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]


-> Validation loss: 0.3082, accuracy: 90.8501%, f1_score: 0.8191, best_thres: 0.0000, auc: 0.9544


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 7


Batch num: 1080. Avg. batch proc. time: 0.4832s, loss: 0.0422: 100%|██████████| 1080/1080 [08:46<00:00,  2.51it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:526.3627s, loss: 0.0422, accuracy: 98.4363%, f1_score: 0.9690, auc: 0.9983


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.3459, accuracy: 91.2671%, f1_score: 0.8275, best_thres: 0.0000, auc: 0.9541
-> Start epoch 8


Batch num: 1080. Avg. batch proc. time: 0.4835s, loss: 0.0359: 100%|██████████| 1080/1080 [08:46<00:00,  2.56it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:526.5520s, loss: 0.0359, accuracy: 98.7751%, f1_score: 0.9757, auc: 0.9984


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.4099, accuracy: 90.6532%, f1_score: 0.8209, best_thres: 0.0000, auc: 0.9518
-> Start epoch 9


Batch num: 1080. Avg. batch proc. time: 0.4830s, loss: 0.0315: 100%|██████████| 1080/1080 [08:45<00:00,  2.55it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:525.9047s, loss: 0.0315, accuracy: 98.9604%, f1_score: 0.9793, auc: 0.9989


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.3390, accuracy: 91.4177%, f1_score: 0.8282, best_thres: 0.0000, auc: 0.9535
-> Start epoch 10


Batch num: 1080. Avg. batch proc. time: 0.4828s, loss: 0.0241: 100%|██████████| 1080/1080 [08:45<00:00,  2.57it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:525.8189s, loss: 0.0241, accuracy: 99.1863%, f1_score: 0.9838, auc: 0.9994


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]


-> Validation loss: 0.4238, accuracy: 91.9388%, f1_score: 0.8365, best_thres: 0.0000, auc: 0.9559


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 11


Batch num: 1080. Avg. batch proc. time: 0.4829s, loss: 0.0208: 100%|██████████| 1080/1080 [08:45<00:00,  2.54it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:525.9363s, loss: 0.0208, accuracy: 99.2934%, f1_score: 0.9860, auc: 0.9995


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]


-> Validation loss: 0.4236, accuracy: 91.9273%, f1_score: 0.8403, best_thres: 0.0000, auc: 0.9580


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 12


Batch num: 1080. Avg. batch proc. time: 0.4850s, loss: 0.0174: 100%|██████████| 1080/1080 [08:48<00:00,  2.57it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:528.2564s, loss: 0.0174, accuracy: 99.3803%, f1_score: 0.9877, auc: 0.9996


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.4686, accuracy: 92.2168%, f1_score: 0.8412, best_thres: 0.0000, auc: 0.9571
-> Start epoch 13


Batch num: 1080. Avg. batch proc. time: 0.4842s, loss: 0.0170: 100%|██████████| 1080/1080 [08:47<00:00,  2.55it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:527.3697s, loss: 0.0170, accuracy: 99.3919%, f1_score: 0.9879, auc: 0.9997


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]


-> Validation loss: 0.4642, accuracy: 92.1242%, f1_score: 0.8428, best_thres: 0.0000, auc: 0.9585


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 14


Batch num: 1080. Avg. batch proc. time: 0.4837s, loss: 0.0130: 100%|██████████| 1080/1080 [08:46<00:00,  2.57it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:526.8032s, loss: 0.0130, accuracy: 99.5280%, f1_score: 0.9906, auc: 0.9998


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.4787, accuracy: 92.0431%, f1_score: 0.8382, best_thres: 0.0000, auc: 0.9576
-> Start epoch 15


Batch num: 1080. Avg. batch proc. time: 0.4846s, loss: 0.0102: 100%|██████████| 1080/1080 [08:47<00:00,  2.57it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:527.7960s, loss: 0.0102, accuracy: 99.5975%, f1_score: 0.9920, auc: 0.9999


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.5469, accuracy: 92.2631%, f1_score: 0.8419, best_thres: 0.0000, auc: 0.9575
-> Start epoch 16


Batch num: 1080. Avg. batch proc. time: 0.4837s, loss: 0.0085: 100%|██████████| 1080/1080 [08:46<00:00,  2.58it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:526.8054s, loss: 0.0085, accuracy: 99.6438%, f1_score: 0.9929, auc: 0.9999


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.5620, accuracy: 92.4832%, f1_score: 0.8463, best_thres: 0.0000, auc: 0.9577
-> Start epoch 17


Batch num: 1080. Avg. batch proc. time: 0.4825s, loss: 0.0071: 100%|██████████| 1080/1080 [08:45<00:00,  2.56it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:525.4194s, loss: 0.0071, accuracy: 99.6815%, f1_score: 0.9937, auc: 0.9999


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.5936, accuracy: 92.3790%, f1_score: 0.8423, best_thres: 0.0000, auc: 0.9579
-> Start epoch 18


Batch num: 1080. Avg. batch proc. time: 0.4828s, loss: 0.0052: 100%|██████████| 1080/1080 [08:45<00:00,  2.52it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:525.8465s, loss: 0.0052, accuracy: 99.7278%, f1_score: 0.9946, auc: 1.0000


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]


-> Validation loss: 0.6053, accuracy: 92.4137%, f1_score: 0.8434, best_thres: 0.0000, auc: 0.9593


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 19


Batch num: 1080. Avg. batch proc. time: 0.4826s, loss: 0.0047: 100%|██████████| 1080/1080 [08:45<00:00,  2.52it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:525.5313s, loss: 0.0047, accuracy: 99.7626%, f1_score: 0.9953, auc: 1.0000


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]


-> Validation loss: 0.6374, accuracy: 92.4600%, f1_score: 0.8471, best_thres: 0.0000, auc: 0.9595


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 20


Batch num: 1080. Avg. batch proc. time: 0.4819s, loss: 0.0039: 100%|██████████| 1080/1080 [08:44<00:00,  2.58it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:524.8015s, loss: 0.0039, accuracy: 99.7626%, f1_score: 0.9953, auc: 1.0000


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]


-> Validation loss: 0.6433, accuracy: 92.5064%, f1_score: 0.8445, best_thres: 0.0000, auc: 0.9597
	* Start 2 fold
	* Building dataset...
	* Building model...


  0%|          | 0/17 [00:00<?, ?it/s]

	* Building model time:7.3175s


100%|██████████| 17/17 [00:33<00:00,  1.86s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

	* Validation loss before training: 0.6259, accuracy:74.7394, f1_score: 0.0073, best_thres: 0.0000, auc: 0.5181

-> Start epoch 1


Batch num: 1080. Avg. batch proc. time: 0.4840s, loss: 0.3442: 100%|██████████| 1080/1080 [08:46<00:00,  2.52it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:527.0920s, loss: 0.3442, accuracy: 85.5099%, f1_score: 0.6808, auc: 0.8907


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]


-> Validation loss: 0.2705, accuracy: 88.8927%, f1_score: 0.7759, best_thres: 0.0000, auc: 0.9383


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 2


Batch num: 1080. Avg. batch proc. time: 0.4836s, loss: 0.2098: 100%|██████████| 1080/1080 [08:46<00:00,  2.53it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:526.7737s, loss: 0.2098, accuracy: 91.6749%, f1_score: 0.8321, auc: 0.9618


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]


-> Validation loss: 0.2635, accuracy: 89.7498%, f1_score: 0.7997, best_thres: 0.0000, auc: 0.9441


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 3


Batch num: 1080. Avg. batch proc. time: 0.4829s, loss: 0.1312: 100%|██████████| 1080/1080 [08:45<00:00,  2.48it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:525.9054s, loss: 0.1312, accuracy: 95.1787%, f1_score: 0.9038, auc: 0.9845


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]


-> Validation loss: 0.3076, accuracy: 88.8580%, f1_score: 0.7982, best_thres: 0.0000, auc: 0.9471


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 4


Batch num: 1080. Avg. batch proc. time: 0.4843s, loss: 0.0912: 100%|██████████| 1080/1080 [08:47<00:00,  2.54it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:527.4225s, loss: 0.0912, accuracy: 96.8003%, f1_score: 0.9363, auc: 0.9923


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.3375, accuracy: 90.3289%, f1_score: 0.8137, best_thres: 0.0000, auc: 0.9451
-> Start epoch 5


Batch num: 1080. Avg. batch proc. time: 0.4840s, loss: 0.0686: 100%|██████████| 1080/1080 [08:46<00:00,  2.56it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:527.1592s, loss: 0.0686, accuracy: 97.5473%, f1_score: 0.9511, auc: 0.9956


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.3776, accuracy: 90.9659%, f1_score: 0.8207, best_thres: 0.0000, auc: 0.9467
-> Start epoch 6


Batch num: 1080. Avg. batch proc. time: 0.4838s, loss: 0.0523: 100%|██████████| 1080/1080 [08:46<00:00,  2.59it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:526.8364s, loss: 0.0523, accuracy: 98.2365%, f1_score: 0.9648, auc: 0.9971


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]


-> Validation loss: 0.3657, accuracy: 91.2439%, f1_score: 0.8161, best_thres: 0.0000, auc: 0.9527


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 7


Batch num: 1080. Avg. batch proc. time: 0.4845s, loss: 0.0441: 100%|██████████| 1080/1080 [08:47<00:00,  2.53it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:527.7463s, loss: 0.0441, accuracy: 98.4624%, f1_score: 0.9692, auc: 0.9980


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]


-> Validation loss: 0.3448, accuracy: 91.0702%, f1_score: 0.8324, best_thres: 0.0000, auc: 0.9577


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 8


Batch num: 1080. Avg. batch proc. time: 0.4834s, loss: 0.0386: 100%|██████████| 1080/1080 [08:46<00:00,  2.55it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:526.5785s, loss: 0.0386, accuracy: 98.7085%, f1_score: 0.9742, auc: 0.9981


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.3881, accuracy: 91.4061%, f1_score: 0.8321, best_thres: 0.0000, auc: 0.9553
-> Start epoch 9


Batch num: 1080. Avg. batch proc. time: 0.4845s, loss: 0.0297: 100%|██████████| 1080/1080 [08:47<00:00,  2.58it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:527.8586s, loss: 0.0297, accuracy: 99.0213%, f1_score: 0.9804, auc: 0.9991


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.4144, accuracy: 91.8114%, f1_score: 0.8385, best_thres: 0.0000, auc: 0.9557
-> Start epoch 10


Batch num: 1080. Avg. batch proc. time: 0.4830s, loss: 0.0234: 100%|██████████| 1080/1080 [08:45<00:00,  2.57it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:526.1804s, loss: 0.0234, accuracy: 99.2240%, f1_score: 0.9845, auc: 0.9993


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.3535, accuracy: 91.6261%, f1_score: 0.8388, best_thres: 0.0000, auc: 0.9552
-> Start epoch 11


Batch num: 1080. Avg. batch proc. time: 0.4840s, loss: 0.0206: 100%|██████████| 1080/1080 [08:46<00:00,  2.57it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:527.1925s, loss: 0.0206, accuracy: 99.2848%, f1_score: 0.9857, auc: 0.9995


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]


-> Validation loss: 0.4255, accuracy: 92.1473%, f1_score: 0.8415, best_thres: 0.0000, auc: 0.9582


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 12


Batch num: 1080. Avg. batch proc. time: 0.4838s, loss: 0.0176: 100%|██████████| 1080/1080 [08:46<00:00,  2.51it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:526.9425s, loss: 0.0176, accuracy: 99.3514%, f1_score: 0.9870, auc: 0.9997


100%|██████████| 17/17 [00:34<00:00,  1.87s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.4972, accuracy: 91.9157%, f1_score: 0.8435, best_thres: 0.0000, auc: 0.9542
-> Start epoch 13


Batch num: 1080. Avg. batch proc. time: 0.4830s, loss: 0.0130: 100%|██████████| 1080/1080 [08:45<00:00,  2.58it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:525.8655s, loss: 0.0130, accuracy: 99.5106%, f1_score: 0.9902, auc: 0.9998


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.5074, accuracy: 92.4485%, f1_score: 0.8474, best_thres: 0.0000, auc: 0.9579
-> Start epoch 14


Batch num: 1080. Avg. batch proc. time: 0.4838s, loss: 0.0124: 100%|██████████| 1080/1080 [08:46<00:00,  2.56it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:526.8439s, loss: 0.0124, accuracy: 99.5801%, f1_score: 0.9916, auc: 0.9997


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.5262, accuracy: 92.2052%, f1_score: 0.8403, best_thres: 0.0000, auc: 0.9567
-> Start epoch 15


Batch num: 1080. Avg. batch proc. time: 0.4832s, loss: 0.0100: 100%|██████████| 1080/1080 [08:46<00:00,  2.58it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:526.2198s, loss: 0.0100, accuracy: 99.5656%, f1_score: 0.9913, auc: 0.9999


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.5887, accuracy: 92.3790%, f1_score: 0.8468, best_thres: 0.0000, auc: 0.9574
-> Start epoch 16


Batch num: 1080. Avg. batch proc. time: 0.4842s, loss: 0.0086: 100%|██████████| 1080/1080 [08:47<00:00,  2.51it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:527.2309s, loss: 0.0086, accuracy: 99.6583%, f1_score: 0.9931, auc: 0.9997


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]


-> Validation loss: 0.5434, accuracy: 92.5180%, f1_score: 0.8474, best_thres: 0.0000, auc: 0.9578
-> Early stopping: patience limit reached, stopping...
	* Start 3 fold
	* Building dataset...
	* Building model...


  0%|          | 0/17 [00:00<?, ?it/s]

	* Building model time:8.0177s


100%|██████████| 17/17 [00:33<00:00,  1.86s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

	* Validation loss before training: 0.6873, accuracy:55.2763, f1_score: 0.3064, best_thres: 0.0000, auc: 0.4923

-> Start epoch 1


Batch num: 1080. Avg. batch proc. time: 0.4833s, loss: 0.3421: 100%|██████████| 1080/1080 [08:46<00:00,  2.54it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:526.3737s, loss: 0.3421, accuracy: 85.4611%, f1_score: 0.6844, auc: 0.8936


100%|██████████| 17/17 [00:34<00:00,  1.87s/it]


-> Validation loss: 0.2744, accuracy: 88.5903%, f1_score: 0.7777, best_thres: 0.0000, auc: 0.9400


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 2


Batch num: 1080. Avg. batch proc. time: 0.4847s, loss: 0.2017: 100%|██████████| 1080/1080 [08:47<00:00,  2.54it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:527.9463s, loss: 0.2017, accuracy: 92.0747%, f1_score: 0.8407, auc: 0.9649


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]


-> Validation loss: 0.2714, accuracy: 88.9378%, f1_score: 0.8029, best_thres: 0.0000, auc: 0.9484


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 3


Batch num: 1080. Avg. batch proc. time: 0.4838s, loss: 0.1273: 100%|██████████| 1080/1080 [08:46<00:00,  2.51it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:526.8646s, loss: 0.1273, accuracy: 95.2396%, f1_score: 0.9054, auc: 0.9852


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]


-> Validation loss: 0.2737, accuracy: 91.2777%, f1_score: 0.8280, best_thres: 0.0000, auc: 0.9544


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 4


Batch num: 1080. Avg. batch proc. time: 0.4842s, loss: 0.0875: 100%|██████████| 1080/1080 [08:47<00:00,  2.53it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:527.3010s, loss: 0.0875, accuracy: 96.8669%, f1_score: 0.9377, auc: 0.9927


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.3079, accuracy: 91.1966%, f1_score: 0.8190, best_thres: 0.0000, auc: 0.9537
-> Start epoch 5


Batch num: 1080. Avg. batch proc. time: 0.4837s, loss: 0.0629: 100%|██████████| 1080/1080 [08:46<00:00,  2.54it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:526.7849s, loss: 0.0629, accuracy: 97.7849%, f1_score: 0.9558, auc: 0.9957


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]


-> Validation loss: 0.3316, accuracy: 90.8838%, f1_score: 0.8292, best_thres: 0.0000, auc: 0.9556


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 6


Batch num: 1080. Avg. batch proc. time: 0.4830s, loss: 0.0509: 100%|██████████| 1080/1080 [08:45<00:00,  2.54it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:526.0431s, loss: 0.0509, accuracy: 98.2771%, f1_score: 0.9656, auc: 0.9973


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.3821, accuracy: 90.9417%, f1_score: 0.8304, best_thres: 0.0000, auc: 0.9530
-> Start epoch 7


Batch num: 1080. Avg. batch proc. time: 0.4837s, loss: 0.0398: 100%|██████████| 1080/1080 [08:46<00:00,  2.56it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:526.7309s, loss: 0.0398, accuracy: 98.6477%, f1_score: 0.9730, auc: 0.9981


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.4252, accuracy: 91.1387%, f1_score: 0.8350, best_thres: 0.0000, auc: 0.9553
-> Start epoch 8


Batch num: 1080. Avg. batch proc. time: 0.4825s, loss: 0.0341: 100%|██████████| 1080/1080 [08:45<00:00,  2.56it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:525.4870s, loss: 0.0341, accuracy: 98.8244%, f1_score: 0.9765, auc: 0.9987


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]


-> Validation loss: 0.4359, accuracy: 90.9533%, f1_score: 0.8334, best_thres: 0.0000, auc: 0.9581


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 9


Batch num: 1080. Avg. batch proc. time: 0.4849s, loss: 0.0275: 100%|██████████| 1080/1080 [08:47<00:00,  2.54it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:528.1299s, loss: 0.0275, accuracy: 99.0358%, f1_score: 0.9807, auc: 0.9992


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.4293, accuracy: 91.8916%, f1_score: 0.8414, best_thres: 0.0000, auc: 0.9552
-> Start epoch 10


Batch num: 1080. Avg. batch proc. time: 0.4858s, loss: 0.0255: 100%|██████████| 1080/1080 [08:49<00:00,  2.55it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:529.2679s, loss: 0.0255, accuracy: 99.1805%, f1_score: 0.9836, auc: 0.9990


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]


-> Validation loss: 0.3847, accuracy: 91.9032%, f1_score: 0.8416, best_thres: 0.0000, auc: 0.9587


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 11


Batch num: 1080. Avg. batch proc. time: 0.4827s, loss: 0.0192: 100%|██████████| 1080/1080 [08:45<00:00,  2.55it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:525.7518s, loss: 0.0192, accuracy: 99.3398%, f1_score: 0.9868, auc: 0.9995


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.4944, accuracy: 91.3703%, f1_score: 0.8327, best_thres: 0.0000, auc: 0.9546
-> Start epoch 12


Batch num: 1080. Avg. batch proc. time: 0.4841s, loss: 0.0152: 100%|██████████| 1080/1080 [08:47<00:00,  2.53it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:527.3022s, loss: 0.0152, accuracy: 99.4788%, f1_score: 0.9896, auc: 0.9997


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.5040, accuracy: 92.0306%, f1_score: 0.8396, best_thres: 0.0000, auc: 0.9553
-> Start epoch 13


Batch num: 1080. Avg. batch proc. time: 0.4834s, loss: 0.0140: 100%|██████████| 1080/1080 [08:46<00:00,  2.54it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:526.4255s, loss: 0.0140, accuracy: 99.5020%, f1_score: 0.9900, auc: 0.9997


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.4910, accuracy: 92.1348%, f1_score: 0.8444, best_thres: 0.0000, auc: 0.9542
-> Start epoch 14


Batch num: 1080. Avg. batch proc. time: 0.4826s, loss: 0.0111: 100%|██████████| 1080/1080 [08:45<00:00,  2.51it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:525.5216s, loss: 0.0111, accuracy: 99.5888%, f1_score: 0.9918, auc: 0.9998


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.5538, accuracy: 92.1117%, f1_score: 0.8488, best_thres: 0.0000, auc: 0.9529
-> Start epoch 15


Batch num: 1080. Avg. batch proc. time: 0.4830s, loss: 0.0094: 100%|██████████| 1080/1080 [08:45<00:00,  2.49it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:526.0201s, loss: 0.0094, accuracy: 99.6207%, f1_score: 0.9924, auc: 0.9999


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]


-> Validation loss: 0.5320, accuracy: 92.2275%, f1_score: 0.8454, best_thres: 0.0000, auc: 0.9563
-> Early stopping: patience limit reached, stopping...
	* Start 4 fold
	* Building dataset...
	* Building model...


  0%|          | 0/17 [00:00<?, ?it/s]

	* Building model time:7.4109s


100%|██████████| 17/17 [00:33<00:00,  1.86s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

	* Validation loss before training: 0.6041, accuracy:74.4585, f1_score: 0.0000, best_thres: 0.0000, auc: 0.4834

-> Start epoch 1


Batch num: 1080. Avg. batch proc. time: 0.4833s, loss: 0.3466: 100%|██████████| 1080/1080 [08:46<00:00,  2.56it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:526.3473s, loss: 0.3466, accuracy: 85.3772%, f1_score: 0.6738, auc: 0.8888


100%|██████████| 17/17 [00:34<00:00,  1.87s/it]


-> Validation loss: 0.2766, accuracy: 88.7872%, f1_score: 0.7662, best_thres: 0.0000, auc: 0.9385
-> Start epoch 2


Batch num: 1080. Avg. batch proc. time: 0.4831s, loss: 0.2118: 100%|██████████| 1080/1080 [08:45<00:00,  2.50it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:526.1180s, loss: 0.2118, accuracy: 91.6027%, f1_score: 0.8293, auc: 0.9611


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]


-> Validation loss: 0.2553, accuracy: 90.4089%, f1_score: 0.8123, best_thres: 0.0000, auc: 0.9502


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 3


Batch num: 1080. Avg. batch proc. time: 0.4834s, loss: 0.1313: 100%|██████████| 1080/1080 [08:46<00:00,  2.54it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:526.4458s, loss: 0.1313, accuracy: 95.0804%, f1_score: 0.9013, auc: 0.9845


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.3525, accuracy: 89.9571%, f1_score: 0.8120, best_thres: 0.0000, auc: 0.9481
-> Start epoch 4


Batch num: 1080. Avg. batch proc. time: 0.4839s, loss: 0.1019: 100%|██████████| 1080/1080 [08:46<00:00,  2.49it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:526.9671s, loss: 0.1019, accuracy: 96.2589%, f1_score: 0.9247, auc: 0.9909


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]


-> Validation loss: 0.3054, accuracy: 91.1387%, f1_score: 0.8285, best_thres: 0.0000, auc: 0.9514


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 5


Batch num: 1080. Avg. batch proc. time: 0.4830s, loss: 0.0669: 100%|██████████| 1080/1080 [08:45<00:00,  2.56it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:525.9846s, loss: 0.0669, accuracy: 97.5995%, f1_score: 0.9519, auc: 0.9956


100%|██████████| 17/17 [00:34<00:00,  1.87s/it]


-> Validation loss: 0.3003, accuracy: 90.8954%, f1_score: 0.8282, best_thres: 0.0000, auc: 0.9533


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 6


Batch num: 1080. Avg. batch proc. time: 0.4826s, loss: 0.0516: 100%|██████████| 1080/1080 [08:45<00:00,  2.55it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:525.5962s, loss: 0.0516, accuracy: 98.1787%, f1_score: 0.9634, auc: 0.9974


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]


-> Validation loss: 0.3288, accuracy: 91.3935%, f1_score: 0.8333, best_thres: 0.0000, auc: 0.9543


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 7


Batch num: 1080. Avg. batch proc. time: 0.4840s, loss: 0.0441: 100%|██████████| 1080/1080 [08:46<00:00,  2.51it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:527.0945s, loss: 0.0441, accuracy: 98.4451%, f1_score: 0.9688, auc: 0.9981


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]


-> Validation loss: 0.3274, accuracy: 91.7989%, f1_score: 0.8400, best_thres: 0.0000, auc: 0.9563


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 8


Batch num: 1080. Avg. batch proc. time: 0.4851s, loss: 0.0377: 100%|██████████| 1080/1080 [08:48<00:00,  2.49it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:528.2217s, loss: 0.0377, accuracy: 98.6651%, f1_score: 0.9732, auc: 0.9985


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.3733, accuracy: 91.4051%, f1_score: 0.8333, best_thres: 0.0000, auc: 0.9533
-> Start epoch 9


Batch num: 1080. Avg. batch proc. time: 0.4852s, loss: 0.0284: 100%|██████████| 1080/1080 [08:48<00:00,  2.52it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:528.5635s, loss: 0.0284, accuracy: 98.9721%, f1_score: 0.9794, auc: 0.9992


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]


-> Validation loss: 0.3692, accuracy: 91.9032%, f1_score: 0.8371, best_thres: 0.0000, auc: 0.9595


  0%|          | 0/1080 [00:00<?, ?it/s]

-> Start epoch 10


Batch num: 1080. Avg. batch proc. time: 0.4849s, loss: 0.0233: 100%|██████████| 1080/1080 [08:48<00:00,  2.56it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:528.1810s, loss: 0.0233, accuracy: 99.1776%, f1_score: 0.9835, auc: 0.9994


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.4416, accuracy: 91.3008%, f1_score: 0.8356, best_thres: 0.0000, auc: 0.9553
-> Start epoch 11


Batch num: 1080. Avg. batch proc. time: 0.4838s, loss: 0.0197: 100%|██████████| 1080/1080 [08:46<00:00,  2.50it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:526.9625s, loss: 0.0197, accuracy: 99.2993%, f1_score: 0.9859, auc: 0.9995


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.3860, accuracy: 91.5441%, f1_score: 0.8402, best_thres: 0.0000, auc: 0.9585
-> Start epoch 12


Batch num: 1080. Avg. batch proc. time: 0.4850s, loss: 0.0179: 100%|██████████| 1080/1080 [08:48<00:00,  2.50it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:528.2301s, loss: 0.0179, accuracy: 99.3340%, f1_score: 0.9866, auc: 0.9997


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.5099, accuracy: 91.9147%, f1_score: 0.8346, best_thres: 0.0000, auc: 0.9458
-> Start epoch 13


Batch num: 1080. Avg. batch proc. time: 0.4839s, loss: 0.0150: 100%|██████████| 1080/1080 [08:46<00:00,  2.47it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:527.0220s, loss: 0.0150, accuracy: 99.4498%, f1_score: 0.9889, auc: 0.9996


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]
  0%|          | 0/1080 [00:00<?, ?it/s]

-> Validation loss: 0.5038, accuracy: 92.1580%, f1_score: 0.8425, best_thres: 0.0000, auc: 0.9504
-> Start epoch 14


Batch num: 1080. Avg. batch proc. time: 0.4842s, loss: 0.0114: 100%|██████████| 1080/1080 [08:47<00:00,  2.46it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

-> Training time:527.3011s, loss: 0.0114, accuracy: 99.5772%, f1_score: 0.9915, auc: 0.9997


100%|██████████| 17/17 [00:33<00:00,  1.87s/it]


-> Validation loss: 0.4762, accuracy: 92.2159%, f1_score: 0.8446, best_thres: 0.0000, auc: 0.9592
-> Early stopping: patience limit reached, stopping...
k_best_score : [0.95694635 0.95965037 0.95815771 0.9587083  0.95948084]
k weights : [[[0.19965734]]

 [[0.2002215 ]]

 [[0.19991007]]

 [[0.20002495]]

 [[0.20018613]]]
0.8413112055959402 0.3
dev auc:  0.9490254811629399
	* Saving dev result...
	* Predicting...
	* Saving test result...
