In [1]:
import argparse
import os
import torch
from torch import nn, optim, cuda
from torch.nn import functional as F
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler, \
                             TensorDataset, WeightedRandomSampler


import pickle
from renet2.raw import load_documents, load_documents_ori

from sklearn.model_selection import train_test_split

import pandas as pd
import numpy as np
import random

from sklearn.metrics import f1_score, precision_recall_fscore_support, roc_auc_score
from sklearn.model_selection import KFold
from tqdm import tqdm
import re

In [2]:
from renet2.raw_handler import *
from renet2.model import *

  from tqdm.autonotebook import tqdm


In [3]:
# Enable IPython's autoreload extension in mode 2: imported modules are reloaded
# before each cell executes, so edits to imported source files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [4]:
import sys

# Replace the process argument list before argparse runs -- presumably so that
# parse_args() below sees no command-line options and falls back to the
# parser's defaults inside the notebook.
sys.argv = ['']

if __name__ == "__main__":
    # ---- arguments and paths -------------------------------------------
    parser = init_parser()
    args = parser.parse_args()
    sys.path.insert(0, 'renet2')
    get_index_path(args)

    # ---- device: CUDA only when it is available and args.no_cuda is unset
    use_cuda = torch.cuda.is_available() and not args.no_cuda
    if use_cuda:
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    # ---- seeding: torch (CPU), torch (CUDA, when used), then set_seed(args)
    torch.manual_seed(args.seed)
    if use_cuda:
        torch.cuda.manual_seed(args.seed)
    set_seed(args)

    # Stash the chosen device on args so later cells/helpers can read it.
    args.device = device
    print('using device', device)

    # ---- tokenizer: keep it on args together with its length -----------
    args.ori_tokenizer = loading_tokenizer(args)
    args.token_voc_l = len(args.ori_tokenizer)
    print('tokenizer size %d' % args.token_voc_l)

    # ---- fixed batch size and fixed per-document sentence / token counts
    args.batch_size = 32
    args.fix_snt_n = 32
    args.fix_token_n = 54

    print('fix input sentences# %d, tokens# %d, batch size %d'
          % (args.fix_snt_n, args.fix_token_n, args.batch_size))

using device cpu
loading word index from /autofs/bal31/jhsu/home/git/tmp/RENET2_b1/src/renet2/utils/word_index
loaded word index, voc size 82948
tokenizer size 82949
fix input sentences# 32, tokens# 54, batch size 32


In [6]:
# Options stored on args before the data-loading cells run.
# overwrite_cache=False presumably lets load_and_cache_data reuse an existing
# cache file (the recorded outputs show features being loaded from a cached
# file) -- TODO confirm against renet2.raw_handler.
args.overwrite_cache, args.file_name_snt = False, "sentences.txt"

In [7]:
# Point args at the `1st_ann` data directory and its label file, then load the
# features for that directory.
args.raw_data_dir, args.label_f_name = "../data/abs_data/1st_ann/", "labels.txt"

features_ann_1 = load_and_cache_data(args)

# Only the first of the three values returned by the converter is kept.
(dataset_ann_1, _, _) = convert_features_to_dataset_single(features_ann_1)
# Batches are drawn in dataset order (no shuffle / sampler is passed).
dataloader_ann_1 = DataLoader(dataset=dataset_ann_1, batch_size=args.batch_size)

loading features from cached file %s ../data/abs_data/1st_ann/cached_all_doc_0_32_54
loading ended


In [8]:
# Re-point args at the `2nd_ann` data directory (same label file name) and load
# the features for that directory.
args.raw_data_dir, args.label_f_name = "../data/abs_data/2nd_ann/", "labels.txt"

features_ss_aug = load_and_cache_data(args)

# Only the first of the three values returned by the converter is kept.
(dataset_ss_aug, _, _) = convert_features_to_dataset_single(features_ss_aug)
# Batches are drawn in dataset order (no shuffle / sampler is passed).
dataloader_ss_aug = DataLoader(dataset=dataset_ss_aug, batch_size=args.batch_size)

loading features from cached file %s ../data/abs_data/2nd_ann/cached_all_doc_0_32_54
loading ended


In [9]:
# Combine both feature sets into one 2-tuple with the same layout as the inputs:
# element 0 is concatenated with NumPy along axis 0, element 1 with pandas.
# NOTE(review): pd.concat keeps each frame's original index labels, so the merged
# object may contain repeated labels -- confirm downstream code does not rely on
# a unique index.
features_merge = (
    np.concatenate([features_ann_1[0], features_ss_aug[0]], axis=0),
    pd.concat((features_ann_1[1], features_ss_aug[1])),
)

In [10]:
# Build a dataset and an in-order DataLoader from the merged features; only the
# first of the converter's three return values is kept.
(dataset_merge, _, _) = convert_features_to_dataset_single(features_merge)
dataloader_merge = DataLoader(dataset=dataset_merge, batch_size=args.batch_size)

In [12]:
# Train 10 models in sequence on `dataloader_ann_1`. Each iteration builds a new
# Base_Net, writes its config to `<prefix>.cf`, and passes the checkpoint path
# `<prefix>.ckp` to train() through args.checkpoint_f.
for _model_idx in range(10):
    # Settings are (re)assigned on every iteration, so each model starts from the
    # same values even if train() or its helpers modify `args`.

    # ---- model architecture ----
    args.num_embedding = 64
    args.cnn_out_c = 100
    args.rnn_out_f_n = 68

    args.rnn_num_directions = 2
    args.rnn_layers = 2
    args.window_sizes = [2, 3, 4, 5]
    args.EB_dp = 0.3
    args.FC_dp = 0.1

    # ---- loss selection ----
    args.use_new_loss = False
    args.use_cls_loss = False

    # ---- schedule ----
    args.epochs = 20
    args.warmup_epoch = 0
    args.patience_epoch = 3

    # ---- optimizer / LR scheduler ----
    args.learning_rate = 1e-3
    args.adam_epsilon = 1e-8
    args.weight_decay = 1e-4
    args.l2_weight_decay = 1e-4
    args.max_grad_norm = 5.0
    args.lr_reduce_factor = .5
    args.threshold = .5
    args.lr_cooldown = 2
    args.use_loss_sh = False
    # NOTE(review): attribute names `is_iterare_info` and `modle_dir` look like
    # typos, but they are kept verbatim because code outside this notebook may
    # read them under these exact names.
    args.is_iterare_info = False

    # ---- output locations: Bst_abs_1st_0_01 ... Bst_abs_1st_0_10 ----
    args.modle_dir = '../models/abs_models/'
    model_name_prefix = 'Bst_abs_1st_0_' + '%02d' % (_model_idx + 1)
    args.checkpoint_f = os.path.join(args.modle_dir, model_name_prefix + ".ckp")
    args.config_save_f = os.path.join(args.modle_dir,  model_name_prefix + ".cf")

    # Make sure the output directory exists; without this the torch.save below
    # fails on a fresh checkout where ../models/abs_models/ has not been created.
    os.makedirs(args.modle_dir, exist_ok=True)

    config = set_model_config(args)
    torch.save(config, args.config_save_f)

    # New model, weight initialisation, and optimizer/scheduler for this run.
    model = Base_Net(config).to(device)
    model_init_w(model)
    optimizer, scheduler = init_model_optimizer(model, args)

    # Only a training loader is supplied; no dev or test loader is passed.
    train_dt, dev_dt, test_dt = dataloader_ann_1, None, None

    # The meaning of the trailing positional `True` is defined by train() --
    # TODO confirm against renet2.model.
    _ = train(model, optimizer, scheduler, train_dt, dev_dt, args, test_dt, True)

config ----------------
{'EB_dp': 0.3,
 'FC_dp': 0.1,
 'adam_epsilon': 1e-08,
 'batch_size': 32,
 'cnn_out_c': 100,
 'device': device(type='cuda'),
 'epochs': 20,
 'l2_weight_decay': 0.0001,
 'learning_rate': 0.001,
 'lr_cooldown': 2,
 'lr_reduce_factor': 0.5,
 'max_grad_norm': 5.0,
 'max_token_n': 54,
 'not_x_feature': False,
 'num_embedding': 64,
 'num_words': 82949,
 'patience_epoch': 3,
 'rnn_layers': 2,
 'rnn_num_directions': 2,
 'rnn_out_f_n': 68,
 'threshold': 0.5,
 'use_new_loss': False,
 'warmup_epoch': 0,
 'weight_decay': 0.0001,
 'window_sizes': [2, 3, 4, 5]}
       ----------------
init model
training begin


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_1 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_2 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_3 *
Epoch     2: reducing learning rate of group 0 to 5.0000e-04.
Epoch     2: reducing learning rate of group 1 to 5.0000e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_4 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_5 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_6 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_7 *
Epoch     6: reducing learning rate of group 0 to 2.5000e-04.
Epoch     6: reducing learning rate of group 1 to 2.5000e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_8 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_9 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_10 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_11 *
Epoch    10: reducing learning rate of group 0 to 1.2500e-04.
Epoch    10: reducing learning rate of group 1 to 1.2500e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_12 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_13 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_14 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_15 *
Epoch    14: reducing learning rate of group 0 to 6.2500e-05.
Epoch    14: reducing learning rate of group 1 to 6.2500e-05.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_16 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_17 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_18 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_19 *
Epoch    18: reducing learning rate of group 0 to 3.1250e-05.
Epoch    18: reducing learning rate of group 1 to 3.1250e-05.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_20 *
saved checkpoint in ../models/abs_models/Bst_abs_1st_0_01.ckp
training end, used 87.66 s
config ----------------
{'EB_dp': 0.3,
 'FC_dp': 0.1,
 'adam_epsilon': 1e-08,
 'batch_size': 32,
 'cnn_out_c': 100,
 'device': device(type='cuda'),
 'epochs': 20,
 'l2_weight_decay': 0.0001,
 'learning_rate': 0.001,
 'lr_cooldown': 2,
 'lr_reduce_factor': 0.5,
 'max_grad_norm': 5.0,
 'max_token_n': 54,
 'not_x_feature': False,
 'num_embedding': 64,
 'num_words': 82949,
 'patience_epoch': 3,
 'rnn_layers': 2,
 'rnn_num_directions': 2,
 'rnn_out_f_n': 68,
 'threshold': 0.5,
 'use_new_loss': False,
 'warmup_epoch': 0,
 'weight_decay': 0.0001,
 'window_sizes': [2, 3, 4, 5]}
       ----------------
init model
training begin


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_1 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_2 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_3 *
Epoch     2: reducing learning rate of group 0 to 5.0000e-04.
Epoch     2: reducing learning rate of group 1 to 5.0000e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_4 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_5 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_6 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_7 *
Epoch     6: reducing learning rate of group 0 to 2.5000e-04.
Epoch     6: reducing learning rate of group 1 to 2.5000e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_8 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_9 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_10 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_11 *
Epoch    10: reducing learning rate of group 0 to 1.2500e-04.
Epoch    10: reducing learning rate of group 1 to 1.2500e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_12 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_13 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_14 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_15 *
Epoch    14: reducing learning rate of group 0 to 6.2500e-05.
Epoch    14: reducing learning rate of group 1 to 6.2500e-05.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_16 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_17 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_18 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_19 *
Epoch    18: reducing learning rate of group 0 to 3.1250e-05.
Epoch    18: reducing learning rate of group 1 to 3.1250e-05.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_20 *
saved checkpoint in ../models/abs_models/Bst_abs_1st_0_02.ckp
training end, used 87.85 s
config ----------------
{'EB_dp': 0.3,
 'FC_dp': 0.1,
 'adam_epsilon': 1e-08,
 'batch_size': 32,
 'cnn_out_c': 100,
 'device': device(type='cuda'),
 'epochs': 20,
 'l2_weight_decay': 0.0001,
 'learning_rate': 0.001,
 'lr_cooldown': 2,
 'lr_reduce_factor': 0.5,
 'max_grad_norm': 5.0,
 'max_token_n': 54,
 'not_x_feature': False,
 'num_embedding': 64,
 'num_words': 82949,
 'patience_epoch': 3,
 'rnn_layers': 2,
 'rnn_num_directions': 2,
 'rnn_out_f_n': 68,
 'threshold': 0.5,
 'use_new_loss': False,
 'warmup_epoch': 0,
 'weight_decay': 0.0001,
 'window_sizes': [2, 3, 4, 5]}
       ----------------
init model
training begin


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_1 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_2 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_3 *
Epoch     2: reducing learning rate of group 0 to 5.0000e-04.
Epoch     2: reducing learning rate of group 1 to 5.0000e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_4 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_5 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_6 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_7 *
Epoch     6: reducing learning rate of group 0 to 2.5000e-04.
Epoch     6: reducing learning rate of group 1 to 2.5000e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_8 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_9 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_10 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_11 *
Epoch    10: reducing learning rate of group 0 to 1.2500e-04.
Epoch    10: reducing learning rate of group 1 to 1.2500e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_12 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_13 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_14 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_15 *
Epoch    14: reducing learning rate of group 0 to 6.2500e-05.
Epoch    14: reducing learning rate of group 1 to 6.2500e-05.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_16 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_17 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_18 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_19 *
Epoch    18: reducing learning rate of group 0 to 3.1250e-05.
Epoch    18: reducing learning rate of group 1 to 3.1250e-05.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_20 *
saved checkpoint in ../models/abs_models/Bst_abs_1st_0_03.ckp
training end, used 87.85 s
config ----------------
{'EB_dp': 0.3,
 'FC_dp': 0.1,
 'adam_epsilon': 1e-08,
 'batch_size': 32,
 'cnn_out_c': 100,
 'device': device(type='cuda'),
 'epochs': 20,
 'l2_weight_decay': 0.0001,
 'learning_rate': 0.001,
 'lr_cooldown': 2,
 'lr_reduce_factor': 0.5,
 'max_grad_norm': 5.0,
 'max_token_n': 54,
 'not_x_feature': False,
 'num_embedding': 64,
 'num_words': 82949,
 'patience_epoch': 3,
 'rnn_layers': 2,
 'rnn_num_directions': 2,
 'rnn_out_f_n': 68,
 'threshold': 0.5,
 'use_new_loss': False,
 'warmup_epoch': 0,
 'weight_decay': 0.0001,
 'window_sizes': [2, 3, 4, 5]}
       ----------------
init model
training begin


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_1 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_2 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_3 *
Epoch     2: reducing learning rate of group 0 to 5.0000e-04.
Epoch     2: reducing learning rate of group 1 to 5.0000e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_4 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_5 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_6 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_7 *
Epoch     6: reducing learning rate of group 0 to 2.5000e-04.
Epoch     6: reducing learning rate of group 1 to 2.5000e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_8 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_9 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_10 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_11 *
Epoch    10: reducing learning rate of group 0 to 1.2500e-04.
Epoch    10: reducing learning rate of group 1 to 1.2500e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_12 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_13 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_14 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_15 *
Epoch    14: reducing learning rate of group 0 to 6.2500e-05.
Epoch    14: reducing learning rate of group 1 to 6.2500e-05.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_16 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_17 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_18 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_19 *
Epoch    18: reducing learning rate of group 0 to 3.1250e-05.
Epoch    18: reducing learning rate of group 1 to 3.1250e-05.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_20 *
saved checkpoint in ../models/abs_models/Bst_abs_1st_0_04.ckp
training end, used 87.25 s
config ----------------
{'EB_dp': 0.3,
 'FC_dp': 0.1,
 'adam_epsilon': 1e-08,
 'batch_size': 32,
 'cnn_out_c': 100,
 'device': device(type='cuda'),
 'epochs': 20,
 'l2_weight_decay': 0.0001,
 'learning_rate': 0.001,
 'lr_cooldown': 2,
 'lr_reduce_factor': 0.5,
 'max_grad_norm': 5.0,
 'max_token_n': 54,
 'not_x_feature': False,
 'num_embedding': 64,
 'num_words': 82949,
 'patience_epoch': 3,
 'rnn_layers': 2,
 'rnn_num_directions': 2,
 'rnn_out_f_n': 68,
 'threshold': 0.5,
 'use_new_loss': False,
 'warmup_epoch': 0,
 'weight_decay': 0.0001,
 'window_sizes': [2, 3, 4, 5]}
       ----------------
init model
training begin


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_1 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_2 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_3 *
Epoch     2: reducing learning rate of group 0 to 5.0000e-04.
Epoch     2: reducing learning rate of group 1 to 5.0000e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_4 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_5 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_6 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_7 *
Epoch     6: reducing learning rate of group 0 to 2.5000e-04.
Epoch     6: reducing learning rate of group 1 to 2.5000e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_8 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_9 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_10 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_11 *
Epoch    10: reducing learning rate of group 0 to 1.2500e-04.
Epoch    10: reducing learning rate of group 1 to 1.2500e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_12 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_13 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_14 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_15 *
Epoch    14: reducing learning rate of group 0 to 6.2500e-05.
Epoch    14: reducing learning rate of group 1 to 6.2500e-05.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_16 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_17 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_18 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_19 *
Epoch    18: reducing learning rate of group 0 to 3.1250e-05.
Epoch    18: reducing learning rate of group 1 to 3.1250e-05.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_20 *
saved checkpoint in ../models/abs_models/Bst_abs_1st_0_05.ckp
training end, used 88.59 s
config ----------------
{'EB_dp': 0.3,
 'FC_dp': 0.1,
 'adam_epsilon': 1e-08,
 'batch_size': 32,
 'cnn_out_c': 100,
 'device': device(type='cuda'),
 'epochs': 20,
 'l2_weight_decay': 0.0001,
 'learning_rate': 0.001,
 'lr_cooldown': 2,
 'lr_reduce_factor': 0.5,
 'max_grad_norm': 5.0,
 'max_token_n': 54,
 'not_x_feature': False,
 'num_embedding': 64,
 'num_words': 82949,
 'patience_epoch': 3,
 'rnn_layers': 2,
 'rnn_num_directions': 2,
 'rnn_out_f_n': 68,
 'threshold': 0.5,
 'use_new_loss': False,
 'warmup_epoch': 0,
 'weight_decay': 0.0001,
 'window_sizes': [2, 3, 4, 5]}
       ----------------
init model
training begin


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_1 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_2 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_3 *
Epoch     2: reducing learning rate of group 0 to 5.0000e-04.
Epoch     2: reducing learning rate of group 1 to 5.0000e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_4 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_5 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_6 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_7 *
Epoch     6: reducing learning rate of group 0 to 2.5000e-04.
Epoch     6: reducing learning rate of group 1 to 2.5000e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_8 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_9 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_10 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_11 *
Epoch    10: reducing learning rate of group 0 to 1.2500e-04.
Epoch    10: reducing learning rate of group 1 to 1.2500e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_12 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_13 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_14 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_15 *
Epoch    14: reducing learning rate of group 0 to 6.2500e-05.
Epoch    14: reducing learning rate of group 1 to 6.2500e-05.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_16 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_17 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_18 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_19 *
Epoch    18: reducing learning rate of group 0 to 3.1250e-05.
Epoch    18: reducing learning rate of group 1 to 3.1250e-05.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_20 *
saved checkpoint in ../models/abs_models/Bst_abs_1st_0_06.ckp
training end, used 88.26 s
config ----------------
{'EB_dp': 0.3,
 'FC_dp': 0.1,
 'adam_epsilon': 1e-08,
 'batch_size': 32,
 'cnn_out_c': 100,
 'device': device(type='cuda'),
 'epochs': 20,
 'l2_weight_decay': 0.0001,
 'learning_rate': 0.001,
 'lr_cooldown': 2,
 'lr_reduce_factor': 0.5,
 'max_grad_norm': 5.0,
 'max_token_n': 54,
 'not_x_feature': False,
 'num_embedding': 64,
 'num_words': 82949,
 'patience_epoch': 3,
 'rnn_layers': 2,
 'rnn_num_directions': 2,
 'rnn_out_f_n': 68,
 'threshold': 0.5,
 'use_new_loss': False,
 'warmup_epoch': 0,
 'weight_decay': 0.0001,
 'window_sizes': [2, 3, 4, 5]}
       ----------------
init model
training begin


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_1 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_2 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_3 *
Epoch     2: reducing learning rate of group 0 to 5.0000e-04.
Epoch     2: reducing learning rate of group 1 to 5.0000e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_4 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_5 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_6 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_7 *
Epoch     6: reducing learning rate of group 0 to 2.5000e-04.
Epoch     6: reducing learning rate of group 1 to 2.5000e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_8 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_9 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_10 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_11 *
Epoch    10: reducing learning rate of group 0 to 1.2500e-04.
Epoch    10: reducing learning rate of group 1 to 1.2500e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_12 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_13 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_14 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_15 *
Epoch    14: reducing learning rate of group 0 to 6.2500e-05.
Epoch    14: reducing learning rate of group 1 to 6.2500e-05.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_16 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_17 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_18 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_19 *
Epoch    18: reducing learning rate of group 0 to 3.1250e-05.
Epoch    18: reducing learning rate of group 1 to 3.1250e-05.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_20 *
saved checkpoint in ../models/abs_models/Bst_abs_1st_0_07.ckp
training end, used 87.83 s
config ----------------
{'EB_dp': 0.3,
 'FC_dp': 0.1,
 'adam_epsilon': 1e-08,
 'batch_size': 32,
 'cnn_out_c': 100,
 'device': device(type='cuda'),
 'epochs': 20,
 'l2_weight_decay': 0.0001,
 'learning_rate': 0.001,
 'lr_cooldown': 2,
 'lr_reduce_factor': 0.5,
 'max_grad_norm': 5.0,
 'max_token_n': 54,
 'not_x_feature': False,
 'num_embedding': 64,
 'num_words': 82949,
 'patience_epoch': 3,
 'rnn_layers': 2,
 'rnn_num_directions': 2,
 'rnn_out_f_n': 68,
 'threshold': 0.5,
 'use_new_loss': False,
 'warmup_epoch': 0,
 'weight_decay': 0.0001,
 'window_sizes': [2, 3, 4, 5]}
       ----------------
init model
training begin


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_1 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_2 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_3 *
Epoch     2: reducing learning rate of group 0 to 5.0000e-04.
Epoch     2: reducing learning rate of group 1 to 5.0000e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_4 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_5 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_6 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_7 *
Epoch     6: reducing learning rate of group 0 to 2.5000e-04.
Epoch     6: reducing learning rate of group 1 to 2.5000e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_8 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_9 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_10 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_11 *
Epoch    10: reducing learning rate of group 0 to 1.2500e-04.
Epoch    10: reducing learning rate of group 1 to 1.2500e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_12 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_13 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_14 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_15 *
Epoch    14: reducing learning rate of group 0 to 6.2500e-05.
Epoch    14: reducing learning rate of group 1 to 6.2500e-05.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_16 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_17 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_18 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_19 *
Epoch    18: reducing learning rate of group 0 to 3.1250e-05.
Epoch    18: reducing learning rate of group 1 to 3.1250e-05.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_20 *
saved checkpoint in ../models/abs_models/Bst_abs_1st_0_08.ckp
training end, used 88.22 s
config ----------------
{'EB_dp': 0.3,
 'FC_dp': 0.1,
 'adam_epsilon': 1e-08,
 'batch_size': 32,
 'cnn_out_c': 100,
 'device': device(type='cuda'),
 'epochs': 20,
 'l2_weight_decay': 0.0001,
 'learning_rate': 0.001,
 'lr_cooldown': 2,
 'lr_reduce_factor': 0.5,
 'max_grad_norm': 5.0,
 'max_token_n': 54,
 'not_x_feature': False,
 'num_embedding': 64,
 'num_words': 82949,
 'patience_epoch': 3,
 'rnn_layers': 2,
 'rnn_num_directions': 2,
 'rnn_out_f_n': 68,
 'threshold': 0.5,
 'use_new_loss': False,
 'warmup_epoch': 0,
 'weight_decay': 0.0001,
 'window_sizes': [2, 3, 4, 5]}
       ----------------
init model
training begin


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_1 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_2 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_3 *
Epoch     2: reducing learning rate of group 0 to 5.0000e-04.
Epoch     2: reducing learning rate of group 1 to 5.0000e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_4 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_5 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_6 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_7 *
Epoch     6: reducing learning rate of group 0 to 2.5000e-04.
Epoch     6: reducing learning rate of group 1 to 2.5000e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_8 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_9 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_10 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_11 *
Epoch    10: reducing learning rate of group 0 to 1.2500e-04.
Epoch    10: reducing learning rate of group 1 to 1.2500e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_12 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_13 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_14 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_15 *
Epoch    14: reducing learning rate of group 0 to 6.2500e-05.
Epoch    14: reducing learning rate of group 1 to 6.2500e-05.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_16 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_17 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_18 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_19 *
Epoch    18: reducing learning rate of group 0 to 3.1250e-05.
Epoch    18: reducing learning rate of group 1 to 3.1250e-05.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_20 *
saved checkpoint in ../models/abs_models/Bst_abs_1st_0_09.ckp
training end, used 87.27 s
config ----------------
{'EB_dp': 0.3,
 'FC_dp': 0.1,
 'adam_epsilon': 1e-08,
 'batch_size': 32,
 'cnn_out_c': 100,
 'device': device(type='cuda'),
 'epochs': 20,
 'l2_weight_decay': 0.0001,
 'learning_rate': 0.001,
 'lr_cooldown': 2,
 'lr_reduce_factor': 0.5,
 'max_grad_norm': 5.0,
 'max_token_n': 54,
 'not_x_feature': False,
 'num_embedding': 64,
 'num_words': 82949,
 'patience_epoch': 3,
 'rnn_layers': 2,
 'rnn_num_directions': 2,
 'rnn_out_f_n': 68,
 'threshold': 0.5,
 'use_new_loss': False,
 'warmup_epoch': 0,
 'weight_decay': 0.0001,
 'window_sizes': [2, 3, 4, 5]}
       ----------------
init model
training begin


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_1 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_2 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_3 *
Epoch     2: reducing learning rate of group 0 to 5.0000e-04.
Epoch     2: reducing learning rate of group 1 to 5.0000e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_4 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_5 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_6 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_7 *
Epoch     6: reducing learning rate of group 0 to 2.5000e-04.
Epoch     6: reducing learning rate of group 1 to 2.5000e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_8 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_9 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_10 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_11 *
Epoch    10: reducing learning rate of group 0 to 1.2500e-04.
Epoch    10: reducing learning rate of group 1 to 1.2500e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_12 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_13 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_14 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_15 *
Epoch    14: reducing learning rate of group 0 to 6.2500e-05.
Epoch    14: reducing learning rate of group 1 to 6.2500e-05.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_16 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_17 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_18 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_19 *
Epoch    18: reducing learning rate of group 0 to 3.1250e-05.
Epoch    18: reducing learning rate of group 1 to 3.1250e-05.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=88.0, style=ProgressStyle(description_wid…


e_20 *
saved checkpoint in ../models/abs_models/Bst_abs_1st_0_10.ckp
training end, used 87.17 s


In [11]:
# Train the final set of models: one fresh Base_Net per iteration, all with the
# same hyperparameters, each written to its own numbered config/checkpoint file
# ("Bst_abs_1st_01" ... "Bst_abs_1st_10").
N_ENSEMBLE_MODELS = 10

for _model_idx in range(N_ENSEMBLE_MODELS):
    # The settings below are re-applied on every iteration, so each model is
    # configured from the same values.
    # NOTE(review): if set_model_config()/train() never mutate `args`, these
    # assignments could be hoisted above the loop -- confirm before moving.

    # Network size / shape settings.
    args.num_embedding = 64
    args.cnn_out_c = 100
    args.rnn_out_f_n = 68

    args.rnn_num_directions = 2
    args.rnn_layers = 2
    args.window_sizes = [2, 3, 4, 5]
    # Presumably dropout rates for the embedding and fully-connected layers
    # -- TODO confirm against Base_Net.
    args.EB_dp = 0.3
    args.FC_dp = 0.1

    # Optional loss variants are switched off for this run.
    args.use_new_loss = False
    args.use_cls_loss = False

    # Training length and schedule controls.
    args.epochs = 20
    args.warmup_epoch = 0
    args.patience_epoch = 3

    # Optimiser / LR-scheduler settings.
    args.learning_rate = 1e-3
    args.adam_epsilon = 1e-8
    args.weight_decay = 1e-4
    args.l2_weight_decay = 1e-4
    args.max_grad_norm = 5.0
    args.lr_reduce_factor = .5
    args.threshold = .5
    args.lr_cooldown = 2
    args.use_loss_sh = False
    args.is_iterare_info = False

    # Output locations. The attribute name `modle_dir` (sic) is kept as-is
    # because code outside this cell may read it under that spelling.
    args.modle_dir = '../models/abs_models/'
    # Make sure the output directory exists; otherwise torch.save() below
    # fails before any training happens.
    os.makedirs(args.modle_dir, exist_ok=True)
    model_name_prefix = 'Bst_abs_1st_' + '%02d' % (_model_idx + 1)  # 1-based, zero-padded
    args.checkpoint_f = os.path.join(args.modle_dir, model_name_prefix + ".ckp")
    args.config_save_f = os.path.join(args.modle_dir,  model_name_prefix + ".cf")

    # Persist the model config next to the checkpoint path.
    config = set_model_config(args)
    torch.save(config, args.config_save_f)

    # Build a new model, initialise its weights and create its optimiser/scheduler.
    model = Base_Net(config).to(device)
    model_init_w(model)
    optimizer, scheduler = init_model_optimizer(model, args)

    # Train on `dataloader_merge` only; no dev or test loader is supplied.
    train_dt, dev_dt, test_dt = dataloader_merge, None, None

    # The return value is not needed here. The cell output shows a
    # "saved checkpoint in <args.checkpoint_f>" line at the end of each run.
    # NOTE(review): meaning of the trailing positional `True` is not visible
    # from this cell -- check train()'s signature.
    _ = train(model, optimizer, scheduler, train_dt, dev_dt, args, test_dt, True)

config ----------------
{'EB_dp': 0.3,
 'FC_dp': 0.1,
 'adam_epsilon': 1e-08,
 'batch_size': 32,
 'cnn_out_c': 100,
 'device': device(type='cuda'),
 'epochs': 20,
 'l2_weight_decay': 0.0001,
 'learning_rate': 0.001,
 'lr_cooldown': 2,
 'lr_reduce_factor': 0.5,
 'max_grad_norm': 5.0,
 'max_token_n': 54,
 'not_x_feature': False,
 'num_embedding': 64,
 'num_words': 82949,
 'patience_epoch': 3,
 'rnn_layers': 2,
 'rnn_num_directions': 2,
 'rnn_out_f_n': 68,
 'threshold': 0.5,
 'use_new_loss': False,
 'warmup_epoch': 0,
 'weight_decay': 0.0001,
 'window_sizes': [2, 3, 4, 5]}
       ----------------
init model
training begin


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_1 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_2 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_3 *
Epoch     2: reducing learning rate of group 0 to 5.0000e-04.
Epoch     2: reducing learning rate of group 1 to 5.0000e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_4 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_5 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_6 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_7 *
Epoch     6: reducing learning rate of group 0 to 2.5000e-04.
Epoch     6: reducing learning rate of group 1 to 2.5000e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_8 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_9 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_10 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_11 *
Epoch    10: reducing learning rate of group 0 to 1.2500e-04.
Epoch    10: reducing learning rate of group 1 to 1.2500e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_12 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_13 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_14 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_15 *
Epoch    14: reducing learning rate of group 0 to 6.2500e-05.
Epoch    14: reducing learning rate of group 1 to 6.2500e-05.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_16 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_17 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_18 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_19 *
Epoch    18: reducing learning rate of group 0 to 3.1250e-05.
Epoch    18: reducing learning rate of group 1 to 3.1250e-05.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_20 *
saved checkpoint in ../models/abs_models/Bst_abs_1st_01.ckp
training end, used 177.30 s
config ----------------
{'EB_dp': 0.3,
 'FC_dp': 0.1,
 'adam_epsilon': 1e-08,
 'batch_size': 32,
 'cnn_out_c': 100,
 'device': device(type='cuda'),
 'epochs': 20,
 'l2_weight_decay': 0.0001,
 'learning_rate': 0.001,
 'lr_cooldown': 2,
 'lr_reduce_factor': 0.5,
 'max_grad_norm': 5.0,
 'max_token_n': 54,
 'not_x_feature': False,
 'num_embedding': 64,
 'num_words': 82949,
 'patience_epoch': 3,
 'rnn_layers': 2,
 'rnn_num_directions': 2,
 'rnn_out_f_n': 68,
 'threshold': 0.5,
 'use_new_loss': False,
 'warmup_epoch': 0,
 'weight_decay': 0.0001,
 'window_sizes': [2, 3, 4, 5]}
       ----------------
init model
training begin


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_1 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_2 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_3 *
Epoch     2: reducing learning rate of group 0 to 5.0000e-04.
Epoch     2: reducing learning rate of group 1 to 5.0000e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_4 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_5 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_6 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_7 *
Epoch     6: reducing learning rate of group 0 to 2.5000e-04.
Epoch     6: reducing learning rate of group 1 to 2.5000e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_8 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_9 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_10 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_11 *
Epoch    10: reducing learning rate of group 0 to 1.2500e-04.
Epoch    10: reducing learning rate of group 1 to 1.2500e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_12 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_13 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_14 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_15 *
Epoch    14: reducing learning rate of group 0 to 6.2500e-05.
Epoch    14: reducing learning rate of group 1 to 6.2500e-05.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_16 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_17 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_18 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_19 *
Epoch    18: reducing learning rate of group 0 to 3.1250e-05.
Epoch    18: reducing learning rate of group 1 to 3.1250e-05.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_20 *
saved checkpoint in ../models/abs_models/Bst_abs_1st_02.ckp
training end, used 173.43 s
config ----------------
{'EB_dp': 0.3,
 'FC_dp': 0.1,
 'adam_epsilon': 1e-08,
 'batch_size': 32,
 'cnn_out_c': 100,
 'device': device(type='cuda'),
 'epochs': 20,
 'l2_weight_decay': 0.0001,
 'learning_rate': 0.001,
 'lr_cooldown': 2,
 'lr_reduce_factor': 0.5,
 'max_grad_norm': 5.0,
 'max_token_n': 54,
 'not_x_feature': False,
 'num_embedding': 64,
 'num_words': 82949,
 'patience_epoch': 3,
 'rnn_layers': 2,
 'rnn_num_directions': 2,
 'rnn_out_f_n': 68,
 'threshold': 0.5,
 'use_new_loss': False,
 'warmup_epoch': 0,
 'weight_decay': 0.0001,
 'window_sizes': [2, 3, 4, 5]}
       ----------------
init model
training begin


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_1 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_2 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_3 *
Epoch     2: reducing learning rate of group 0 to 5.0000e-04.
Epoch     2: reducing learning rate of group 1 to 5.0000e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_4 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_5 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_6 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_7 *
Epoch     6: reducing learning rate of group 0 to 2.5000e-04.
Epoch     6: reducing learning rate of group 1 to 2.5000e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_8 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_9 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_10 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_11 *
Epoch    10: reducing learning rate of group 0 to 1.2500e-04.
Epoch    10: reducing learning rate of group 1 to 1.2500e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_12 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_13 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_14 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_15 *
Epoch    14: reducing learning rate of group 0 to 6.2500e-05.
Epoch    14: reducing learning rate of group 1 to 6.2500e-05.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_16 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_17 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_18 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_19 *
Epoch    18: reducing learning rate of group 0 to 3.1250e-05.
Epoch    18: reducing learning rate of group 1 to 3.1250e-05.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_20 *
saved checkpoint in ../models/abs_models/Bst_abs_1st_03.ckp
training end, used 173.93 s
config ----------------
{'EB_dp': 0.3,
 'FC_dp': 0.1,
 'adam_epsilon': 1e-08,
 'batch_size': 32,
 'cnn_out_c': 100,
 'device': device(type='cuda'),
 'epochs': 20,
 'l2_weight_decay': 0.0001,
 'learning_rate': 0.001,
 'lr_cooldown': 2,
 'lr_reduce_factor': 0.5,
 'max_grad_norm': 5.0,
 'max_token_n': 54,
 'not_x_feature': False,
 'num_embedding': 64,
 'num_words': 82949,
 'patience_epoch': 3,
 'rnn_layers': 2,
 'rnn_num_directions': 2,
 'rnn_out_f_n': 68,
 'threshold': 0.5,
 'use_new_loss': False,
 'warmup_epoch': 0,
 'weight_decay': 0.0001,
 'window_sizes': [2, 3, 4, 5]}
       ----------------
init model
training begin


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_1 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_2 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_3 *
Epoch     2: reducing learning rate of group 0 to 5.0000e-04.
Epoch     2: reducing learning rate of group 1 to 5.0000e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_4 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_5 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_6 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_7 *
Epoch     6: reducing learning rate of group 0 to 2.5000e-04.
Epoch     6: reducing learning rate of group 1 to 2.5000e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_8 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_9 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_10 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_11 *
Epoch    10: reducing learning rate of group 0 to 1.2500e-04.
Epoch    10: reducing learning rate of group 1 to 1.2500e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_12 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_13 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_14 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_15 *
Epoch    14: reducing learning rate of group 0 to 6.2500e-05.
Epoch    14: reducing learning rate of group 1 to 6.2500e-05.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_16 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_17 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_18 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_19 *
Epoch    18: reducing learning rate of group 0 to 3.1250e-05.
Epoch    18: reducing learning rate of group 1 to 3.1250e-05.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_20 *
saved checkpoint in ../models/abs_models/Bst_abs_1st_04.ckp
training end, used 173.32 s
config ----------------
{'EB_dp': 0.3,
 'FC_dp': 0.1,
 'adam_epsilon': 1e-08,
 'batch_size': 32,
 'cnn_out_c': 100,
 'device': device(type='cuda'),
 'epochs': 20,
 'l2_weight_decay': 0.0001,
 'learning_rate': 0.001,
 'lr_cooldown': 2,
 'lr_reduce_factor': 0.5,
 'max_grad_norm': 5.0,
 'max_token_n': 54,
 'not_x_feature': False,
 'num_embedding': 64,
 'num_words': 82949,
 'patience_epoch': 3,
 'rnn_layers': 2,
 'rnn_num_directions': 2,
 'rnn_out_f_n': 68,
 'threshold': 0.5,
 'use_new_loss': False,
 'warmup_epoch': 0,
 'weight_decay': 0.0001,
 'window_sizes': [2, 3, 4, 5]}
       ----------------
init model
training begin


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_1 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_2 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_3 *
Epoch     2: reducing learning rate of group 0 to 5.0000e-04.
Epoch     2: reducing learning rate of group 1 to 5.0000e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_4 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_5 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_6 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_7 *
Epoch     6: reducing learning rate of group 0 to 2.5000e-04.
Epoch     6: reducing learning rate of group 1 to 2.5000e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_8 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_9 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_10 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_11 *
Epoch    10: reducing learning rate of group 0 to 1.2500e-04.
Epoch    10: reducing learning rate of group 1 to 1.2500e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_12 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_13 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_14 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_15 *
Epoch    14: reducing learning rate of group 0 to 6.2500e-05.
Epoch    14: reducing learning rate of group 1 to 6.2500e-05.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_16 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_17 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_18 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_19 *
Epoch    18: reducing learning rate of group 0 to 3.1250e-05.
Epoch    18: reducing learning rate of group 1 to 3.1250e-05.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_20 *
saved checkpoint in ../models/abs_models/Bst_abs_1st_05.ckp
training end, used 174.14 s
config ----------------
{'EB_dp': 0.3,
 'FC_dp': 0.1,
 'adam_epsilon': 1e-08,
 'batch_size': 32,
 'cnn_out_c': 100,
 'device': device(type='cuda'),
 'epochs': 20,
 'l2_weight_decay': 0.0001,
 'learning_rate': 0.001,
 'lr_cooldown': 2,
 'lr_reduce_factor': 0.5,
 'max_grad_norm': 5.0,
 'max_token_n': 54,
 'not_x_feature': False,
 'num_embedding': 64,
 'num_words': 82949,
 'patience_epoch': 3,
 'rnn_layers': 2,
 'rnn_num_directions': 2,
 'rnn_out_f_n': 68,
 'threshold': 0.5,
 'use_new_loss': False,
 'warmup_epoch': 0,
 'weight_decay': 0.0001,
 'window_sizes': [2, 3, 4, 5]}
       ----------------
init model
training begin


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_1 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_2 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_3 *
Epoch     2: reducing learning rate of group 0 to 5.0000e-04.
Epoch     2: reducing learning rate of group 1 to 5.0000e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_4 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_5 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_6 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_7 *
Epoch     6: reducing learning rate of group 0 to 2.5000e-04.
Epoch     6: reducing learning rate of group 1 to 2.5000e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_8 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_9 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_10 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_11 *
Epoch    10: reducing learning rate of group 0 to 1.2500e-04.
Epoch    10: reducing learning rate of group 1 to 1.2500e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_12 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_13 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_14 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_15 *
Epoch    14: reducing learning rate of group 0 to 6.2500e-05.
Epoch    14: reducing learning rate of group 1 to 6.2500e-05.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_16 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_17 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_18 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_19 *
Epoch    18: reducing learning rate of group 0 to 3.1250e-05.
Epoch    18: reducing learning rate of group 1 to 3.1250e-05.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_20 *
saved checkpoint in ../models/abs_models/Bst_abs_1st_06.ckp
training end, used 173.34 s
config ----------------
{'EB_dp': 0.3,
 'FC_dp': 0.1,
 'adam_epsilon': 1e-08,
 'batch_size': 32,
 'cnn_out_c': 100,
 'device': device(type='cuda'),
 'epochs': 20,
 'l2_weight_decay': 0.0001,
 'learning_rate': 0.001,
 'lr_cooldown': 2,
 'lr_reduce_factor': 0.5,
 'max_grad_norm': 5.0,
 'max_token_n': 54,
 'not_x_feature': False,
 'num_embedding': 64,
 'num_words': 82949,
 'patience_epoch': 3,
 'rnn_layers': 2,
 'rnn_num_directions': 2,
 'rnn_out_f_n': 68,
 'threshold': 0.5,
 'use_new_loss': False,
 'warmup_epoch': 0,
 'weight_decay': 0.0001,
 'window_sizes': [2, 3, 4, 5]}
       ----------------
init model
training begin


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_1 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_2 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_3 *
Epoch     2: reducing learning rate of group 0 to 5.0000e-04.
Epoch     2: reducing learning rate of group 1 to 5.0000e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_4 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_5 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_6 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_7 *
Epoch     6: reducing learning rate of group 0 to 2.5000e-04.
Epoch     6: reducing learning rate of group 1 to 2.5000e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_8 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_9 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_10 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_11 *
Epoch    10: reducing learning rate of group 0 to 1.2500e-04.
Epoch    10: reducing learning rate of group 1 to 1.2500e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_12 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_13 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_14 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_15 *
Epoch    14: reducing learning rate of group 0 to 6.2500e-05.
Epoch    14: reducing learning rate of group 1 to 6.2500e-05.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_16 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_17 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_18 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_19 *
Epoch    18: reducing learning rate of group 0 to 3.1250e-05.
Epoch    18: reducing learning rate of group 1 to 3.1250e-05.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_20 *
saved checkpoint in ../models/abs_models/Bst_abs_1st_07.ckp
training end, used 173.78 s
config ----------------
{'EB_dp': 0.3,
 'FC_dp': 0.1,
 'adam_epsilon': 1e-08,
 'batch_size': 32,
 'cnn_out_c': 100,
 'device': device(type='cuda'),
 'epochs': 20,
 'l2_weight_decay': 0.0001,
 'learning_rate': 0.001,
 'lr_cooldown': 2,
 'lr_reduce_factor': 0.5,
 'max_grad_norm': 5.0,
 'max_token_n': 54,
 'not_x_feature': False,
 'num_embedding': 64,
 'num_words': 82949,
 'patience_epoch': 3,
 'rnn_layers': 2,
 'rnn_num_directions': 2,
 'rnn_out_f_n': 68,
 'threshold': 0.5,
 'use_new_loss': False,
 'warmup_epoch': 0,
 'weight_decay': 0.0001,
 'window_sizes': [2, 3, 4, 5]}
       ----------------
init model
training begin


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_1 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_2 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_3 *
Epoch     2: reducing learning rate of group 0 to 5.0000e-04.
Epoch     2: reducing learning rate of group 1 to 5.0000e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_4 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_5 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_6 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_7 *
Epoch     6: reducing learning rate of group 0 to 2.5000e-04.
Epoch     6: reducing learning rate of group 1 to 2.5000e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_8 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_9 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_10 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_11 *
Epoch    10: reducing learning rate of group 0 to 1.2500e-04.
Epoch    10: reducing learning rate of group 1 to 1.2500e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_12 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_13 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_14 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_15 *
Epoch    14: reducing learning rate of group 0 to 6.2500e-05.
Epoch    14: reducing learning rate of group 1 to 6.2500e-05.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_16 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_17 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_18 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_19 *
Epoch    18: reducing learning rate of group 0 to 3.1250e-05.
Epoch    18: reducing learning rate of group 1 to 3.1250e-05.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_20 *
saved checkpoint in ../models/abs_models/Bst_abs_1st_08.ckp
training end, used 174.11 s
config ----------------
{'EB_dp': 0.3,
 'FC_dp': 0.1,
 'adam_epsilon': 1e-08,
 'batch_size': 32,
 'cnn_out_c': 100,
 'device': device(type='cuda'),
 'epochs': 20,
 'l2_weight_decay': 0.0001,
 'learning_rate': 0.001,
 'lr_cooldown': 2,
 'lr_reduce_factor': 0.5,
 'max_grad_norm': 5.0,
 'max_token_n': 54,
 'not_x_feature': False,
 'num_embedding': 64,
 'num_words': 82949,
 'patience_epoch': 3,
 'rnn_layers': 2,
 'rnn_num_directions': 2,
 'rnn_out_f_n': 68,
 'threshold': 0.5,
 'use_new_loss': False,
 'warmup_epoch': 0,
 'weight_decay': 0.0001,
 'window_sizes': [2, 3, 4, 5]}
       ----------------
init model
training begin


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_1 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_2 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_3 *
Epoch     2: reducing learning rate of group 0 to 5.0000e-04.
Epoch     2: reducing learning rate of group 1 to 5.0000e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_4 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_5 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_6 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_7 *
Epoch     6: reducing learning rate of group 0 to 2.5000e-04.
Epoch     6: reducing learning rate of group 1 to 2.5000e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_8 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_9 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_10 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_11 *
Epoch    10: reducing learning rate of group 0 to 1.2500e-04.
Epoch    10: reducing learning rate of group 1 to 1.2500e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_12 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_13 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_14 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_15 *
Epoch    14: reducing learning rate of group 0 to 6.2500e-05.
Epoch    14: reducing learning rate of group 1 to 6.2500e-05.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_16 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_17 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_18 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_19 *
Epoch    18: reducing learning rate of group 0 to 3.1250e-05.
Epoch    18: reducing learning rate of group 1 to 3.1250e-05.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_20 *
saved checkpoint in ../models/abs_models/Bst_abs_1st_09.ckp
training end, used 174.46 s
config ----------------
{'EB_dp': 0.3,
 'FC_dp': 0.1,
 'adam_epsilon': 1e-08,
 'batch_size': 32,
 'cnn_out_c': 100,
 'device': device(type='cuda'),
 'epochs': 20,
 'l2_weight_decay': 0.0001,
 'learning_rate': 0.001,
 'lr_cooldown': 2,
 'lr_reduce_factor': 0.5,
 'max_grad_norm': 5.0,
 'max_token_n': 54,
 'not_x_feature': False,
 'num_embedding': 64,
 'num_words': 82949,
 'patience_epoch': 3,
 'rnn_layers': 2,
 'rnn_num_directions': 2,
 'rnn_out_f_n': 68,
 'threshold': 0.5,
 'use_new_loss': False,
 'warmup_epoch': 0,
 'weight_decay': 0.0001,
 'window_sizes': [2, 3, 4, 5]}
       ----------------
init model
training begin


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_1 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_2 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_3 *
Epoch     2: reducing learning rate of group 0 to 5.0000e-04.
Epoch     2: reducing learning rate of group 1 to 5.0000e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_4 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_5 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_6 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_7 *
Epoch     6: reducing learning rate of group 0 to 2.5000e-04.
Epoch     6: reducing learning rate of group 1 to 2.5000e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_8 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_9 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_10 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_11 *
Epoch    10: reducing learning rate of group 0 to 1.2500e-04.
Epoch    10: reducing learning rate of group 1 to 1.2500e-04.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_12 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_13 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_14 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_15 *
Epoch    14: reducing learning rate of group 0 to 6.2500e-05.
Epoch    14: reducing learning rate of group 1 to 6.2500e-05.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_16 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_17 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_18 *


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_19 *
Epoch    18: reducing learning rate of group 0 to 3.1250e-05.
Epoch    18: reducing learning rate of group 1 to 3.1250e-05.


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=174.0, style=ProgressStyle(description_wi…


e_20 *
saved checkpoint in ../models/abs_models/Bst_abs_1st_10.ckp
training end, used 173.72 s


In [5]:
import sys

# Blank out argv before argparse runs -- presumably so the parser does not
# trip over the notebook kernel's own command-line arguments.
sys.argv = ['']


if __name__ == "__main__":
    # ------------------------------------------------------------------
    # Command-line interface. Options are registered in the same order as
    # before so the generated --help text is unchanged.
    # ------------------------------------------------------------------
    parser = argparse.ArgumentParser(description='PyTorch renet')

    def _text_opt(flag, default, help_msg):
        """Register a string-valued option."""
        parser.add_argument(flag, default=default, type=str, help=help_msg)

    def _num_opt(flag, kind, default, metavar, help_msg):
        """Register a numeric option of type `kind` (int or float)."""
        parser.add_argument(flag, type=kind, default=default,
                            metavar=metavar, help=help_msg)

    def _switch(flag, default, help_msg):
        """Register a store_true switch with the given default."""
        parser.add_argument(flag, action='store_true', default=default,
                            help=help_msg)

    # Paths and file names.
    _text_opt("--raw_data_dir", "/mnt/bal31/jhsu/old/data/RENET_PMC_data", "raw data dir")
    _text_opt("--modle_dir", "/mnt/bal31/jhsu/home/git/renet2/model", "modle data dir")
    _text_opt("--label_f_name", "labels.txt", "modle label name")
    _text_opt("--file_name_doc", "docs.txt", "document name")
    _text_opt("--file_name_snt", "sentences.txt", "sentences file name")
    _text_opt("--file_name_ann", "anns.txt", "anns file name")
    _text_opt("--word_index_fn", "/mnt/bal31/jhsu/home/git/renet2/src/utils/word_index",
              "word index data dir")

    # Training options.
    _num_opt('--batch-size', int, 64, 'N', 'input batch size for training (default: 64)')
    _num_opt('--epochs', int, 30, 'N', 'number of epochs to train (default: 30)')
    _num_opt('--lr', float, 0.001, 'LR', 'learning rate (default: 0.001)')
    _num_opt('--momentum', float, 0.5, 'M', 'SGD momentum (default: 0.5)')
    _switch('--no-cuda', False, 'disables CUDA training')
    _num_opt('--seed', int, 42, 'S', 'random seed (default: 42)')
    _num_opt('--save-interval', int, 10, 'N', 'how many batches to wait before checkpointing')
    _switch('--resume', False, 'resume training from checkpoint')

    # Data reading / caching options.
    _switch('--read_abs', False, 'reading_abs_only')
    _switch('--overwrite_cache', False, 'overwrite_cache')
    _num_opt('--fix_snt_n', int, 150, 'N', 'number of snt')
    _num_opt('--fix_token_n', int, 150, 'N', 'number of tokens')
    _num_opt('--max_doc_num', int, 0, 'N', 'number of document')
    # NOTE(review): store_true with default=True means this switch can never
    # be turned off from the command line.
    _switch('--is_read_doc', True, 'reading doc file')
    _switch('--is_filter_sub', False, 'filter pmid in abs data')
    _switch('--add_cache_suf', False, 'cache file suffix')
    _switch('--read_old_file', False, 'reading dga files')
    _switch('--read_ori_token', False, 'get raw text')
    _switch('--using_new_tokenizer', False, 'using new tokenizer')
    _switch('--not_x_feature', False, 'do not use x_feature')

    args = parser.parse_args()

    # ------------------------------------------------------------------
    # Device selection and seeding.
    # ------------------------------------------------------------------
    use_cuda = torch.cuda.is_available() and not args.no_cuda
    if use_cuda:
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    torch.manual_seed(args.seed)
    if use_cuda:
        torch.cuda.manual_seed(args.seed)
    set_seed(args)

    args.device = device
    print(device)

    # ------------------------------------------------------------------
    # Tokenizer: keep it on `args` together with its vocabulary size.
    # ------------------------------------------------------------------
    args.ori_tokenizer = loading_tokenizer(args)
    args.token_voc_l = len(args.ori_tokenizer)
    print('tokenizer size %d' % (args.token_voc_l))

    # ------------------------------------------------------------------
    # Notebook-level overrides of the parsed defaults.
    # ------------------------------------------------------------------
    args.is_read_doc = True
    args.max_doc_num = 0
    args.batch_size = 32

    # Fixed input shape: sentences per document, tokens per sentence.
    args.fix_snt_n = 32
    args.fix_token_n = 54

    # NOTE(review): max_doc_num is already 0 at this point, so this branch
    # currently changes nothing.
    if args.read_abs:
        args.max_doc_num = 0

cuda
loading word index from /mnt/bal31/jhsu/home/git/renet2/src/utils/word_index
loaded word index, voc size 82948
tokenizer size 82949


In [1]:
# Switch the loader to the "_p" sentence files and force the cache to be rebuilt.
# This cell needs `args` from the setup cell; the NameError recorded below it
# comes from running it on a kernel where `args` did not exist yet.
# NOTE(review): the parser declares --add_cache_suf as a boolean switch, but a
# string suffix is stored here -- confirm which type load_and_cache_data expects.
vars(args).update(
    overwrite_cache=True,
    file_name_snt="sentences_p.txt",
    add_cache_suf='_p',
)

NameError: name 'args' is not defined

In [7]:
# Select the "run2_rm_train" corpus and its label file.
args.raw_data_dir, args.label_f_name = (
    "/mnt/bal31/jhsu/old/data/new_renet_data/run2_rm_train/",
    "labels.txt",
)

# Features are produced (or read from cache) by the project helper.
features_ss_all = load_and_cache_data(args)

# Whole corpus as a single dataset; x_train / y_train stay available as
# notebook globals. The loader iterates in dataset order (no shuffling).
dataset_ss_all, x_train, y_train = convert_features_to_dataset_single(features_ss_all)
dataloader_ss_all = DataLoader(dataset=dataset_ss_all, batch_size=args.batch_size)

Loading features from cached file %s /mnt/bal31/jhsu/old/data/new_renet_data/run2_rm_train/cached_all_doc_0_32_54_p
loaded end
dev 38832.0 144292 0.2691209491863721


In [8]:
# Split the run2_rm_train features into a train part and a dev part.
# NOTE(review): the recorded output reports 139291 vs 5001 examples, which is
# not an 80/20 split -- confirm what the [0.8, 0.2] argument actually controls.
train_dataset_rm, dev_dataset_rm = convert_features_to_dataset(features_ss_all, [0.8, 0.2])

# Both loaders use the DataLoader default of no shuffling.
# NOTE(review): confirm that training on unshuffled batches is intended.
tr_dataloader_rm, dev_dataloader_rm = (
    DataLoader(dataset=split_ds, batch_size=args.batch_size)
    for split_ds in (train_dataset_rm, dev_dataset_rm)
)

dev 37825.0 139291 0.2715537974456354
dev 1007.0 5001 0.20135972805438912


In [9]:
# Select the held-out "test" corpus and its label file.
args.raw_data_dir, args.label_f_name = (
    "/mnt/bal31/jhsu/old/data/new_renet_data/test/",
    "labels.txt",
)

# Features are produced (or read from cache) by the project helper.
features_ori_t = load_and_cache_data(args)

# Only the dataset (first returned item) is kept; the loader iterates in
# dataset order.
dataset_ori_t, _, _ = convert_features_to_dataset_single(features_ori_t)
dataloader_ori_t = DataLoader(dataset=dataset_ori_t, batch_size=args.batch_size)

Loading features from cached file %s /mnt/bal31/jhsu/old/data/new_renet_data/test/cached_all_doc_0_32_54_p
loaded end
dev 1338.0 4729 0.2829350814125608


In [10]:
# Select the "gd_v3_5h" corpus; note that it uses a different label file
# ("labels_n.txt") from the previous cells.
args.raw_data_dir, args.label_f_name = (
    "/mnt/bal31/jhsu/old/data/new_renet_data/gd_v3_5h/",
    "labels_n.txt",
)

# Features are produced (or read from cache) by the project helper.
features_ann_1 = load_and_cache_data(args)

# Only the dataset (first returned item) is kept; the loader iterates in
# dataset order.
dataset_ann_1, _, _ = convert_features_to_dataset_single(features_ann_1)
dataloader_ann_1 = DataLoader(dataset=dataset_ann_1, batch_size=args.batch_size)

Loading features from cached file %s /mnt/bal31/jhsu/old/data/new_renet_data/gd_v3_5h/cached_all_doc_0_32_54_p
loaded end
dev 975.5 2813 0.34678279416992536


In [11]:
# Select the "run2_5h_gd" corpus with its own label file ("label_v1_n5.txt").
args.raw_data_dir, args.label_f_name = (
    "/mnt/bal31/jhsu/old/data/new_renet_data/run2_5h_gd/",
    "label_v1_n5.txt",
)

# Features are produced (or read from cache) by the project helper.
features_ss_aug = load_and_cache_data(args)

# Only the dataset (first returned item) is kept; the loader iterates in
# dataset order.
dataset_ss_aug, _, _ = convert_features_to_dataset_single(features_ss_aug)
dataloader_ss_aug = DataLoader(dataset=dataset_ss_aug, batch_size=args.batch_size)

Loading features from cached file %s /mnt/bal31/jhsu/old/data/new_renet_data/run2_5h_gd/cached_all_doc_0_32_54_p
loaded end
dev 882.5 2734 0.3227871250914411


In [12]:
# Merge the annotated set with the augmentation set into one feature pair:
# item 0 is concatenated along axis 0, item 1 is stacked with pandas.
# NOTE(review): pd.concat keeps the original index labels, so the merged
# frame may contain duplicate index values.
features_merge = (
    np.concatenate([features_ann_1[0], features_ss_aug[0]], axis=0),
    pd.concat([features_ann_1[1], features_ss_aug[1]]),
)

In [13]:
# Turn the merged feature pair into one dataset (only the first returned item
# is kept) and wrap it in a loader that iterates in dataset order.
dataset_merge, _, _ = convert_features_to_dataset_single(features_merge)
dataloader_merge = DataLoader(dataset=dataset_merge, batch_size=args.batch_size)

dev 1858.0 5547 0.3349558319812511


In [13]:
# ----------------------------------------------------------------------
# Train the 'Bst_all_abs_1' model on the run2_rm_train split, using its
# dev split for validation and the held-out test loader for reporting.
# ----------------------------------------------------------------------

# Network shape and dropout.
vars(args).update(
    num_embedding=64,
    cnn_out_c=100,
    rnn_out_f_n=68,
    rnn_num_directions=2,
    rnn_layers=2,
    window_sizes=[2, 3, 4, 5],
    EB_dp=0.3,
    FC_dp=0.1,
)

# Loss switches.
vars(args).update(use_new_loss=False, use_cls_loss=False)

# Epoch schedule.
vars(args).update(epochs=4, warmup_epoch=0, patience_epoch=3)

# Optimiser, regularisation, LR scheduling and decision threshold.
vars(args).update(
    learning_rate=1e-3,
    adam_epsilon=1e-8,
    weight_decay=1e-7,
    l2_weight_decay=5e-6,
    max_grad_norm=5.0,
    lr_reduce_factor=.5,
    threshold=.5,
    lr_cooldown=2,
    use_loss_sh=False,
    is_iterare_info=False,
)

# Output locations: checkpoint (.ckp) and saved config (.cf) share one prefix.
args.modle_dir = '/mnt/bal31/jhsu/home/git/renet2/models'
model_name_prefix = 'Bst_all_abs_1'
args.checkpoint_f, args.config_save_f = (
    os.path.join(args.modle_dir, model_name_prefix + suffix)
    for suffix in (".ckp", ".cf")
)

# Build the model config from `args` and persist it next to the checkpoint.
config = set_model_config(args)
torch.save(config, args.config_save_f)

# Fresh model, initialised weights, optimiser + scheduler.
model = Base_Net(config).to(device)
model_init_w(model)
optimizer, scheduler = init_model_optimizer(model, args)

# Data roles for this run.
train_dt = tr_dataloader_rm
dev_dt = dev_dataloader_rm
test_dt = dataloader_ori_t

_ = train(model, optimizer, scheduler, train_dt, dev_dt, args, test_dt)

config ----------------
{'EB_dp': 0.3,
 'FC_dp': 0.1,
 'adam_epsilon': 1e-08,
 'batch_size': 32,
 'cnn_out_c': 100,
 'device': device(type='cuda'),
 'epochs': 4,
 'l2_weight_decay': 5e-06,
 'learning_rate': 0.001,
 'lr_cooldown': 2,
 'lr_reduce_factor': 0.5,
 'max_grad_norm': 5.0,
 'max_token_n': 54,
 'not_x_feature': False,
 'num_embedding': 64,
 'num_words': 82949,
 'patience_epoch': 3,
 'rnn_layers': 2,
 'rnn_num_directions': 2,
 'rnn_out_f_n': 68,
 'threshold': 0.5,
 'use_new_loss': False,
 'warmup_epoch': 0,
 'weight_decay': 1e-07,
 'window_sizes': [2, 3, 4, 5]}
       ----------------
init model
training begin


HBox(children=(FloatProgress(value=0.0, description='Iteration', layout=Layout(flex='2'), max=4353.0, style=Pr…


e_1 dev: 0.00252, 0.722, 0.733, 0.727, 0.930 * test rst: [0.00231, 0.8330, 0.8274, 0.8301, 0.9570]
saved checkpoint in /mnt/bal31/jhsu/home/git/renet2/models/Bst_all_abs_1.ckp_001


HBox(children=(FloatProgress(value=0.0, description='Iteration', layout=Layout(flex='2'), max=4353.0, style=Pr…


e_2 dev: 0.00262, 0.741, 0.702, 0.721, 0.922 * test rst: [0.00238, 0.8301, 0.8072, 0.8185, 0.9543]
saved checkpoint in /mnt/bal31/jhsu/home/git/renet2/models/Bst_all_abs_1.ckp_002


HBox(children=(FloatProgress(value=0.0, description='Iteration', layout=Layout(flex='2'), max=4353.0, style=Pr…


e_3 dev: 0.00279, 0.716, 0.734, 0.725, 0.921 * test rst: [0.00259, 0.8068, 0.8333, 0.8199, 0.9506]
Epoch     2: reducing learning rate of group 0 to 5.0000e-04.
Epoch     2: reducing learning rate of group 1 to 5.0000e-04.
saved checkpoint in /mnt/bal31/jhsu/home/git/renet2/models/Bst_all_abs_1.ckp_003


HBox(children=(FloatProgress(value=0.0, description='Iteration', layout=Layout(flex='2'), max=4353.0, style=Pr…


e_4 dev: 0.00290, 0.697, 0.789, 0.740, 0.926 * test rst: [0.00255, 0.8018, 0.8617, 0.8307, 0.9572]
saved checkpoint in /mnt/bal31/jhsu/home/git/renet2/models/Bst_all_abs_1.ckp_004
saved checkpoint in /mnt/bal31/jhsu/home/git/renet2/models/Bst_all_abs_1.ckp
training end, used 1118.63 s


In [14]:
# ----------------------------------------------------------------------
# 5-fold cross-validation on the annotated set (`features_ann_1`).
# For every fold a fresh model is trained on the fold's train indices
# (built together with `features_ss_aug` by the *_cv_aug helper) and
# evaluated on the fold's held-out indices. Per-fold scores are printed,
# then pooled precision / recall / F1 / AUC over all held-out predictions.
# ----------------------------------------------------------------------

# Network shape and dropout.
args.num_embedding = 64
args.cnn_out_c = 100
args.rnn_out_f_n = 68

args.rnn_num_directions = 2
args.rnn_layers = 2
args.window_sizes = [2, 3, 4, 5]
args.EB_dp = 0.3
args.FC_dp = 0.1

# Loss switches.
args.use_new_loss = False
args.use_cls_loss = False


# Epoch schedule.
args.epochs = 20
args.warmup_epoch = 0
args.patience_epoch = 3

# Optimiser, regularisation, LR scheduling and decision threshold.
args.learning_rate = 1e-3
args.adam_epsilon = 1e-8
args.weight_decay = 1e-4
args.l2_weight_decay = 1e-4
args.max_grad_norm = 5.0
args.lr_reduce_factor = .5
args.threshold = .5
args.lr_cooldown = 2
args.use_loss_sh = False
args.is_iterare_info = True


config = set_model_config(args)


# KFold without shuffling: folds are contiguous index ranges, so the split
# is deterministic for a given row order.
kf = KFold(n_splits=5)
s_arr = []          # one score tuple per fold
run_cls_df_l = []   # one per-example result frame per fold

tar_feature = features_ann_1
for idx, (train_dev_idx, test_idx) in enumerate(kf.split(tar_feature[1])):
    # Train-side dataset is built first (with the augmentation features),
    # then the held-out dataset for this fold.
    cv_train_dev_ds, cv_test_ds = convert_features_to_dataset_cv_aug(tar_feature, train_dev_idx, features_ss_aug), \
                             convert_features_to_dataset_cv(tar_feature, test_idx)

    print('cv, step {}'.format(idx+1))
    print(len(cv_train_dev_ds), len(cv_test_ds))

    # NOTE(review): both loaders use the DataLoader default (no shuffling);
    # confirm that unshuffled training batches are intended.
    train_dl = DataLoader(cv_train_dev_ds, batch_size=args.batch_size)
    test_dl = DataLoader(cv_test_ds, batch_size=args.batch_size)


    # Fresh model per fold so no weights leak between folds.
    model = Base_Net(config).to(device)
    model_init_w(model)
    optimizer, scheduler = init_model_optimizer(model, args)
    # No dev loader is passed; the fold's held-out loader is given as test data.
    _ = train(model, optimizer, scheduler, train_dl, None, args, test_dl, False)


    #test
    # `eval` is the name bound by the notebook's star-imports (it shadows the
    # builtin) -- presumably the project's evaluation routine.
    pred_l, tru_l, S, pre_o = eval(model, test_dl, args, 'test')
    _, _, _, f1, auc = S
    print(S)

    s_arr.append(list(S))

    # Attach predictions and probabilities to a copy of the fold's label rows.
    y_info = tar_feature[1].iloc[test_idx].copy()
    y_info['pred'] = pred_l
    y_info['prob'] = pre_o
    run_cls_df_l.append(y_info)
    print('total label sum', sum(y_info.label))

mer_pd_rst = pd.concat(run_cls_df_l)
s_arr = np.array(s_arr)

# One CSV-style line of scores per fold.
for r in s_arr:
    print(','.join([str(i) for i in r]))


print('mean loss, prec, recall, f1, auc')
print(list(np.mean(s_arr, axis=0)))
print('mlt train ended')

# Pooled metrics over all held-out predictions.
# Work on explicit copies: `Series.to_numpy()` may return a view of the
# frame's data (or a read-only array under pandas Copy-on-Write), so editing
# its result in place would either silently rewrite `mer_pd_rst` or raise.
tru_l = mer_pd_rst['label'].to_numpy(copy=True)
tru_l[tru_l==.5] = 0      # labels of 0.5 are counted as negatives
pred_l = mer_pd_rst['pred'].to_numpy(copy=True)
pred_l[pred_l==.5] = 0
pred_l = pred_l.astype(int)
precision, recall, f1, _ = \
                    precision_recall_fscore_support(tru_l, pred_l, average='binary',zero_division=1)
# AUC is scored against the same binarised labels as P/R/F1, instead of
# relying on the in-place edit above having leaked into `mer_pd_rst`.
auc_s = roc_auc_score(tru_l, mer_pd_rst['prob'].to_numpy())
print('%f,%f,%f,%f' % (precision, recall, f1, auc_s))



config ----------------
{'EB_dp': 0.3,
 'FC_dp': 0.1,
 'adam_epsilon': 1e-08,
 'batch_size': 32,
 'cnn_out_c': 100,
 'device': device(type='cuda'),
 'epochs': 20,
 'l2_weight_decay': 0.0001,
 'learning_rate': 0.001,
 'lr_cooldown': 2,
 'lr_reduce_factor': 0.5,
 'max_grad_norm': 5.0,
 'max_token_n': 54,
 'not_x_feature': False,
 'num_embedding': 64,
 'num_words': 82949,
 'patience_epoch': 3,
 'rnn_layers': 2,
 'rnn_num_directions': 2,
 'rnn_out_f_n': 68,
 'threshold': 0.5,
 'use_new_loss': False,
 'warmup_epoch': 0,
 'weight_decay': 0.0001,
 'window_sizes': [2, 3, 4, 5]}
       ----------------
postive cnt:  197.5 563 0.35079928952042627
cv, step 1
4984 563
init model
training begin
e_1 * test rst: [0.00426, 0.6845, 0.6765, 0.6805, 0.8442]
e_2 * test rst: [0.00445, 0.6649, 0.7471, 0.7036, 0.8591]
e_3 * test rst: [0.00556, 0.5387, 0.8588, 0.6621, 0.8623]
Epoch     2: reducing learning rate of group 0 to 5.0000e-04.
Epoch     2: reducing learning rate of group 1 to 5.0000e-04.
e_4 * test 

e_17 * test rst: [0.00519, 0.7260, 0.6272, 0.6730, 0.8306]
e_18 * test rst: [0.00523, 0.7376, 0.6154, 0.6710, 0.8308]
e_19 * test rst: [0.00516, 0.7483, 0.6509, 0.6962, 0.8320]
Epoch    18: reducing learning rate of group 0 to 3.1250e-05.
Epoch    18: reducing learning rate of group 1 to 3.1250e-05.
e_20 * test rst: [0.00516, 0.7310, 0.6272, 0.6752, 0.8315]
training end, used 153.99 s
(0.005164859266255674, 0.7310344827586207, 0.6272189349112426, 0.6751592356687898, 0.8314588132556424)
total label sum 188.0
postive cnt:  184.5 562 0.32829181494661924
cv, step 5
4985 562
init model
training begin
e_1 * test rst: [0.00417, 0.6849, 0.6061, 0.6431, 0.8530]
e_2 * test rst: [0.00400, 0.7389, 0.7030, 0.7205, 0.8716]
e_3 * test rst: [0.00593, 0.5282, 0.9091, 0.6682, 0.8803]
Epoch     2: reducing learning rate of group 0 to 5.0000e-04.
Epoch     2: reducing learning rate of group 1 to 5.0000e-04.
e_4 * test rst: [0.00386, 0.7412, 0.7636, 0.7522, 0.8755]
e_5 * test rst: [0.00384, 0.7326, 0.7636,

In [15]:
# Train one model on the merged data loader and save its config / checkpoints
# under the `Bst_abs_1` prefix.

# --- Network hyperparameters (consumed by set_model_config / Base_Net) -------
args.num_embedding = 64
args.cnn_out_c = 100
args.rnn_out_f_n = 68

args.rnn_num_directions = 2
args.rnn_layers = 2
args.window_sizes = [2, 3, 4, 5]
# presumably dropout rates for the embedding and fully-connected layers --
# TODO confirm against renet2.model
args.EB_dp = 0.3
args.FC_dp = 0.1

# --- Loss switches ------------------------------------------------------------
args.use_new_loss = False
args.use_cls_loss = False


# --- Training schedule --------------------------------------------------------
args.epochs = 20
args.warmup_epoch = 0
args.patience_epoch = 3

# --- Optimiser / LR-scheduler settings ---------------------------------------
args.learning_rate = 1e-3
args.adam_epsilon = 1e-8
args.weight_decay = 1e-4
args.l2_weight_decay = 1e-4
args.max_grad_norm = 5.0
args.lr_reduce_factor = .5
args.threshold = .5
args.lr_cooldown = 2
args.use_loss_sh = False
# NOTE(review): attribute spelling ("iterare") kept as-is; library code may
# read it under this exact name -- confirm before renaming.
args.is_iterare_info = True


# --- Output locations ---------------------------------------------------------
# NOTE(review): attribute spelling ("modle_dir") kept as-is for the same reason.
args.modle_dir = '/mnt/bal31/jhsu/home/git/renet2/models'
model_name_prefix = 'Bst_abs_1'
args.checkpoint_f = os.path.join(args.modle_dir, model_name_prefix + ".ckp")
args.config_save_f = os.path.join(args.modle_dir,  model_name_prefix + ".cf")

# Make sure the target directory exists before anything is written into it;
# torch.save does not create missing parent directories.
os.makedirs(args.modle_dir, exist_ok=True)


# Persist the resolved config next to the checkpoint.
config = set_model_config(args)
torch.save(config, args.config_save_f)

model = Base_Net(config).to(device)
model_init_w(model)
optimizer, scheduler = init_model_optimizer(model, args)

# Train on the merged loader; no dev or test loader is supplied.
train_dt, dev_dt, test_dt = dataloader_merge, None, None

# NOTE(review): the trailing positional `True` appears to turn on checkpoint
# saving (the log prints "saved checkpoint in ...") -- confirm against
# renet2's `train` signature.
_ = train(model, optimizer, scheduler, train_dt, dev_dt, args, test_dt, True)

config ----------------
{'EB_dp': 0.3,
 'FC_dp': 0.1,
 'adam_epsilon': 1e-08,
 'batch_size': 32,
 'cnn_out_c': 100,
 'device': device(type='cuda'),
 'epochs': 20,
 'l2_weight_decay': 0.0001,
 'learning_rate': 0.001,
 'lr_cooldown': 2,
 'lr_reduce_factor': 0.5,
 'max_grad_norm': 5.0,
 'max_token_n': 54,
 'not_x_feature': False,
 'num_embedding': 64,
 'num_words': 82949,
 'patience_epoch': 3,
 'rnn_layers': 2,
 'rnn_num_directions': 2,
 'rnn_out_f_n': 68,
 'threshold': 0.5,
 'use_new_loss': False,
 'warmup_epoch': 0,
 'weight_decay': 0.0001,
 'window_sizes': [2, 3, 4, 5]}
       ----------------
init model
training begin
e_1 *
saved checkpoint in /mnt/bal31/jhsu/home/git/renet2/models/Bst_abs_1.ckp_001
e_2 *
saved checkpoint in /mnt/bal31/jhsu/home/git/renet2/models/Bst_abs_1.ckp_002
e_3 *
Epoch     2: reducing learning rate of group 0 to 5.0000e-04.
Epoch     2: reducing learning rate of group 1 to 5.0000e-04.
saved checkpoint in /mnt/bal31/jhsu/home/git/renet2/models/Bst_abs_1.ckp_003


In [14]:
# Train 10 models with identical hyperparameters on the merged data loader.
# Each run saves its config / checkpoint under `Bst_abs_01` ... `Bst_abs_10`.
# The hyperparameters are re-applied on every iteration so each run starts
# from the same `args` values even if a previous run modified them.
for _model_idx in range(10):
    # --- Network hyperparameters (consumed by set_model_config / Base_Net) ---
    args.num_embedding = 64
    args.cnn_out_c = 100
    args.rnn_out_f_n = 68

    args.rnn_num_directions = 2
    args.rnn_layers = 2
    args.window_sizes = [2, 3, 4, 5]
    # presumably dropout rates for the embedding and fully-connected layers --
    # TODO confirm against renet2.model
    args.EB_dp = 0.3
    args.FC_dp = 0.1

    # --- Loss switches --------------------------------------------------------
    args.use_new_loss = False
    args.use_cls_loss = False


    # --- Training schedule ----------------------------------------------------
    args.epochs = 20
    args.warmup_epoch = 0
    args.patience_epoch = 3

    # --- Optimiser / LR-scheduler settings -----------------------------------
    args.learning_rate = 1e-3
    args.adam_epsilon = 1e-8
    args.weight_decay = 1e-4
    args.l2_weight_decay = 1e-4
    args.max_grad_norm = 5.0
    args.lr_reduce_factor = .5
    args.threshold = .5
    args.lr_cooldown = 2
    args.use_loss_sh = False
    # NOTE(review): attribute spelling ("iterare") kept as-is; library code may
    # read it under this exact name -- confirm before renaming.
    args.is_iterare_info = True


    # --- Output locations (one file prefix per model, 1-based, zero-padded) ---
    # NOTE(review): attribute spelling ("modle_dir") kept as-is for the same
    # reason as above.
    args.modle_dir = '/mnt/bal31/jhsu/home/git/renet2/models'
    model_name_prefix = 'Bst_abs_' + '%02d' % (_model_idx + 1)
    args.checkpoint_f = os.path.join(args.modle_dir, model_name_prefix + ".ckp")
    args.config_save_f = os.path.join(args.modle_dir,  model_name_prefix + ".cf")

    # Make sure the target directory exists before anything is written into
    # it; torch.save does not create missing parent directories.
    os.makedirs(args.modle_dir, exist_ok=True)


    # Persist the resolved config next to the checkpoint.
    config = set_model_config(args)
    torch.save(config, args.config_save_f)

    # Fresh model, weights and optimiser state for every run.
    model = Base_Net(config).to(device)
    model_init_w(model)
    optimizer, scheduler = init_model_optimizer(model, args)

    # Train on the merged loader; no dev or test loader is supplied.
    train_dt, dev_dt, test_dt = dataloader_merge, None, None

    # NOTE(review): the trailing positional `True` appears to turn on
    # checkpoint saving (the log prints "saved checkpoint in ...") -- confirm
    # against renet2's `train` signature.
    _ = train(model, optimizer, scheduler, train_dt, dev_dt, args, test_dt, True)

config ----------------
{'EB_dp': 0.3,
 'FC_dp': 0.1,
 'adam_epsilon': 1e-08,
 'batch_size': 32,
 'cnn_out_c': 100,
 'device': device(type='cuda'),
 'epochs': 20,
 'l2_weight_decay': 0.0001,
 'learning_rate': 0.001,
 'lr_cooldown': 2,
 'lr_reduce_factor': 0.5,
 'max_grad_norm': 5.0,
 'max_token_n': 54,
 'not_x_feature': False,
 'num_embedding': 64,
 'num_words': 82949,
 'patience_epoch': 3,
 'rnn_layers': 2,
 'rnn_num_directions': 2,
 'rnn_out_f_n': 68,
 'threshold': 0.5,
 'use_new_loss': False,
 'warmup_epoch': 0,
 'weight_decay': 0.0001,
 'window_sizes': [2, 3, 4, 5]}
       ----------------
init model
training begin
e_1 *
e_2 *
e_3 *
Epoch     2: reducing learning rate of group 0 to 5.0000e-04.
Epoch     2: reducing learning rate of group 1 to 5.0000e-04.
e_4 *
e_5 *
e_6 *
e_7 *
Epoch     6: reducing learning rate of group 0 to 2.5000e-04.
Epoch     6: reducing learning rate of group 1 to 2.5000e-04.
e_8 *
e_9 *
e_10 *
e_11 *
Epoch    10: reducing learning rate of group 0 to 1.2500e

e_8 *
e_9 *
e_10 *
e_11 *
Epoch    10: reducing learning rate of group 0 to 1.2500e-04.
Epoch    10: reducing learning rate of group 1 to 1.2500e-04.
e_12 *
e_13 *
e_14 *
e_15 *
Epoch    14: reducing learning rate of group 0 to 6.2500e-05.
Epoch    14: reducing learning rate of group 1 to 6.2500e-05.
e_16 *
e_17 *
e_18 *
e_19 *
Epoch    18: reducing learning rate of group 0 to 3.1250e-05.
Epoch    18: reducing learning rate of group 1 to 3.1250e-05.
e_20 *
saved checkpoint in /mnt/bal31/jhsu/home/git/renet2/models/Bst_abs_06.ckp
training end, used 159.31 s
config ----------------
{'EB_dp': 0.3,
 'FC_dp': 0.1,
 'adam_epsilon': 1e-08,
 'batch_size': 32,
 'cnn_out_c': 100,
 'device': device(type='cuda'),
 'epochs': 20,
 'l2_weight_decay': 0.0001,
 'learning_rate': 0.001,
 'lr_cooldown': 2,
 'lr_reduce_factor': 0.5,
 'max_grad_norm': 5.0,
 'max_token_n': 54,
 'not_x_feature': False,
 'num_embedding': 64,
 'num_words': 82949,
 'patience_epoch': 3,
 'rnn_layers': 2,
 'rnn_num_directions': 2,