In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils import data
from model_test import Net
from data_load import NerDataset, pad, VOCAB, tokenizer, tag2idx, idx2tag
import os
import numpy as np
import argparse
import glob

class Arg():
    def __init__(self, check_path):
        self.checkpoint = check_path
        self.batch_size = 8

In [11]:
def eval(model, iterator):
    model.eval()

    Words, Is_heads, Tags, Y, Y_hat = [], [], [], [], []
    with torch.no_grad():
        for i, batch in enumerate(iterator):
            words, x, is_heads, tags, y, seqlens = batch
            print(x.shape)
            _, _, y_hat = model(x, y)  # y_hat: (N, T)

            Words.extend(words)
            Is_heads.extend(is_heads)
            Tags.extend(tags)
            Y.extend(y.numpy().tolist())
            Y_hat.extend(y_hat.cpu().numpy().tolist())

    ## gets results and save
    with open("temp", 'w') as fout:
        for words, is_heads, tags, y_hat in zip(Words, Is_heads, Tags, Y_hat):
            y_hat = [hat for head, hat in zip(is_heads, y_hat) if head == 1]
            preds = [idx2tag[hat] for hat in y_hat]
            assert len(preds)==len(words.split())==len(tags.split())
            for w, t, p in zip(words.split()[1:-1], tags.split()[1:-1], preds[1:-1]):
                fout.write(f"{w} {t} {p}\n")
            fout.write("\n")

    ## calc metric
    y_true =  np.array([tag2idx[line.split()[1]] for line in open("temp", 'r').read().splitlines() if len(line) > 0])
    y_pred =  np.array([tag2idx[line.split()[2]] for line in open("temp", 'r').read().splitlines() if len(line) > 0])

    num_proposed = len(y_pred[y_pred>1])
    num_correct = (np.logical_and(y_true==y_pred, y_true>1)).astype(np.int).sum()
    num_gold = len(y_true[y_true>1])

    print(f"num_proposed:{num_proposed}")
    print(f"num_correct:{num_correct}")
    print(f"num_gold:{num_gold}")
    try:
        precision = num_correct / num_proposed
    except ZeroDivisionError:
        precision = 1.0

    try:
        recall = num_correct / num_gold
    except ZeroDivisionError:
        recall = 1.0

    try:
        f1 = 2*precision*recall / (precision + recall)
    except ZeroDivisionError:
        if precision*recall==0:
            f1=1.0
        else:
            f1=0

    os.remove("temp")

    print("precision=%.2f"%precision)
    print("recall=%.2f"%recall)
    print("f1=%.2f"%f1)
    return precision, recall, f1

In [23]:
emb = nn.Embedding(31090, 768, padding_idx=0)
x = torch.randn(8, 41).long()
print(x)
emb(x)

tensor([[ 0,  0,  0,  0,  0, -1,  0,  2,  0,  0,  0,  0, -2, -1, -1,  0,  0,  0,
          0,  1, -1,  0, -1, -1, -1,  0,  1,  1,  0,  1,  0, -1,  2,  0, -1, -1,
          0, -1,  0,  2,  0],
        [-1,  1,  0,  0, -1,  1,  2,  0,  0,  0,  0, -1,  2,  0,  0, -3, -1,  0,
          0,  0,  0,  0,  0, -1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0],
        [ 1,  0,  0,  0, -1, -1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          1,  0, -1,  0,  0,  0,  0,  0,  0,  0, -1,  1,  0,  0,  1,  0,  0,  2,
          1, -1,  0,  1,  0],
        [ 0,  0,  1,  0,  1,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,
          2,  0,  0, -1,  0,  0,  0,  1,  0,  0,  0, -1,  0,  1,  0,  0,  0,  0,
          0, -1,  0,  0,  0],
        [ 1,  0,  0, -1,  0, -1,  1,  0,  1,  1,  0, -1,  0,  0,  0, -1,  0,  0,
          0, -1,  0, -1,  0,  0,  0,  0,  0, -1,  0,  0,  0,  0, -1,  0,  0,  2,
          0, -1,  0,  0,  0],
        [ 1,  1,  0,  0,  0,  0,  0,  0,

RuntimeError: index out of range: Tried to access index -1 out of table with 31089 rows. at /pytorch/aten/src/TH/generic/THTensorEvenMoreMath.cpp:418

In [13]:
import re
import glob

if __name__=="__main__":
    check_lists=[]
    dir_list = glob.glob("/mnt_data/kp_20p_5e5_pretrained_kdd/*")
    dir_list.sort()
    dictionary = {}
    checkpoint = []
    for directory in dir_list:
        check_list = glob.glob(directory+"/*")
        check_list.sort()
        max = 0
        for check in check_list:
            if check[-3:] == ".pt":
                check_list.remove(check)
        for check in check_list:
            try:
                if max < int(check[-2:]):
                    max = int(check[-2:])
            except:
                print(check)
                pass
        for check in check_list:
            if check[-2:] == str(max):
                checkpoint.append(check.replace(check[-17:],"pt"))
    print("="*80)
#         dictionary[directory] = max
#     print('='*80)
#     sorted(dictionary, key=lambda k : dictionary[k])
#     print(*['{} : {}'.format(k,v) for k,v in dictionary.items()], sep="\n")
    for i in checkpoint:
        print(i)
                
    testset = "/home/cilab/LabMembers/YS/KDD/finetuning/test.txt"
    
    print("load check point of model...")

    eval_dataset = NerDataset(testset)
    print(eval_dataset)
    eval_iter = data.DataLoader(dataset=eval_dataset,
                                     batch_size=8,
                                     shuffle=False,
                                     num_workers=4,
                                     collate_fn=pad)
    max_f1 = 0
    max_pt = ""
    f1_list = []
    for check in checkpoint:
        print("\nCheck Point : ",check)
        hp = Arg(check)
        model = Net(False, len(VOCAB), 'cpu', False)
        checkpoint = torch.load(hp.checkpoint)
        model.load_state_dict(checkpoint['model_state_dict'],strict=False)
        model.bert.embeddings.word_embeddings = nn.Embedding(20151, 768, padding_idx=0)
        #print(model)

        precision, recall, f1 = eval(model, eval_iter)
        f1_list.append(format(f1, '.2f'))
        if max_f1<f1:
            max_f1 = f1
            max_pt = check
    print("\n\n{} : F1_Score : {}".format(max_pt.split('/')[-1], max_f1))
    print(f1_list)

    



/mnt_data/kp_20p_5e5_pretrained_kdd/kdd_10pt/1.pt
/mnt_data/kp_20p_5e5_pretrained_kdd/kdd_10pt/2.pt
/mnt_data/kp_20p_5e5_pretrained_kdd/kdd_11pt/3.pt
/mnt_data/kp_20p_5e5_pretrained_kdd/kdd_12pt/1.pt
/mnt_data/kp_20p_5e5_pretrained_kdd/kdd_12pt/2.pt
/mnt_data/kp_20p_5e5_pretrained_kdd/kdd_13pt/1.pt
/mnt_data/kp_20p_5e5_pretrained_kdd/kdd_14pt/1.pt
/mnt_data/kp_20p_5e5_pretrained_kdd/kdd_14pt/2.pt
/mnt_data/kp_20p_5e5_pretrained_kdd/kdd_15pt/4.pt
/mnt_data/kp_20p_5e5_pretrained_kdd/kdd_16pt/3.pt
/mnt_data/kp_20p_5e5_pretrained_kdd/kdd_16pt/5.pt
/mnt_data/kp_20p_5e5_pretrained_kdd/kdd_17pt/2.pt
/mnt_data/kp_20p_5e5_pretrained_kdd/kdd_18pt/1.pt
/mnt_data/kp_20p_5e5_pretrained_kdd/kdd_19pt/5.pt
/mnt_data/kp_20p_5e5_pretrained_kdd/kdd_1pt/2.pt
/mnt_data/kp_20p_5e5_pretrained_kdd/kdd_20pt/2.pt
/mnt_data/kp_20p_5e5_pretrained_kdd/kdd_21pt/3.pt
/mnt_data/kp_20p_5e5_pretrained_kdd/kdd_22pt/4.pt
/mnt_data/kp_20p_5e5_pretrained_kdd/kdd_22pt/5.pt
/mnt_data/kp_20p_5e5_pretrained_kdd/kdd_23pt/4.pt
/

RuntimeError: index out of range: Tried to access index 26299 out of table with 20150 rows. at /pytorch/aten/src/TH/generic/THTensorEvenMoreMath.cpp:418

## 기본 pre-trained-bert에 www kdd finetuning후 test

In [3]:
if __name__=="__main__":
    check_lists=[]
    check_list = glob.glob("/home/cilab/LabMembers/YS/bert_ner/finetuned_by_KDD/*.pt")
    check_len = len(check_list)
    for i in range(1,check_len+1):
        check_lists.append("/home/cilab/LabMembers/YS/bert_ner/finetuned_by_KDD/{}.pt".format(i))
    testset = "/home/cilab/LabMembers/YS/KDD/finetuning/test.txt"
    
    print("load check point of model...")

    model = Net(False, len(VOCAB), 'cpu', False)
    eval_dataset = NerDataset(testset)
    print(eval_dataset)
    eval_iter = data.DataLoader(dataset=eval_dataset,
                                     batch_size=8,
                                     shuffle=False,
                                     num_workers=4,
                                     collate_fn=pad)
    max_f1 = 0
    max_pt = ""
    f1_list = []
    for check in check_lists:
        print("\nCheck Point : ",check)
        hp = Arg(check)
        checkpoint = torch.load(hp.checkpoint)
        model.load_state_dict(checkpoint['model_state_dict'],strict=False)

        precision, recall, f1 = eval(model, eval_iter)
        f1_list.append(format(f1, '.2f'))
        if max_f1<f1:
            max_f1 = f1
            max_pt = check
    print("\n\n{} : F1_Score : {}".format(max_pt.split('/')[-1], max_f1))
    print(f1_list)



load check point of model...
<data_load.NerDataset object at 0x7f1327f27c18>

Check Point :  /home/cilab/LabMembers/YS/bert_ner/finetuned_by_KDD/1.pt
num_proposed:411
num_correct:225
num_gold:1121
precision=0.55
recall=0.20
f1=0.29

Check Point :  /home/cilab/LabMembers/YS/bert_ner/finetuned_by_KDD/2.pt
num_proposed:779
num_correct:387
num_gold:1121
precision=0.50
recall=0.35
f1=0.41

Check Point :  /home/cilab/LabMembers/YS/bert_ner/finetuned_by_KDD/3.pt
num_proposed:799
num_correct:363
num_gold:1121
precision=0.45
recall=0.32
f1=0.38

Check Point :  /home/cilab/LabMembers/YS/bert_ner/finetuned_by_KDD/4.pt
num_proposed:674
num_correct:318
num_gold:1121
precision=0.47
recall=0.28
f1=0.35

Check Point :  /home/cilab/LabMembers/YS/bert_ner/finetuned_by_KDD/5.pt
num_proposed:941
num_correct:417
num_gold:1121
precision=0.44
recall=0.37
f1=0.40

Check Point :  /home/cilab/LabMembers/YS/bert_ner/finetuned_by_KDD/6.pt
num_proposed:1024
num_correct:426
num_gold:1121
precision=0.42
recall=0.38


## KP20K에 바로 KP20K 테스트를 넣으면?

In [3]:
# if __name__=="__main__":
#     check_lists=[]
#     check_list = glob.glob("/home/cilab/LabMembers/YS/bert_ner/new_new_new_model_by_kp40/*.pt")
#     check_len = len(check_list)
#     for i in range(1,check_len+1):
#         check_lists.append("/home/cilab/LabMembers/YS/bert_ner/new_new_new_model_by_kp40/{}.pt".format(i))
#     #testset = "/home/cilab/LabMembers/YS/kp20k/finetuning/test500.txt"
#     testset = "/home/cilab/LabMembers/YS/WWW/finetuning/test.txt"
#     print("load check point of model...")

#     model = Net(False, len(VOCAB), 'cpu', False)
#     eval_dataset = NerDataset(testset)
#     print(eval_dataset)
#     eval_iter = data.DataLoader(dataset=eval_dataset,
#                                      batch_size=8,
#                                      shuffle=False,
#                                      num_workers=4,
#                                      collate_fn=pad)
#     max_f1 = 0
#     max_pt = ""
#     f1_list = []
#     for check in check_lists:
#         print("\nCheck Point : ",check)
#         hp = Arg(check)
#         checkpoint = torch.load(hp.checkpoint)
#         model.load_state_dict(checkpoint['model_state_dict'],strict=False)

#         precision, recall, f1 = eval(model, eval_iter)
#         f1_list.append(format(f1, '.2f'))
#         if max_f1<f1:
#             max_f1 = f1
#             max_pt = check
#     print("\n\n{} : F1_Score : {}".format(max_pt.split('/')[-1], max_f1))
#     print(f1_list)
import re
import glob

if __name__=="__main__":
    check_lists=[]
    dir_list = glob.glob("/home/cilab/LabMembers/YS/bert_ner/new_new_new_model_by_kp20/*")
    dir_list.sort()
    dictionary = {}
    checkpoint = []
    
    testset = "/home/cilab/LabMembers/YS/kp20k/finetuning/test500.txt"
    
    print("load check point of model...")

    for i in dir_list:
        if i.endswith('.pt'):
            checkpoint.append(i)
    
    model = Net(False, len(VOCAB), 'cpu', False)
    eval_dataset = NerDataset(testset)
    print(eval_dataset)
    eval_iter = data.DataLoader(dataset=eval_dataset,
                                     batch_size=8,
                                     shuffle=False,
                                     num_workers=4,
                                     collate_fn=pad)
    max_f1 = 0
    max_pt = ""
    f1_list = []
    for check in checkpoint:
        print("\nCheck Point : ",check)
        hp = Arg(check)
        checkpoint = torch.load(hp.checkpoint)
        model.load_state_dict(checkpoint['model_state_dict'],strict=False)

        precision, recall, f1 = eval(model, eval_iter)
        f1_list.append(format(f1, '.2f'))
        if max_f1<f1:
            max_f1 = f1
            max_pt = check
    print("\n\n{} : F1_Score : {}".format(max_pt.split('/')[-1], max_f1))
    print(f1_list)

    




load check point of model...
<data_load.NerDataset object at 0x7fcdd35ca278>

Check Point :  /home/cilab/LabMembers/YS/bert_ner/new_new_new_model_by_kp20/1.pt
num_proposed:32023
num_correct:1166
num_gold:6497
precision=0.04
recall=0.18
f1=0.06

Check Point :  /home/cilab/LabMembers/YS/bert_ner/new_new_new_model_by_kp20/10.pt
num_proposed:40887
num_correct:1368
num_gold:6497
precision=0.03
recall=0.21
f1=0.06

Check Point :  /home/cilab/LabMembers/YS/bert_ner/new_new_new_model_by_kp20/11.pt


KeyboardInterrupt: 

## ============ Bert에 Inspec으로 FInetuning후 Test

In [20]:
if __name__=="__main__":
    check_lists=[]
    check_lists = glob.glob("/mnt_data/pretrained_bert_Inspec/*.pt")
    check_len = len(check_list)
    testset = "/home/cilab/LabMembers/YS/Inspec/data/finetuning/test.txt"
    
    print("load check point of model...")

    model = Net(False, len(VOCAB), 'cpu', False)
    eval_dataset = NerDataset(testset)
    print(eval_dataset)
    eval_iter = data.DataLoader(dataset=eval_dataset,
                                     batch_size=8,
                                     shuffle=False,
                                     num_workers=4,
                                     collate_fn=pad)
    max_f1 = 0
    max_pt = ""
    f1_list = []
    for check in check_lists:
        print("\nCheck Point : ",check)
        hp = Arg(check)
        checkpoint = torch.load(hp.checkpoint)
        model.load_state_dict(checkpoint['model_state_dict'],strict=False)

        precision, recall, f1 = eval(model, eval_iter)
        f1_list.append(format(f1, '.2f'))
        if max_f1<f1:
            max_f1 = f1
            max_pt = check
    print("\n\n{} : F1_Score : {}".format(max_pt.split('/')[-1], max_f1))
    print(f1_list)



load check point of model...
<data_load.NerDataset object at 0x7f8a181e0cc0>

Check Point :  /mnt_data/pretrained_bert_Inspec/1.pt
num_proposed:1233
num_correct:697
num_gold:2430
precision=0.57
recall=0.29
f1=0.38

Check Point :  /mnt_data/pretrained_bert_Inspec/4.pt
num_proposed:1888
num_correct:899
num_gold:2430
precision=0.48
recall=0.37
f1=0.42

Check Point :  /mnt_data/pretrained_bert_Inspec/3.pt
num_proposed:2967
num_correct:1290
num_gold:2430
precision=0.43
recall=0.53
f1=0.48

Check Point :  /mnt_data/pretrained_bert_Inspec/2.pt
num_proposed:2043
num_correct:978
num_gold:2430
precision=0.48
recall=0.40
f1=0.44

Check Point :  /mnt_data/pretrained_bert_Inspec/5.pt
num_proposed:2396
num_correct:1124
num_gold:2430
precision=0.47
recall=0.46
f1=0.47


3.pt : F1_Score : 0.4780433574207893
['0.38', '0.42', '0.48', '0.44', '0.47']


In [None]:
if __name__=="__main__":
    check_lists=[]
    check_list = glob.glob("/home/cilab/LabMembers/YS/bert_ner/finetuned_by_WWW/*.pt")
    check_len = len(check_list)
    for i in range(1,check_len+1):
        check_lists.append("/home/cilab/LabMembers/YS/bert_ner/finetuned_by_WWW/{}.pt".format(i))
    testset = "/home/cilab/LabMembers/YS/WWW/finetuning/test.txt"
    
    print("load check point of model...")

    eval_dataset = NerDataset(testset)
    print(eval_dataset)
    eval_iter = data.DataLoader(dataset=eval_dataset,
                                     batch_size=8,
                                     shuffle=False,
                                     num_workers=4,
                                     collate_fn=pad)
    max_f1 = 0
    max_pt = ""
    f1_list = []
    for check in check_lists:
        print("\nCheck Point : ",check)
        hp = Arg(check)
        checkpoint = torch.load(hp.checkpoint)
        model = Net(False, len(VOCAB), 'cpu', False)
        model.load_state_dict(checkpoint['model_state_dict'],strict=False)
        precision, recall, f1 = eval(model, eval_iter)
        f1_list.append(format(f1, '.2f'))
        if max_f1<f1:
            max_f1 = f1
            max_pt = check
    print("\n\n{} : F1_Score : {}".format(max_pt.split('/')[-1], max_f1))
    print(f1_list)



load check point of model...
<data_load.NerDataset object at 0x7fbe316179e8>

Check Point :  /home/cilab/LabMembers/YS/bert_ner/finetuned_by_WWW/1.pt
num_proposed:1402
num_correct:729
num_gold:1738
precision=0.52
recall=0.42
f1=0.46

Check Point :  /home/cilab/LabMembers/YS/bert_ner/finetuned_by_WWW/2.pt
num_proposed:1088
num_correct:615
num_gold:1738
precision=0.57
recall=0.35
f1=0.44

Check Point :  /home/cilab/LabMembers/YS/bert_ner/finetuned_by_WWW/3.pt
num_proposed:1702
num_correct:826
num_gold:1738
precision=0.49
recall=0.48
f1=0.48

Check Point :  /home/cilab/LabMembers/YS/bert_ner/finetuned_by_WWW/4.pt


## kp20k 40% (pre-trained) KDD / WWW  test

In [5]:
if __name__=="__main__":
    check_lists=[]
    check_list = glob.glob("/home/cilab/LabMembers/YS/bert_ner/new_new_new_model_by_kp40/*.pt")
    check_len = len(check_list)
    for i in range(1,check_len+1):
        check_lists.append("/home/cilab/LabMembers/YS/bert_ner/new_new_new_model_by_kp40/{}.pt".format(i))
    testset = "/home/cilab/LabMembers/YS/KDD/finetuning/test.txt"
    
    print("load check point of model...")
    eval_dataset = NerDataset(testset)
    print(eval_dataset)
    eval_iter = data.DataLoader(dataset=eval_dataset,
                                     batch_size=8,
                                     shuffle=False,
                                     num_workers=4,
                                     collate_fn=pad)
    max_f1 = 0
    max_pt = ""
    f1_list = []
    for check in check_lists:
        print("\nCheck Point : ",check)
        hp = Arg(check)
        checkpoint = torch.load(hp.checkpoint)
        model = Net(False, len(VOCAB), 'cpu', False)
        model.load_state_dict(checkpoint['model_state_dict'],strict=False)
        model.bert.embeddings.word_embeddings = nn.Embedding(50191, 768)
        precision, recall, f1 = eval(model, eval_iter)
        f1_list.append(format(f1, '.2f'))
        if max_f1<f1:
            max_f1 = f1
            max_pt = check
    print("\n\n{} : F1_Score : {}".format(max_pt.split('/')[-1], max_f1))
    print(f1_list)



load check point of model...
<data_load.NerDataset object at 0x7fbd9d53b0f0>

Check Point :  /home/cilab/LabMembers/YS/bert_ner/new_new_new_model_by_kp40/1.pt
num_proposed:5998
num_correct:218
num_gold:1121
precision=0.04
recall=0.19
f1=0.06

Check Point :  /home/cilab/LabMembers/YS/bert_ner/new_new_new_model_by_kp40/2.pt
num_proposed:9233
num_correct:309
num_gold:1121
precision=0.03
recall=0.28
f1=0.06

Check Point :  /home/cilab/LabMembers/YS/bert_ner/new_new_new_model_by_kp40/3.pt
num_proposed:3502
num_correct:144
num_gold:1121
precision=0.04
recall=0.13
f1=0.06

Check Point :  /home/cilab/LabMembers/YS/bert_ner/new_new_new_model_by_kp40/4.pt
num_proposed:6808
num_correct:256
num_gold:1121
precision=0.04
recall=0.23
f1=0.06

Check Point :  /home/cilab/LabMembers/YS/bert_ner/new_new_new_model_by_kp40/5.pt
num_proposed:11155
num_correct:360
num_gold:1121
precision=0.03
recall=0.32
f1=0.06

Check Point :  /home/cilab/LabMembers/YS/bert_ner/new_new_new_model_by_kp40/6.pt
num_proposed:11

KeyboardInterrupt: 

In [None]:
if __name__=="__main__":
    check_lists=[]
    check_list = glob.glob("/home/cilab/LabMembers/YS/bert_ner/new_new_new_model_by_kp40/*.pt")
    check_len = len(check_list)
    for i in range(1,check_len+1):
        check_lists.append("/home/cilab/LabMembers/YS/bert_ner/new_new_new_model_by_kp40/{}.pt".format(i))
    testset = "/home/cilab/LabMembers/YS/WWW/finetuning/test.txt"
    
    print("load check point of model...")

    model = Net(False, len(VOCAB), 'cpu', False)
    eval_dataset = NerDataset(testset)
    print(eval_dataset)
    eval_iter = data.DataLoader(dataset=eval_dataset,
                                     batch_size=8,
                                     shuffle=False,
                                     num_workers=4,
                                     collate_fn=pad)
    max_f1 = 0
    max_pt = ""
    f1_list = []
    for check in check_lists:
        print("\nCheck Point : ",check)
        hp = Arg(check)
        checkpoint = torch.load(hp.checkpoint)
        model.load_state_dict(checkpoint['model_state_dict'],strict=False)

        precision, recall, f1 = eval(model, eval_iter)
        f1_list.append(format(f1, '.2f'))
        if max_f1<f1:
            max_f1 = f1
            max_pt = check
    print("\n\n{} : F1_Score : {}".format(max_pt.split('/')[-1], max_f1))
    print(f1_list)



## kp20k 60% (pre-trained) KDD / WWW


In [7]:
if __name__=="__main__":
    check_lists=[]
    check_list = glob.glob("/home/cilab/LabMembers/YS/bert_ner/new_new_new_model_by_kp60/*.pt")
    check_len = len(check_list)
    for i in range(1,check_len+1):
        check_lists.append("/home/cilab/LabMembers/YS/bert_ner/new_new_new_model_by_kp60/{}.pt".format(i))
    testset = "/home/cilab/LabMembers/YS/KDD/finetuning/test.txt"
    
    print("load check point of model...")

    model = Net(False, len(VOCAB), 'cpu', False)
    eval_dataset = NerDataset(testset)
    print(eval_dataset)
    eval_iter = data.DataLoader(dataset=eval_dataset,
                                     batch_size=8,
                                     shuffle=False,
                                     num_workers=4,
                                     collate_fn=pad)
    max_f1 = 0
    max_pt = ""
    f1_list = []
    for check in check_lists:
        print("\nCheck Point : ",check)
        hp = Arg(check)
        checkpoint = torch.load(hp.checkpoint)
        model.load_state_dict(checkpoint['model_state_dict'],strict=False)

        precision, recall, f1 = eval(model, eval_iter)
        f1_list.append(format(f1, '.2f'))
        if max_f1<f1:
            max_f1 = f1
            max_pt = check
    print("\n\n{} : F1_Score : {}".format(max_pt.split('/')[-1], max_f1))
    print(f1_list)



load check point of model...
<data_load.NerDataset object at 0x7f13c8353860>

Check Point :  /home/cilab/LabMembers/YS/bert_ner/new_new_new_model_by_kp60/1.pt
num_proposed:5274
num_correct:128
num_gold:1121
precision=0.02
recall=0.11
f1=0.04

Check Point :  /home/cilab/LabMembers/YS/bert_ner/new_new_new_model_by_kp60/2.pt
num_proposed:6302
num_correct:154
num_gold:1121
precision=0.02
recall=0.14
f1=0.04

Check Point :  /home/cilab/LabMembers/YS/bert_ner/new_new_new_model_by_kp60/3.pt
num_proposed:4227
num_correct:68
num_gold:1121
precision=0.02
recall=0.06
f1=0.03

Check Point :  /home/cilab/LabMembers/YS/bert_ner/new_new_new_model_by_kp60/4.pt
num_proposed:4556
num_correct:68
num_gold:1121
precision=0.01
recall=0.06
f1=0.02

Check Point :  /home/cilab/LabMembers/YS/bert_ner/new_new_new_model_by_kp60/5.pt
num_proposed:5099
num_correct:85
num_gold:1121
precision=0.02
recall=0.08
f1=0.03

Check Point :  /home/cilab/LabMembers/YS/bert_ner/new_new_new_model_by_kp60/6.pt
num_proposed:3990
n

In [8]:
if __name__=="__main__":
    check_lists=[]
    check_list = glob.glob("/home/cilab/LabMembers/YS/bert_ner/new_new_new_model_by_kp60/*.pt")
    check_len = len(check_list)
    for i in range(1,check_len+1):
        check_lists.append("/home/cilab/LabMembers/YS/bert_ner/new_new_new_model_by_kp60/{}.pt".format(i))
    testset = "/home/cilab/LabMembers/YS/WWW/finetuning/test.txt"
    
    print("load check point of model...")

    model = Net(False, len(VOCAB), 'cpu', False)
    eval_dataset = NerDataset(testset)
    print(eval_dataset)
    eval_iter = data.DataLoader(dataset=eval_dataset,
                                     batch_size=8,
                                     shuffle=False,
                                     num_workers=4,
                                     collate_fn=pad)
    max_f1 = 0
    max_pt = ""
    f1_list = []
    for check in check_lists:
        print("\nCheck Point : ",check)
        hp = Arg(check)
        checkpoint = torch.load(hp.checkpoint)
        model.load_state_dict(checkpoint['model_state_dict'],strict=False)

        precision, recall, f1 = eval(model, eval_iter)
        f1_list.append(format(f1, '.2f'))
        if max_f1<f1:
            max_f1 = f1
            max_pt = check
    print("\n\n{} : F1_Score : {}".format(max_pt.split('/')[-1], max_f1))
    print(f1_list)



load check point of model...
<data_load.NerDataset object at 0x7f13280e2e10>

Check Point :  /home/cilab/LabMembers/YS/bert_ner/new_new_new_model_by_kp60/1.pt
num_proposed:7357
num_correct:227
num_gold:1738
precision=0.03
recall=0.13
f1=0.05

Check Point :  /home/cilab/LabMembers/YS/bert_ner/new_new_new_model_by_kp60/2.pt
num_proposed:8794
num_correct:246
num_gold:1738
precision=0.03
recall=0.14
f1=0.05

Check Point :  /home/cilab/LabMembers/YS/bert_ner/new_new_new_model_by_kp60/3.pt
num_proposed:5699
num_correct:176
num_gold:1738
precision=0.03
recall=0.10
f1=0.05

Check Point :  /home/cilab/LabMembers/YS/bert_ner/new_new_new_model_by_kp60/4.pt
num_proposed:5930
num_correct:164
num_gold:1738
precision=0.03
recall=0.09
f1=0.04

Check Point :  /home/cilab/LabMembers/YS/bert_ner/new_new_new_model_by_kp60/5.pt
num_proposed:6765
num_correct:196
num_gold:1738
precision=0.03
recall=0.11
f1=0.05

Check Point :  /home/cilab/LabMembers/YS/bert_ner/new_new_new_model_by_kp60/6.pt
num_proposed:523

## kp_60%_5e-5_pretrained_test_WWW

In [88]:
import re
import glob

if __name__=="__main__":
    check_lists=[]
    dir_list = glob.glob("/mnt_data/kp_60p_5e5_pretrained_www/*")
    dir_list.sort()
    dictionary = {}
    checkpoint = []
    for directory in dir_list:
        check_list = glob.glob(directory+"/*")
        check_list.sort()
        max = 0
        for check in check_list:
            if check[-3:] == ".pt":
                check_list.remove(check)
        for check in check_list:
            try:
                if max < int(check[-2:]):
                    max = int(check[-2:])
            except:
                print(check)
                pass
        for check in check_list:
            if check[-2:] == str(max):
                checkpoint.append(check.replace(check[-17:],"pt"))
    print("="*80)
#         dictionary[directory] = max
#     print('='*80)
#     sorted(dictionary, key=lambda k : dictionary[k])
#     print(*['{} : {}'.format(k,v) for k,v in dictionary.items()], sep="\n")
    for i in checkpoint:
        print(i)
                
    testset = "/home/cilab/LabMembers/YS/WWW/finetuning/test.txt"
    
    print("load check point of model...")

    model = Net(False, len(VOCAB), 'cpu', False)
    eval_dataset = NerDataset(testset)
    print(eval_dataset)
    eval_iter = data.DataLoader(dataset=eval_dataset,
                                     batch_size=8,
                                     shuffle=False,
                                     num_workers=4,
                                     collate_fn=pad)
    max_f1 = 0
    max_pt = ""
    f1_list = []
    for check in checkpoint:
        print("\nCheck Point : ",check)
        hp = Arg(check)
        checkpoint = torch.load(hp.checkpoint)
        model.load_state_dict(checkpoint['model_state_dict'],strict=False)

        precision, recall, f1 = eval(model, eval_iter)
        f1_list.append(format(f1, '.2f'))
        if max_f1<f1:
            max_f1 = f1
            max_pt = check
    print("\n\n{} : F1_Score : {}".format(max_pt.split('/')[-1], max_f1))
    print(f1_list)

    



/mnt_data/kp_60p_5e5_pretrained_www/www_23pt/2.Pnan_R0.00_Fnan
/mnt_data/kp_60p_5e5_pretrained_www/www_23pt/3.Pnan_R0.00_Fnan
/mnt_data/kp_60p_5e5_pretrained_www/www_23pt/4.Pnan_R0.00_Fnan
/mnt_data/kp_60p_5e5_pretrained_www/www_23pt/5.Pnan_R0.00_Fnan
/mnt_data/kp_60p_5e5_pretrained_www/www_10pt/1.pt
/mnt_data/kp_60p_5e5_pretrained_www/www_11pt/3.pt
/mnt_data/kp_60p_5e5_pretrained_www/www_12pt/2.pt
/mnt_data/kp_60p_5e5_pretrained_www/www_13pt/2.pt
/mnt_data/kp_60p_5e5_pretrained_www/www_14pt/1.pt
/mnt_data/kp_60p_5e5_pretrained_www/www_15pt/1.pt
/mnt_data/kp_60p_5e5_pretrained_www/www_15pt/4.pt
/mnt_data/kp_60p_5e5_pretrained_www/www_16pt/2.pt
/mnt_data/kp_60p_5e5_pretrained_www/www_17pt/2.pt
/mnt_data/kp_60p_5e5_pretrained_www/www_18pt/1.pt
/mnt_data/kp_60p_5e5_pretrained_www/www_18pt/2.pt
/mnt_data/kp_60p_5e5_pretrained_www/www_19pt/2.pt
/mnt_data/kp_60p_5e5_pretrained_www/www_1pt/3.pt
/mnt_data/kp_60p_5e5_pretrained_www/www_20pt/1.pt
/mnt_data/kp_60p_5e5_pretrained_www/www_21pt/2.pt

num_proposed:1300
num_correct:681
num_gold:1738
precision=0.52
recall=0.39
f1=0.45

Check Point :  /mnt_data/kp_60p_5e5_pretrained_www/www_38pt/4.pt
num_proposed:1505
num_correct:709
num_gold:1738
precision=0.47
recall=0.41
f1=0.44

Check Point :  /mnt_data/kp_60p_5e5_pretrained_www/www_38pt/5.pt
num_proposed:2340
num_correct:883
num_gold:1738
precision=0.38
recall=0.51
f1=0.43

Check Point :  /mnt_data/kp_60p_5e5_pretrained_www/www_39pt/2.pt
num_proposed:1641
num_correct:815
num_gold:1738
precision=0.50
recall=0.47
f1=0.48

Check Point :  /mnt_data/kp_60p_5e5_pretrained_www/www_3pt/3.pt
num_proposed:1825
num_correct:811
num_gold:1738
precision=0.44
recall=0.47
f1=0.46

Check Point :  /mnt_data/kp_60p_5e5_pretrained_www/www_3pt/4.pt
num_proposed:1543
num_correct:700
num_gold:1738
precision=0.45
recall=0.40
f1=0.43

Check Point :  /mnt_data/kp_60p_5e5_pretrained_www/www_40pt/1.pt
num_proposed:2308
num_correct:887
num_gold:1738
precision=0.38
recall=0.51
f1=0.44

Check Point :  /mnt_data

## kp_40%_5e-5_pretrained_test_WWW

In [89]:
import re
import glob

if __name__=="__main__":
    check_lists=[]
    dir_list = glob.glob("/mnt_data/kp_40p_5e5_pretrained_www/*")
    dir_list.sort()
    dictionary = {}
    checkpoint = []
    for directory in dir_list:
        check_list = glob.glob(directory+"/*")
        check_list.sort()
        max = 0
        for check in check_list:
            if check[-3:] == ".pt":
                check_list.remove(check)
        for check in check_list:
            try:
                if max < int(check[-2:]):
                    max = int(check[-2:])
            except:
                print(check)
                pass
        for check in check_list:
            if check[-2:] == str(max):
                checkpoint.append(check.replace(check[-17:],"pt"))
    print("="*80)
#         dictionary[directory] = max
#     print('='*80)
#     sorted(dictionary, key=lambda k : dictionary[k])
#     print(*['{} : {}'.format(k,v) for k,v in dictionary.items()], sep="\n")
    for i in checkpoint:
        print(i)
                
    testset = "/home/cilab/LabMembers/YS/WWW/finetuning/test.txt"
    
    print("load check point of model...")

    model = Net(False, len(VOCAB), 'cpu', False)
    eval_dataset = NerDataset(testset)
    print(eval_dataset)
    eval_iter = data.DataLoader(dataset=eval_dataset,
                                     batch_size=8,
                                     shuffle=False,
                                     num_workers=4,
                                     collate_fn=pad)
    max_f1 = 0
    max_pt = ""
    f1_list = []
    for check in checkpoint:
        print("\nCheck Point : ",check)
        hp = Arg(check)
        checkpoint = torch.load(hp.checkpoint)
        model.load_state_dict(checkpoint['model_state_dict'],strict=False)

        precision, recall, f1 = eval(model, eval_iter)
        f1_list.append(format(f1, '.2f'))
        if max_f1<f1:
            max_f1 = f1
            max_pt = check
    print("\n\n{} : F1_Score : {}".format(max_pt.split('/')[-1], max_f1))
    print(f1_list)

    



/mnt_data/kp_40p_5e5_pretrained_www/www_10pt/1.pt
/mnt_data/kp_40p_5e5_pretrained_www/www_11pt/2.pt
/mnt_data/kp_40p_5e5_pretrained_www/www_12pt/1.pt
/mnt_data/kp_40p_5e5_pretrained_www/www_13pt/3.pt
/mnt_data/kp_40p_5e5_pretrained_www/www_14pt/3.pt
/mnt_data/kp_40p_5e5_pretrained_www/www_15pt/1.pt
/mnt_data/kp_40p_5e5_pretrained_www/www_16pt/2.pt
/mnt_data/kp_40p_5e5_pretrained_www/www_17pt/2.pt
/mnt_data/kp_40p_5e5_pretrained_www/www_18pt/2.pt
/mnt_data/kp_40p_5e5_pretrained_www/www_19pt/1.pt
/mnt_data/kp_40p_5e5_pretrained_www/www_1pt/1.pt
/mnt_data/kp_40p_5e5_pretrained_www/www_20pt/5.pt
/mnt_data/kp_40p_5e5_pretrained_www/www_21pt/5.pt
/mnt_data/kp_40p_5e5_pretrained_www/www_22pt/1.pt
/mnt_data/kp_40p_5e5_pretrained_www/www_23pt/2.pt
/mnt_data/kp_40p_5e5_pretrained_www/www_24pt/1.pt
/mnt_data/kp_40p_5e5_pretrained_www/www_25pt/3.pt
/mnt_data/kp_40p_5e5_pretrained_www/www_26pt/2.pt
/mnt_data/kp_40p_5e5_pretrained_www/www_26pt/3.pt
/mnt_data/kp_40p_5e5_pretrained_www/www_27pt/1.pt
/

num_proposed:2623
num_correct:1076
num_gold:1738
precision=0.41
recall=0.62
f1=0.49

Check Point :  /mnt_data/kp_40p_5e5_pretrained_www/www_3pt/4.pt
num_proposed:1584
num_correct:722
num_gold:1738
precision=0.46
recall=0.42
f1=0.43

Check Point :  /mnt_data/kp_40p_5e5_pretrained_www/www_3pt/5.pt
num_proposed:1467
num_correct:686
num_gold:1738
precision=0.47
recall=0.39
f1=0.43

Check Point :  /mnt_data/kp_40p_5e5_pretrained_www/www_40pt/5.pt
num_proposed:1482
num_correct:696
num_gold:1738
precision=0.47
recall=0.40
f1=0.43

Check Point :  /mnt_data/kp_40p_5e5_pretrained_www/www_4pt/2.pt
num_proposed:1572
num_correct:775
num_gold:1738
precision=0.49
recall=0.45
f1=0.47

Check Point :  /mnt_data/kp_40p_5e5_pretrained_www/www_5pt/1.pt
num_proposed:2479
num_correct:981
num_gold:1738
precision=0.40
recall=0.56
f1=0.47

Check Point :  /mnt_data/kp_40p_5e5_pretrained_www/www_6pt/1.pt
num_proposed:2102
num_correct:905
num_gold:1738
precision=0.43
recall=0.52
f1=0.47

Check Point :  /mnt_data/k

## kp_20%_5e-5_pretrained_test_WWW

In [90]:
import re
import glob

if __name__=="__main__":
    check_lists=[]
    dir_list = glob.glob("/mnt_data/kp_20p_5e5_pretrained_www/*")
    dir_list.sort()
    dictionary = {}
    checkpoint = []
    for directory in dir_list:
        check_list = glob.glob(directory+"/*")
        check_list.sort()
        max = 0
        for check in check_list:
            if check[-3:] == ".pt":
                check_list.remove(check)
        for check in check_list:
            try:
                if max < int(check[-2:]):
                    max = int(check[-2:])
            except:
                print(check)
                pass
        for check in check_list:
            if check[-2:] == str(max):
                checkpoint.append(check.replace(check[-17:],"pt"))
    print("="*80)
#         dictionary[directory] = max
#     print('='*80)
#     sorted(dictionary, key=lambda k : dictionary[k])
#     print(*['{} : {}'.format(k,v) for k,v in dictionary.items()], sep="\n")
    for i in checkpoint:
        print(i)
                
    testset = "/home/cilab/LabMembers/YS/WWW/finetuning/test.txt"
    
    print("load check point of model...")

    model = Net(False, len(VOCAB), 'cpu', False)
    eval_dataset = NerDataset(testset)
    print(eval_dataset)
    eval_iter = data.DataLoader(dataset=eval_dataset,
                                     batch_size=8,
                                     shuffle=False,
                                     num_workers=4,
                                     collate_fn=pad)
    max_f1 = 0
    max_pt = ""
    f1_list = []
    for check in checkpoint:
        print("\nCheck Point : ",check)
        hp = Arg(check)
        checkpoint = torch.load(hp.checkpoint)
        model.load_state_dict(checkpoint['model_state_dict'],strict=False)

        precision, recall, f1 = eval(model, eval_iter)
        f1_list.append(format(f1, '.2f'))
        if max_f1<f1:
            max_f1 = f1
            max_pt = check
    print("\n\n{} : F1_Score : {}".format(max_pt.split('/')[-1], max_f1))
    print(f1_list)

    



/mnt_data/kp_20p_5e5_pretrained_www/www_10pt/1.pt
/mnt_data/kp_20p_5e5_pretrained_www/www_11pt/2.pt
/mnt_data/kp_20p_5e5_pretrained_www/www_12pt/2.pt
/mnt_data/kp_20p_5e5_pretrained_www/www_13pt/4.pt
/mnt_data/kp_20p_5e5_pretrained_www/www_14pt/2.pt
/mnt_data/kp_20p_5e5_pretrained_www/www_14pt/4.pt
/mnt_data/kp_20p_5e5_pretrained_www/www_15pt/1.pt
/mnt_data/kp_20p_5e5_pretrained_www/www_16pt/2.pt
/mnt_data/kp_20p_5e5_pretrained_www/www_17pt/1.pt
/mnt_data/kp_20p_5e5_pretrained_www/www_17pt/2.pt
/mnt_data/kp_20p_5e5_pretrained_www/www_18pt/1.pt
/mnt_data/kp_20p_5e5_pretrained_www/www_19pt/1.pt
/mnt_data/kp_20p_5e5_pretrained_www/www_19pt/5.pt
/mnt_data/kp_20p_5e5_pretrained_www/www_1pt/2.pt
/mnt_data/kp_20p_5e5_pretrained_www/www_20pt/1.pt
/mnt_data/kp_20p_5e5_pretrained_www/www_20pt/2.pt
/mnt_data/kp_20p_5e5_pretrained_www/www_21pt/1.pt
/mnt_data/kp_20p_5e5_pretrained_www/www_22pt/3.pt
/mnt_data/kp_20p_5e5_pretrained_www/www_23pt/1.pt
/mnt_data/kp_20p_5e5_pretrained_www/www_24pt/3.pt
/

num_proposed:1975
num_correct:834
num_gold:1738
precision=0.42
recall=0.48
f1=0.45

Check Point :  /mnt_data/kp_20p_5e5_pretrained_www/www_39pt/4.pt
num_proposed:2435
num_correct:931
num_gold:1738
precision=0.38
recall=0.54
f1=0.45

Check Point :  /mnt_data/kp_20p_5e5_pretrained_www/www_3pt/3.pt
num_proposed:2046
num_correct:833
num_gold:1738
precision=0.41
recall=0.48
f1=0.44

Check Point :  /mnt_data/kp_20p_5e5_pretrained_www/www_40pt/1.pt
num_proposed:2076
num_correct:920
num_gold:1738
precision=0.44
recall=0.53
f1=0.48

Check Point :  /mnt_data/kp_20p_5e5_pretrained_www/www_4pt/1.pt
num_proposed:1715
num_correct:833
num_gold:1738
precision=0.49
recall=0.48
f1=0.48

Check Point :  /mnt_data/kp_20p_5e5_pretrained_www/www_5pt/1.pt
num_proposed:1787
num_correct:875
num_gold:1738
precision=0.49
recall=0.50
f1=0.50

Check Point :  /mnt_data/kp_20p_5e5_pretrained_www/www_5pt/2.pt
num_proposed:2614
num_correct:1019
num_gold:1738
precision=0.39
recall=0.59
f1=0.47

Check Point :  /mnt_data/

## =========================================================

## 5e-6 finetuning by WWW on the 20percent of the KP20K

In [102]:
import re
import glob

if __name__=="__main__":
    check_lists=[]
    dir_list = glob.glob("/mnt_data/kp_20p_5e6_pretrained_www/*")
    dir_list.sort()
    dictionary = {}
    checkpoint = []
    for directory in dir_list:
        check_list = glob.glob(directory+"/*")
        check_list.sort()
        max = 0
        for check in check_list:
            if check[-3:] == ".pt":
                check_list.remove(check)
        for check in check_list:
            try:
                if max < int(check[-2:]):
                    max = int(check[-2:])
            except:
                print(check)
                pass
        for check in check_list:
            if check[-2:] == str(max):
                checkpoint.append(check.replace(check[-17:],"pt"))
    print("="*80)
#         dictionary[directory] = max
#     print('='*80)
#     sorted(dictionary, key=lambda k : dictionary[k])
#     print(*['{} : {}'.format(k,v) for k,v in dictionary.items()], sep="\n")
    for i in checkpoint:
        print(i)
                
    testset = "/home/cilab/LabMembers/YS/WWW/finetuning/test.txt"
    
    print("load check point of model...")

    model = Net(False, len(VOCAB), 'cpu', False)
    eval_dataset = NerDataset(testset)
    print(eval_dataset)
    eval_iter = data.DataLoader(dataset=eval_dataset,
                                     batch_size=8,
                                     shuffle=False,
                                     num_workers=4,
                                     collate_fn=pad)
    max_f1 = 0
    max_pt = ""
    f1_list = []
    for check in checkpoint:
        print("\nCheck Point : ",check)
        hp = Arg(check)
        checkpoint = torch.load(hp.checkpoint)
        model.load_state_dict(checkpoint['model_state_dict'],strict=False)

        precision, recall, f1 = eval(model, eval_iter)
        f1_list.append(format(f1, '.2f'))
        if max_f1<f1:
            max_f1 = f1
            max_pt = check
    print("\n\n{} : F1_Score : {}".format(max_pt.split('/')[-1], max_f1))
    print(f1_list)

    



/mnt_data/kp_20p_5e6_pretrained_www/www_10pt/5.pt
/mnt_data/kp_20p_5e6_pretrained_www/www_11pt/5.pt
/mnt_data/kp_20p_5e6_pretrained_www/www_12pt/4.pt
/mnt_data/kp_20p_5e6_pretrained_www/www_12pt/5.pt
/mnt_data/kp_20p_5e6_pretrained_www/www_13pt/5.pt
/mnt_data/kp_20p_5e6_pretrained_www/www_14pt/2.pt
/mnt_data/kp_20p_5e6_pretrained_www/www_15pt/5.pt
/mnt_data/kp_20p_5e6_pretrained_www/www_16pt/5.pt
/mnt_data/kp_20p_5e6_pretrained_www/www_17pt/5.pt
/mnt_data/kp_20p_5e6_pretrained_www/www_18pt/5.pt
/mnt_data/kp_20p_5e6_pretrained_www/www_19pt/5.pt
/mnt_data/kp_20p_5e6_pretrained_www/www_1pt/2.pt
/mnt_data/kp_20p_5e6_pretrained_www/www_20pt/4.pt
/mnt_data/kp_20p_5e6_pretrained_www/www_21pt/3.pt
/mnt_data/kp_20p_5e6_pretrained_www/www_21pt/5.pt
/mnt_data/kp_20p_5e6_pretrained_www/www_22pt/5.pt
/mnt_data/kp_20p_5e6_pretrained_www/www_23pt/4.pt
/mnt_data/kp_20p_5e6_pretrained_www/www_23pt/5.pt
/mnt_data/kp_20p_5e6_pretrained_www/www_24pt/4.pt
/mnt_data/kp_20p_5e6_pretrained_www/www_25pt/4.pt
/

num_proposed:1689
num_correct:828
num_gold:1738
precision=0.49
recall=0.48
f1=0.48

Check Point :  /mnt_data/kp_20p_5e6_pretrained_www/www_40pt/4.pt
num_proposed:1991
num_correct:907
num_gold:1738
precision=0.46
recall=0.52
f1=0.49

Check Point :  /mnt_data/kp_20p_5e6_pretrained_www/www_4pt/4.pt
num_proposed:1492
num_correct:772
num_gold:1738
precision=0.52
recall=0.44
f1=0.48

Check Point :  /mnt_data/kp_20p_5e6_pretrained_www/www_5pt/4.pt
num_proposed:1586
num_correct:810
num_gold:1738
precision=0.51
recall=0.47
f1=0.49

Check Point :  /mnt_data/kp_20p_5e6_pretrained_www/www_6pt/4.pt
num_proposed:1692
num_correct:807
num_gold:1738
precision=0.48
recall=0.46
f1=0.47

Check Point :  /mnt_data/kp_20p_5e6_pretrained_www/www_7pt/4.pt
num_proposed:1405
num_correct:754
num_gold:1738
precision=0.54
recall=0.43
f1=0.48

Check Point :  /mnt_data/kp_20p_5e6_pretrained_www/www_8pt/4.pt
num_proposed:1794
num_correct:855
num_gold:1738
precision=0.48
recall=0.49
f1=0.48

Check Point :  /mnt_data/kp

## 5e-6 finetuning by WWW on the 40percent of the KP20K

In [103]:
import re
import glob

if __name__=="__main__":
    check_lists=[]
    dir_list = glob.glob("/mnt_data/kp_40p_5e6_pretrained_www/*")
    dir_list.sort()
    dictionary = {}
    checkpoint = []
    for directory in dir_list:
        check_list = glob.glob(directory+"/*")
        check_list.sort()
        max = 0
        for check in check_list:
            if check[-3:] == ".pt":
                check_list.remove(check)
        for check in check_list:
            try:
                if max < int(check[-2:]):
                    max = int(check[-2:])
            except:
                print(check)
                pass
        for check in check_list:
            if check[-2:] == str(max):
                checkpoint.append(check.replace(check[-17:],"pt"))
    print("="*80)
#         dictionary[directory] = max
#     print('='*80)
#     sorted(dictionary, key=lambda k : dictionary[k])
#     print(*['{} : {}'.format(k,v) for k,v in dictionary.items()], sep="\n")
    for i in checkpoint:
        print(i)
                
    testset = "/home/cilab/LabMembers/YS/WWW/finetuning/test.txt"
    
    print("load check point of model...")

    model = Net(False, len(VOCAB), 'cpu', False)
    eval_dataset = NerDataset(testset)
    print(eval_dataset)
    eval_iter = data.DataLoader(dataset=eval_dataset,
                                     batch_size=8,
                                     shuffle=False,
                                     num_workers=4,
                                     collate_fn=pad)
    max_f1 = 0
    max_pt = ""
    f1_list = []
    for check in checkpoint:
        print("\nCheck Point : ",check)
        hp = Arg(check)
        checkpoint = torch.load(hp.checkpoint)
        model.load_state_dict(checkpoint['model_state_dict'],strict=False)

        precision, recall, f1 = eval(model, eval_iter)
        f1_list.append(format(f1, '.2f'))
        if max_f1<f1:
            max_f1 = f1
            max_pt = check
    print("\n\n{} : F1_Score : {}".format(max_pt.split('/')[-1], max_f1))
    print(f1_list)

    



/mnt_data/kp_40p_5e6_pretrained_www/www_10pt/3.pt
/mnt_data/kp_40p_5e6_pretrained_www/www_11pt/3.pt
/mnt_data/kp_40p_5e6_pretrained_www/www_12pt/5.pt
/mnt_data/kp_40p_5e6_pretrained_www/www_13pt/5.pt
/mnt_data/kp_40p_5e6_pretrained_www/www_14pt/3.pt
/mnt_data/kp_40p_5e6_pretrained_www/www_15pt/4.pt
/mnt_data/kp_40p_5e6_pretrained_www/www_16pt/4.pt
/mnt_data/kp_40p_5e6_pretrained_www/www_16pt/5.pt
/mnt_data/kp_40p_5e6_pretrained_www/www_17pt/2.pt
/mnt_data/kp_40p_5e6_pretrained_www/www_18pt/5.pt
/mnt_data/kp_40p_5e6_pretrained_www/www_19pt/2.pt
/mnt_data/kp_40p_5e6_pretrained_www/www_1pt/2.pt
/mnt_data/kp_40p_5e6_pretrained_www/www_1pt/5.pt
/mnt_data/kp_40p_5e6_pretrained_www/www_20pt/4.pt
/mnt_data/kp_40p_5e6_pretrained_www/www_20pt/5.pt
/mnt_data/kp_40p_5e6_pretrained_www/www_21pt/2.pt
/mnt_data/kp_40p_5e6_pretrained_www/www_21pt/5.pt
/mnt_data/kp_40p_5e6_pretrained_www/www_22pt/5.pt
/mnt_data/kp_40p_5e6_pretrained_www/www_23pt/5.pt
/mnt_data/kp_40p_5e6_pretrained_www/www_24pt/5.pt
/m

num_proposed:1755
num_correct:838
num_gold:1738
precision=0.48
recall=0.48
f1=0.48

Check Point :  /mnt_data/kp_40p_5e6_pretrained_www/www_38pt/5.pt
num_proposed:2096
num_correct:918
num_gold:1738
precision=0.44
recall=0.53
f1=0.48

Check Point :  /mnt_data/kp_40p_5e6_pretrained_www/www_39pt/5.pt
num_proposed:1575
num_correct:775
num_gold:1738
precision=0.49
recall=0.45
f1=0.47

Check Point :  /mnt_data/kp_40p_5e6_pretrained_www/www_3pt/5.pt
num_proposed:1669
num_correct:803
num_gold:1738
precision=0.48
recall=0.46
f1=0.47

Check Point :  /mnt_data/kp_40p_5e6_pretrained_www/www_40pt/3.pt
num_proposed:1975
num_correct:908
num_gold:1738
precision=0.46
recall=0.52
f1=0.49

Check Point :  /mnt_data/kp_40p_5e6_pretrained_www/www_4pt/4.pt
num_proposed:1801
num_correct:876
num_gold:1738
precision=0.49
recall=0.50
f1=0.50

Check Point :  /mnt_data/kp_40p_5e6_pretrained_www/www_5pt/3.pt
num_proposed:1464
num_correct:769
num_gold:1738
precision=0.53
recall=0.44
f1=0.48

Check Point :  /mnt_data/

## 5e-6 finetuning by WWW on the 60percent of the KP20K

In [104]:
import re
import glob

if __name__=="__main__":
    check_lists=[]
    dir_list = glob.glob("/mnt_data/kp_60p_5e6_pretrained_www/*")
    dir_list.sort()
    dictionary = {}
    checkpoint = []
    for directory in dir_list:
        check_list = glob.glob(directory+"/*")
        check_list.sort()
        max = 0
        for check in check_list:
            if check[-3:] == ".pt":
                check_list.remove(check)
        for check in check_list:
            try:
                if max < int(check[-2:]):
                    max = int(check[-2:])
            except:
                print(check)
                pass
        for check in check_list:
            if check[-2:] == str(max):
                checkpoint.append(check.replace(check[-17:],"pt"))
    print("="*80)
#         dictionary[directory] = max
#     print('='*80)
#     sorted(dictionary, key=lambda k : dictionary[k])
#     print(*['{} : {}'.format(k,v) for k,v in dictionary.items()], sep="\n")
    for i in checkpoint:
        print(i)
                
    testset = "/home/cilab/LabMembers/YS/WWW/finetuning/test.txt"
    
    print("load check point of model...")

    model = Net(False, len(VOCAB), 'cpu', False)
    eval_dataset = NerDataset(testset)
    print(eval_dataset)
    eval_iter = data.DataLoader(dataset=eval_dataset,
                                     batch_size=8,
                                     shuffle=False,
                                     num_workers=4,
                                     collate_fn=pad)
    max_f1 = 0
    max_pt = ""
    f1_list = []
    for check in checkpoint:
        print("\nCheck Point : ",check)
        hp = Arg(check)
        checkpoint = torch.load(hp.checkpoint)
        model.load_state_dict(checkpoint['model_state_dict'],strict=False)

        precision, recall, f1 = eval(model, eval_iter)
        f1_list.append(format(f1, '.2f'))
        if max_f1<f1:
            max_f1 = f1
            max_pt = check
    print("\n\n{} : F1_Score : {}".format(max_pt.split('/')[-1], max_f1))
    print(f1_list)

    



/mnt_data/kp_60p_5e6_pretrained_www/www_10pt/4.pt
/mnt_data/kp_60p_5e6_pretrained_www/www_11pt/3.pt
/mnt_data/kp_60p_5e6_pretrained_www/www_11pt/4.pt
/mnt_data/kp_60p_5e6_pretrained_www/www_12pt/3.pt
/mnt_data/kp_60p_5e6_pretrained_www/www_12pt/5.pt
/mnt_data/kp_60p_5e6_pretrained_www/www_13pt/2.pt
/mnt_data/kp_60p_5e6_pretrained_www/www_14pt/3.pt
/mnt_data/kp_60p_5e6_pretrained_www/www_15pt/4.pt
/mnt_data/kp_60p_5e6_pretrained_www/www_15pt/5.pt
/mnt_data/kp_60p_5e6_pretrained_www/www_16pt/5.pt
/mnt_data/kp_60p_5e6_pretrained_www/www_17pt/3.pt
/mnt_data/kp_60p_5e6_pretrained_www/www_17pt/4.pt
/mnt_data/kp_60p_5e6_pretrained_www/www_18pt/3.pt
/mnt_data/kp_60p_5e6_pretrained_www/www_18pt/4.pt
/mnt_data/kp_60p_5e6_pretrained_www/www_19pt/4.pt
/mnt_data/kp_60p_5e6_pretrained_www/www_1pt/3.pt
/mnt_data/kp_60p_5e6_pretrained_www/www_1pt/4.pt
/mnt_data/kp_60p_5e6_pretrained_www/www_20pt/4.pt
/mnt_data/kp_60p_5e6_pretrained_www/www_20pt/5.pt
/mnt_data/kp_60p_5e6_pretrained_www/www_21pt/4.pt
/m

num_proposed:1313
num_correct:731
num_gold:1738
precision=0.56
recall=0.42
f1=0.48

Check Point :  /mnt_data/kp_60p_5e6_pretrained_www/www_32pt/4.pt
num_proposed:1822
num_correct:874
num_gold:1738
precision=0.48
recall=0.50
f1=0.49

Check Point :  /mnt_data/kp_60p_5e6_pretrained_www/www_33pt/5.pt
num_proposed:1784
num_correct:854
num_gold:1738
precision=0.48
recall=0.49
f1=0.48

Check Point :  /mnt_data/kp_60p_5e6_pretrained_www/www_34pt/2.pt
num_proposed:1714
num_correct:836
num_gold:1738
precision=0.49
recall=0.48
f1=0.48

Check Point :  /mnt_data/kp_60p_5e6_pretrained_www/www_35pt/3.pt
num_proposed:2055
num_correct:922
num_gold:1738
precision=0.45
recall=0.53
f1=0.49

Check Point :  /mnt_data/kp_60p_5e6_pretrained_www/www_36pt/5.pt
num_proposed:1152
num_correct:653
num_gold:1738
precision=0.57
recall=0.38
f1=0.45

Check Point :  /mnt_data/kp_60p_5e6_pretrained_www/www_37pt/5.pt
num_proposed:1395
num_correct:737
num_gold:1738
precision=0.53
recall=0.42
f1=0.47

Check Point :  /mnt_da

## sci-bert finetuning by WWW

In [43]:
import re
import glob

if __name__=="__main__":
    check_lists=[]
    dir_list = glob.glob("/mnt_data/sci_WWW/*")
    dir_list.sort()
    dictionary = {}
    checkpoint = []
    for directory in dir_list:
        check_list = glob.glob(directory+"/*")
        check_list.sort()
        max = 0
        for check in check_list:
            if check[-3:] == ".pt":
                check_list.remove(check)
        for check in check_list:
            try:
                if max < int(check[-2:]):
                    max = int(check[-2:])
            except:
                print(check)
                pass
        for check in check_list:
            if check[-2:] == str(max):
                checkpoint.append(check.replace(check[-17:],"pt"))
    print("="*80)
#         dictionary[directory] = max
#     print('='*80)
#     sorted(dictionary, key=lambda k : dictionary[k])
#     print(*['{} : {}'.format(k,v) for k,v in dictionary.items()], sep="\n")
    for i in checkpoint:
        print(i)
                
    testset = "/home/cilab/LabMembers/YS/KDD/finetuning/test.txt"
    
    print("load check point of model...")

    model = Net(False, len(VOCAB), 'cpu', False)
    eval_dataset = NerDataset(testset)
    print(eval_dataset)
    eval_iter = data.DataLoader(dataset=eval_dataset,
                                     batch_size=8,
                                     shuffle=False,
                                     num_workers=4,
                                     collate_fn=pad)
    max_f1 = 0
    max_pt = ""
    f1_list = []
    for check in checkpoint:
        print("\nCheck Point : ",check)
        hp = Arg(check)
        checkpoint = torch.load(hp.checkpoint)
        model.load_state_dict(checkpoint['model_state_dict'],strict=False)

        precision, recall, f1 = eval(model, eval_iter)
        f1_list.append(format(f1, '.2f'))
        if max_f1<f1:
            max_f1 = f1
            max_pt = check
    print("\n\n{} : F1_Score : {}".format(max_pt.split('/')[-1], max_f1))
    print(f1_list)

    



load check point of model...
<data_load.NerDataset object at 0x7fd264738f98>


 : F1_Score : 0
[]


## ==================================
## ==================================

## pretrained by kp20k 60percent  and  finetuning by kdd 

In [35]:
import re
import glob

if __name__=="__main__":
    check_lists=[]
    dir_list = glob.glob("/mnt_data/kp_20p_5e5_pretrained_kdd/*")
    dir_list.sort()
    dictionary = {}
    checkpoint = []
    for directory in dir_list:
        check_list = glob.glob(directory+"/*")
        check_list.sort()
        max = 0
        for check in check_list:
            if check[-3:] == ".pt":
                check_list.remove(check)
        for check in check_list:
            try:
                if max < int(check[-2:]):
                    max = int(check[-2:])
            except:
                #print(check)
                pass
        for check in check_list:
            if check[-2:] == str(max):
                #print(check)
                print(check.split('/')[-2].split('_')[-1].replace('pt',''))
                dictionary[int(check.split('/')[-2].split('_')[-1].replace('pt',''))] = check.split('/')[-1]
    print("="*80)
    print('='*80)
    #sort(dictionary.items())
    #sorted(dictionary, key=lambda k : dictionary[k])
    dictionary = sorted(dictionary.items(), key=lambda x: int(x[0]))
    print(dictionary)
    #print(*['{} : {}'.format(k,v) for k,v in dictionary.items()], sep="\n")
    for i in range(len(dictionary)):
        print(dictionary[i])

10
10
11
12
12
13
14
14
15
16
16
17
18
19
1
20
21
22
22
23
23
24
25
26
27
28
29
2
30
30
31
32
33
34
35
36
37
38
39
3
3
40
4
5
6
7
7
8
9
[(1, '2.P0.51_R0.42_F0.46'), (2, '1.P0.51_R0.46_F0.48'), (3, '4.P0.47_R0.41_F0.44'), (4, '3.P0.51_R0.39_F0.44'), (5, '1.P0.53_R0.42_F0.47'), (6, '4.P0.44_R0.48_F0.46'), (7, '5.P0.50_R0.39_F0.44'), (8, '5.P0.45_R0.47_F0.46'), (9, '5.P0.41_R0.44_F0.43'), (10, '2.P0.46_R0.48_F0.47'), (11, '3.P0.51_R0.44_F0.47'), (12, '2.P0.50_R0.39_F0.44'), (13, '1.P0.42_R0.49_F0.45'), (14, '2.P0.50_R0.47_F0.48'), (15, '4.P0.50_R0.39_F0.44'), (16, '5.P0.40_R0.49_F0.44'), (17, '2.P0.55_R0.40_F0.47'), (18, '1.P0.40_R0.60_F0.48'), (19, '5.P0.53_R0.40_F0.46'), (20, '2.P0.50_R0.40_F0.45'), (21, '3.P0.48_R0.52_F0.50'), (22, '5.P0.42_R0.47_F0.44'), (23, '5.P0.39_R0.49_F0.44'), (24, '2.P0.39_R0.58_F0.47'), (25, '2.P0.48_R0.44_F0.46'), (26, '3.P0.48_R0.43_F0.45'), (27, '4.P0.40_R0.51_F0.45'), (28, '2.P0.57_R0.35_F0.43'), (29, '2.P0.51_R0.39_F0.45'), (30, '4.P0.37_R0.51_F0.43'), (3

In [10]:
import re
import glob

if __name__=="__main__":
    check_lists=[]
    dir_list = glob.glob("/mnt_data/kp_20p_5e5_pretrained_kdd/*")
    dir_list.sort()
    dictionary = {}
    checkpoint = []
    for directory in dir_list:
        check_list = glob.glob(directory+"/*")
        check_list.sort()
        max = 0
        for check in check_list:
            if check[-3:] == ".pt":
                check_list.remove(check)
        for check in check_list:
            try:
                if max < int(check[-2:]):
                    max = int(check[-2:])
            except:
                print(check)
                pass
        for check in check_list:
            if check[-2:] == str(max):
                checkpoint.append(check.replace(check[-17:],"pt"))
    print("="*80)
#         dictionary[directory] = max
#     print('='*80)
#     sorted(dictionary, key=lambda k : dictionary[k])
#     print(*['{} : {}'.format(k,v) for k,v in dictionary.items()], sep="\n")
    for i in checkpoint:
        print(i)
                
    testset = "/home/cilab/LabMembers/YS/KDD/finetuning/test.txt"
    
    print("load check point of model...")

    eval_dataset = NerDataset(testset)
    print(eval_dataset)
    eval_iter = data.DataLoader(dataset=eval_dataset,
                                     batch_size=8,
                                     shuffle=False,
                                     num_workers=4,
                                     collate_fn=pad)
    max_f1 = 0
    max_pt = ""
    f1_list = []
    for check in checkpoint:
        print("\nCheck Point : ",check)
        hp = Arg(check)
        model = Net(False, len(VOCAB), 'cpu', False)
        checkpoint = torch.load(hp.checkpoint)
        model.load_state_dict(checkpoint['model_state_dict'],strict=False)
        model.bert.embeddings.word_embeddings = nn.Embedding(20151, 768, padding_idx=0)
        print(model)

        precision, recall, f1 = eval(model, eval_iter)
        f1_list.append(format(f1, '.2f'))
        if max_f1<f1:
            max_f1 = f1
            max_pt = check
    print("\n\n{} : F1_Score : {}".format(max_pt.split('/')[-1], max_f1))
    print(f1_list)

    



/mnt_data/kp_20p_5e5_pretrained_kdd/kdd_10pt/1.pt
/mnt_data/kp_20p_5e5_pretrained_kdd/kdd_10pt/2.pt
/mnt_data/kp_20p_5e5_pretrained_kdd/kdd_11pt/3.pt
/mnt_data/kp_20p_5e5_pretrained_kdd/kdd_12pt/1.pt
/mnt_data/kp_20p_5e5_pretrained_kdd/kdd_12pt/2.pt
/mnt_data/kp_20p_5e5_pretrained_kdd/kdd_13pt/1.pt
/mnt_data/kp_20p_5e5_pretrained_kdd/kdd_14pt/1.pt
/mnt_data/kp_20p_5e5_pretrained_kdd/kdd_14pt/2.pt
/mnt_data/kp_20p_5e5_pretrained_kdd/kdd_15pt/4.pt
/mnt_data/kp_20p_5e5_pretrained_kdd/kdd_16pt/3.pt
/mnt_data/kp_20p_5e5_pretrained_kdd/kdd_16pt/5.pt
/mnt_data/kp_20p_5e5_pretrained_kdd/kdd_17pt/2.pt
/mnt_data/kp_20p_5e5_pretrained_kdd/kdd_18pt/1.pt
/mnt_data/kp_20p_5e5_pretrained_kdd/kdd_19pt/5.pt
/mnt_data/kp_20p_5e5_pretrained_kdd/kdd_1pt/2.pt
/mnt_data/kp_20p_5e5_pretrained_kdd/kdd_20pt/2.pt
/mnt_data/kp_20p_5e5_pretrained_kdd/kdd_21pt/3.pt
/mnt_data/kp_20p_5e5_pretrained_kdd/kdd_22pt/4.pt
/mnt_data/kp_20p_5e5_pretrained_kdd/kdd_22pt/5.pt
/mnt_data/kp_20p_5e5_pretrained_kdd/kdd_23pt/4.pt
/

RuntimeError: index out of range: Tried to access index 26299 out of table with 20150 rows. at /pytorch/aten/src/TH/generic/THTensorEvenMoreMath.cpp:418

In [40]:
import re
import glob

if __name__=="__main__":
    check_lists=[]
    dir_list = glob.glob("/mnt_data/kp_40p_5e5_pretrained_kdd/*")
    dir_list.sort()
    dictionary = {}
    checkpoint = []
    for directory in dir_list:
        check_list = glob.glob(directory+"/*")
        check_list.sort()
        max = 0
        for check in check_list:
            if check[-3:] == ".pt":
                check_list.remove(check)
        for check in check_list:
            try:
                if max < int(check[-2:]):
                    max = int(check[-2:])
            except:
                print(check)
                pass
        for check in check_list:
            if check[-2:] == str(max):
                checkpoint.append(check.replace(check[-17:],"pt"))
    print("="*80)
#         dictionary[directory] = max
#     print('='*80)
#     sorted(dictionary, key=lambda k : dictionary[k])
#     print(*['{} : {}'.format(k,v) for k,v in dictionary.items()], sep="\n")
    for i in checkpoint:
        print(i)
                
    testset = "/home/cilab/LabMembers/YS/KDD/finetuning/test.txt"
    
    print("load check point of model...")

    model = Net(False, len(VOCAB), 'cpu', False)
    eval_dataset = NerDataset(testset)
    print(eval_dataset)
    eval_iter = data.DataLoader(dataset=eval_dataset,
                                     batch_size=8,
                                     shuffle=False,
                                     num_workers=4,
                                     collate_fn=pad)
    max_f1 = 0
    max_pt = ""
    f1_list = []
    for check in checkpoint:
        print("\nCheck Point : ",check)
        hp = Arg(check)
        checkpoint = torch.load(hp.checkpoint)
        model.load_state_dict(checkpoint['model_state_dict'],strict=False)

        precision, recall, f1 = eval(model, eval_iter)
        f1_list.append(format(f1, '.2f'))
        if max_f1<f1:
            max_f1 = f1
            max_pt = check
    print("\n\n{} : F1_Score : {}".format(max_pt.split('/')[-1], max_f1))
    print(f1_list)

    



/mnt_data/kp_40p_5e5_pretrained_kdd/kdd_27pt/1.P0.00_R0.00_Fnan
/mnt_data/kp_40p_5e5_pretrained_kdd/kdd_10pt/1.pt
/mnt_data/kp_40p_5e5_pretrained_kdd/kdd_11pt/4.pt
/mnt_data/kp_40p_5e5_pretrained_kdd/kdd_12pt/5.pt
/mnt_data/kp_40p_5e5_pretrained_kdd/kdd_13pt/1.pt
/mnt_data/kp_40p_5e5_pretrained_kdd/kdd_14pt/4.pt
/mnt_data/kp_40p_5e5_pretrained_kdd/kdd_14pt/5.pt
/mnt_data/kp_40p_5e5_pretrained_kdd/kdd_15pt/2.pt
/mnt_data/kp_40p_5e5_pretrained_kdd/kdd_16pt/3.pt
/mnt_data/kp_40p_5e5_pretrained_kdd/kdd_17pt/2.pt
/mnt_data/kp_40p_5e5_pretrained_kdd/kdd_18pt/2.pt
/mnt_data/kp_40p_5e5_pretrained_kdd/kdd_19pt/2.pt
/mnt_data/kp_40p_5e5_pretrained_kdd/kdd_1pt/1.pt
/mnt_data/kp_40p_5e5_pretrained_kdd/kdd_20pt/3.pt
/mnt_data/kp_40p_5e5_pretrained_kdd/kdd_21pt/1.pt
/mnt_data/kp_40p_5e5_pretrained_kdd/kdd_22pt/5.pt
/mnt_data/kp_40p_5e5_pretrained_kdd/kdd_23pt/3.pt
/mnt_data/kp_40p_5e5_pretrained_kdd/kdd_24pt/3.pt
/mnt_data/kp_40p_5e5_pretrained_kdd/kdd_25pt/3.pt
/mnt_data/kp_40p_5e5_pretrained_kdd/k

num_proposed:948
num_correct:422
num_gold:1121
precision=0.45
recall=0.38
f1=0.41

Check Point :  /mnt_data/kp_40p_5e5_pretrained_kdd/kdd_8pt/1.pt
num_proposed:866
num_correct:374
num_gold:1121
precision=0.43
recall=0.33
f1=0.38

Check Point :  /mnt_data/kp_40p_5e5_pretrained_kdd/kdd_8pt/4.pt
num_proposed:937
num_correct:410
num_gold:1121
precision=0.44
recall=0.37
f1=0.40

Check Point :  /mnt_data/kp_40p_5e5_pretrained_kdd/kdd_9pt/1.pt
num_proposed:1033
num_correct:487
num_gold:1121
precision=0.47
recall=0.43
f1=0.45


2.pt : F1_Score : 0.45896003437902877
['0.40', '0.41', '0.35', '0.41', '0.41', '0.39', '0.43', '0.41', '0.40', '0.40', '0.42', '0.42', '0.40', '0.40', '0.42', '0.44', '0.42', '0.42', '0.44', '0.44', '0.45', '0.43', '0.46', '0.40', '0.44', '0.40', '0.43', '0.40', '0.38', '0.45', '0.42', '0.44', '0.41', '0.43', '0.42', '0.40', '0.39', '0.44', '0.43', '0.41', '0.38', '0.40', '0.45']


In [41]:
import re
import glob

if __name__=="__main__":
    check_lists=[]
    dir_list = glob.glob("/mnt_data/kp_60p_5e5_pretrained_kdd/*")
    dir_list.sort()
    dictionary = {}
    checkpoint = []
    for directory in dir_list:
        check_list = glob.glob(directory+"/*")
        check_list.sort()
        max = 0
        for check in check_list:
            if check[-3:] == ".pt":
                check_list.remove(check)
        for check in check_list:
            try:
                if max < int(check[-2:]):
                    max = int(check[-2:])
            except:
                print(check)
                pass
        for check in check_list:
            if check[-2:] == str(max):
                checkpoint.append(check.replace(check[-17:],"pt"))
    print("="*80)
#         dictionary[directory] = max
#     print('='*80)
#     sorted(dictionary, key=lambda k : dictionary[k])
#     print(*['{} : {}'.format(k,v) for k,v in dictionary.items()], sep="\n")
    for i in checkpoint:
        print(i)
                
    testset = "/home/cilab/LabMembers/YS/KDD/finetuning/test.txt"
    
    print("load check point of model...")

    model = Net(False, len(VOCAB), 'cpu', False)
    eval_dataset = NerDataset(testset)
    print(eval_dataset)
    eval_iter = data.DataLoader(dataset=eval_dataset,
                                     batch_size=8,
                                     shuffle=False,
                                     num_workers=4,
                                     collate_fn=pad)
    max_f1 = 0
    max_pt = ""
    f1_list = []
    for check in checkpoint:
        print("\nCheck Point : ",check)
        hp = Arg(check)
        checkpoint = torch.load(hp.checkpoint)
        model.load_state_dict(checkpoint['model_state_dict'],strict=False)

        precision, recall, f1 = eval(model, eval_iter)
        f1_list.append(format(f1, '.2f'))
        if max_f1<f1:
            max_f1 = f1
            max_pt = check
    print("\n\n{} : F1_Score : {}".format(max_pt.split('/')[-1], max_f1))
    print(f1_list)

    



/mnt_data/kp_60p_5e5_pretrained_kdd/kdd_14pt/1.Pnan_R0.00_Fnan
/mnt_data/kp_60p_5e5_pretrained_kdd/kdd_14pt/2.Pnan_R0.00_Fnan
/mnt_data/kp_60p_5e5_pretrained_kdd/kdd_14pt/3.Pnan_R0.00_Fnan
/mnt_data/kp_60p_5e5_pretrained_kdd/kdd_14pt/4.Pnan_R0.00_Fnan
/mnt_data/kp_60p_5e5_pretrained_kdd/kdd_14pt/5.Pnan_R0.00_Fnan
/mnt_data/kp_60p_5e5_pretrained_kdd/kdd_10pt/3.pt
/mnt_data/kp_60p_5e5_pretrained_kdd/kdd_11pt/3.pt
/mnt_data/kp_60p_5e5_pretrained_kdd/kdd_12pt/4.pt
/mnt_data/kp_60p_5e5_pretrained_kdd/kdd_13pt/2.pt
/mnt_data/kp_60p_5e5_pretrained_kdd/kdd_15pt/2.pt
/mnt_data/kp_60p_5e5_pretrained_kdd/kdd_16pt/4.pt
/mnt_data/kp_60p_5e5_pretrained_kdd/kdd_17pt/5.pt
/mnt_data/kp_60p_5e5_pretrained_kdd/kdd_18pt/1.pt
/mnt_data/kp_60p_5e5_pretrained_kdd/kdd_19pt/5.pt
/mnt_data/kp_60p_5e5_pretrained_kdd/kdd_1pt/1.pt
/mnt_data/kp_60p_5e5_pretrained_kdd/kdd_20pt/2.pt
/mnt_data/kp_60p_5e5_pretrained_kdd/kdd_21pt/4.pt
/mnt_data/kp_60p_5e5_pretrained_kdd/kdd_22pt/2.pt
/mnt_data/kp_60p_5e5_pretrained_kdd/

num_proposed:1049
num_correct:456
num_gold:1121
precision=0.43
recall=0.41
f1=0.42

Check Point :  /mnt_data/kp_60p_5e5_pretrained_kdd/kdd_4pt/2.pt
num_proposed:937
num_correct:410
num_gold:1121
precision=0.44
recall=0.37
f1=0.40

Check Point :  /mnt_data/kp_60p_5e5_pretrained_kdd/kdd_4pt/3.pt
num_proposed:851
num_correct:398
num_gold:1121
precision=0.47
recall=0.36
f1=0.40

Check Point :  /mnt_data/kp_60p_5e5_pretrained_kdd/kdd_4pt/4.pt
num_proposed:1140
num_correct:449
num_gold:1121
precision=0.39
recall=0.40
f1=0.40

Check Point :  /mnt_data/kp_60p_5e5_pretrained_kdd/kdd_4pt/5.pt
num_proposed:908
num_correct:390
num_gold:1121
precision=0.43
recall=0.35
f1=0.38

Check Point :  /mnt_data/kp_60p_5e5_pretrained_kdd/kdd_5pt/3.pt
num_proposed:845
num_correct:406
num_gold:1121
precision=0.48
recall=0.36
f1=0.41

Check Point :  /mnt_data/kp_60p_5e5_pretrained_kdd/kdd_6pt/4.pt
num_proposed:1002
num_correct:436
num_gold:1121
precision=0.44
recall=0.39
f1=0.41

Check Point :  /mnt_data/kp_60p_

## =======================================================

In [3]:
import re
import glob

if __name__=="__main__":
    check_lists=[]
    dir_list = glob.glob("/mnt_data/kp_60p_5e5_pretrained_Inspec/*")
    dir_list.sort()
    dictionary = {}
    checkpoint = []
    for directory in dir_list:
        check_list = glob.glob(directory+"/*")
        check_list.sort()
        max = 0
        for check in check_list:
            if check[-3:] == ".pt":
                check_list.remove(check)
        for check in check_list:
            try:
                if max < int(check[-2:]):
                    max = int(check[-2:])
            except:
                #print(check)
                pass
        for check in check_list:
            if check[-2:] == str(max):
                #print(check)
                print(check.split('/')[-2].split('_')[-1].replace('pt',''))
                dictionary[int(check.split('/')[-2].split('_')[-1].replace('pt',''))] = check.split('/')[-1]
    print("="*80)
    print('='*80)
    #sort(dictionary.items())
    #sorted(dictionary, key=lambda k : dictionary[k])
    dictionary = sorted(dictionary.items(), key=lambda x: int(x[0]))
    print(dictionary)
    #print(*['{} : {}'.format(k,v) for k,v in dictionary.items()], sep="\n")
    for i in range(len(dictionary)):
        print(dictionary[i])

10
11
12
13
14
15
15
16
17
17
18
19
1
20
20
20
21
21
21
22
22
22
23
23
23
24
25
26
26
27
28
29
2
30
31
32
32
34
35
36
37
38
38
39
3
3
40
4
5
6
7
8
[(1, '4.P0.43_R0.46_F0.44'), (2, '3.P0.44_R0.44_F0.44'), (3, '4.P0.42_R0.42_F0.42'), (4, '2.P0.42_R0.46_F0.44'), (5, '4.P0.43_R0.45_F0.44'), (6, '3.P0.45_R0.43_F0.44'), (7, '3.P0.43_R0.47_F0.45'), (8, '3.P0.43_R0.48_F0.46'), (10, '5.P0.45_R0.43_F0.44'), (11, '4.P0.46_R0.40_F0.43'), (12, '4.P0.40_R0.47_F0.43'), (13, '5.P0.41_R0.45_F0.43'), (14, '2.P0.44_R0.42_F0.43'), (15, '4.P0.48_R0.38_F0.43'), (16, '4.P0.47_R0.35_F0.40'), (17, '5.P0.41_R0.49_F0.44'), (18, '4.P0.43_R0.43_F0.43'), (19, '4.P0.42_R0.44_F0.43'), (20, '5.P0.43_R0.43_F0.43'), (21, '5.P0.38_R0.46_F0.42'), (22, '4.P0.45_R0.39_F0.42'), (23, '5.P0.44_R0.42_F0.43'), (24, '3.P0.35_R0.57_F0.43'), (25, '4.P0.48_R0.38_F0.42'), (26, '4.P0.39_R0.50_F0.44'), (27, '4.P0.43_R0.44_F0.44'), (28, '5.P0.43_R0.43_F0.43'), (29, '3.P0.42_R0.48_F0.45'), (30, '2.P0.47_R0.36_F0.41'), (31, '3.P0.42_R0.46

In [16]:
import re
import glob

if __name__=="__main__":
    check_lists=[]
    dir_list = glob.glob("/mnt_data/kp_60p_5e5_pretrained_Inspec/*")
    dir_list.sort()
    dictionary = {}
    checkpoint = []
    print(dir_list)
    for directory in dir_list:
        check_list = glob.glob(directory+"/*")
        check_list.sort()
        max = 0
        for check in check_list:
            if check[-3:] == ".pt":
                check_list.remove(check)
        for check in check_list:
            try:
                if max < int(check[-2:]):
                    max = int(check[-2:])
            except:
                print(check)
                pass
        for check in check_list:
            if check[-2:] == str(max):
                checkpoint.append(check.replace(check[-17:],"pt"))
    print("="*80)
#         dictionary[directory] = max
#     print('='*80)
#     sorted(dictionary, key=lambda k : dictionary[k])
#     print(*['{} : {}'.format(k,v) for k,v in dictionary.items()], sep="\n")
    for i in checkpoint:
        print(i)
                
    testset = "/home/cilab/LabMembers/YS/Inspec/data/finetuning/test.txt"
    
    print("load check point of model...")

    model = Net(False, len(VOCAB), 'cpu', False)
    eval_dataset = NerDataset(testset)
    print(eval_dataset)
    eval_iter = data.DataLoader(dataset=eval_dataset,
                                     batch_size=8,
                                     shuffle=False,
                                     num_workers=4,
                                     collate_fn=pad)
    max_f1 = 0
    max_pt = ""
    f1_list = []
    for check in checkpoint:
        print("\nCheck Point : ",check)
        hp = Arg(check)
        checkpoint = torch.load(hp.checkpoint)
        model.load_state_dict(checkpoint['model_state_dict'],strict=False)

        precision, recall, f1 = eval(model, eval_iter)
        f1_list.append(format(f1, '.2f'))
        if max_f1<f1:
            max_f1 = f1
            max_pt = check
    print("\n\n{} : F1_Score : {}".format(max_pt.split('/')[-1], max_f1))
    print(f1_list)

    



['/mnt_data/kp_60p_5e5_pretrained_Inspec/inspec_10pt', '/mnt_data/kp_60p_5e5_pretrained_Inspec/inspec_11pt', '/mnt_data/kp_60p_5e5_pretrained_Inspec/inspec_12pt', '/mnt_data/kp_60p_5e5_pretrained_Inspec/inspec_13pt', '/mnt_data/kp_60p_5e5_pretrained_Inspec/inspec_14pt', '/mnt_data/kp_60p_5e5_pretrained_Inspec/inspec_15pt', '/mnt_data/kp_60p_5e5_pretrained_Inspec/inspec_16pt', '/mnt_data/kp_60p_5e5_pretrained_Inspec/inspec_17pt', '/mnt_data/kp_60p_5e5_pretrained_Inspec/inspec_18pt', '/mnt_data/kp_60p_5e5_pretrained_Inspec/inspec_19pt', '/mnt_data/kp_60p_5e5_pretrained_Inspec/inspec_1pt', '/mnt_data/kp_60p_5e5_pretrained_Inspec/inspec_20pt', '/mnt_data/kp_60p_5e5_pretrained_Inspec/inspec_21pt', '/mnt_data/kp_60p_5e5_pretrained_Inspec/inspec_22pt', '/mnt_data/kp_60p_5e5_pretrained_Inspec/inspec_23pt', '/mnt_data/kp_60p_5e5_pretrained_Inspec/inspec_24pt', '/mnt_data/kp_60p_5e5_pretrained_Inspec/inspec_25pt', '/mnt_data/kp_60p_5e5_pretrained_Inspec/inspec_26pt', '/mnt_data/kp_60p_5e5_pretra

num_proposed:2397
num_correct:1080
num_gold:2430
precision=0.45
recall=0.44
f1=0.45

Check Point :  /mnt_data/kp_60p_5e5_pretrained_Inspec/inspec_21pt/5.pt
num_proposed:3171
num_correct:1286
num_gold:2430
precision=0.41
recall=0.53
f1=0.46

Check Point :  /mnt_data/kp_60p_5e5_pretrained_Inspec/inspec_22pt/2.pt
num_proposed:2415
num_correct:1130
num_gold:2430
precision=0.47
recall=0.47
f1=0.47

Check Point :  /mnt_data/kp_60p_5e5_pretrained_Inspec/inspec_22pt/3.pt
num_proposed:2843
num_correct:1189
num_gold:2430
precision=0.42
recall=0.49
f1=0.45

Check Point :  /mnt_data/kp_60p_5e5_pretrained_Inspec/inspec_22pt/4.pt
num_proposed:2293
num_correct:1099
num_gold:2430
precision=0.48
recall=0.45
f1=0.47

Check Point :  /mnt_data/kp_60p_5e5_pretrained_Inspec/inspec_23pt/1.pt
num_proposed:3229
num_correct:1291
num_gold:2430
precision=0.40
recall=0.53
f1=0.46

Check Point :  /mnt_data/kp_60p_5e5_pretrained_Inspec/inspec_23pt/4.pt
num_proposed:2196
num_correct:1063
num_gold:2430
precision=0.48


In [17]:
import re
import glob

if __name__=="__main__":
    check_lists=[]
    dir_list = glob.glob("/mnt_data/kp_40p_5e5_pretrained_Inspec/*")
    dir_list.sort()
    dictionary = {}
    checkpoint = []
    print(dir_list)
    for directory in dir_list:
        check_list = glob.glob(directory+"/*")
        check_list.sort()
        max = 0
        for check in check_list:
            if check[-3:] == ".pt":
                check_list.remove(check)
        for check in check_list:
            try:
                if max < int(check[-2:]):
                    max = int(check[-2:])
            except:
                print(check)
                pass
        for check in check_list:
            if check[-2:] == str(max):
                checkpoint.append(check.replace(check[-17:],"pt"))
    print("="*80)
#         dictionary[directory] = max
#     print('='*80)
#     sorted(dictionary, key=lambda k : dictionary[k])
#     print(*['{} : {}'.format(k,v) for k,v in dictionary.items()], sep="\n")
    for i in checkpoint:
        print(i)
                
    testset = "/home/cilab/LabMembers/YS/Inspec/data/finetuning/test.txt"
    
    print("load check point of model...")

    model = Net(False, len(VOCAB), 'cpu', False)
    eval_dataset = NerDataset(testset)
    print(eval_dataset)
    eval_iter = data.DataLoader(dataset=eval_dataset,
                                     batch_size=8,
                                     shuffle=False,
                                     num_workers=4,
                                     collate_fn=pad)
    max_f1 = 0
    max_pt = ""
    f1_list = []
    for check in checkpoint:
        print("\nCheck Point : ",check)
        hp = Arg(check)
        checkpoint = torch.load(hp.checkpoint)
        model.load_state_dict(checkpoint['model_state_dict'],strict=False)

        precision, recall, f1 = eval(model, eval_iter)
        f1_list.append(format(f1, '.2f'))
        if max_f1<f1:
            max_f1 = f1
            max_pt = check
    print("\n\n{} : F1_Score : {}".format(max_pt.split('/')[-1], max_f1))
    print(f1_list)

    



['/mnt_data/kp_40p_5e5_pretrained_Inspec/inspec_10pt', '/mnt_data/kp_40p_5e5_pretrained_Inspec/inspec_11pt', '/mnt_data/kp_40p_5e5_pretrained_Inspec/inspec_12pt', '/mnt_data/kp_40p_5e5_pretrained_Inspec/inspec_13pt', '/mnt_data/kp_40p_5e5_pretrained_Inspec/inspec_14pt', '/mnt_data/kp_40p_5e5_pretrained_Inspec/inspec_15pt', '/mnt_data/kp_40p_5e5_pretrained_Inspec/inspec_16pt', '/mnt_data/kp_40p_5e5_pretrained_Inspec/inspec_17pt', '/mnt_data/kp_40p_5e5_pretrained_Inspec/inspec_18pt', '/mnt_data/kp_40p_5e5_pretrained_Inspec/inspec_19pt', '/mnt_data/kp_40p_5e5_pretrained_Inspec/inspec_1pt', '/mnt_data/kp_40p_5e5_pretrained_Inspec/inspec_20pt', '/mnt_data/kp_40p_5e5_pretrained_Inspec/inspec_21pt', '/mnt_data/kp_40p_5e5_pretrained_Inspec/inspec_22pt', '/mnt_data/kp_40p_5e5_pretrained_Inspec/inspec_23pt', '/mnt_data/kp_40p_5e5_pretrained_Inspec/inspec_24pt', '/mnt_data/kp_40p_5e5_pretrained_Inspec/inspec_25pt', '/mnt_data/kp_40p_5e5_pretrained_Inspec/inspec_26pt', '/mnt_data/kp_40p_5e5_pretra

num_proposed:3007
num_correct:1188
num_gold:2430
precision=0.40
recall=0.49
f1=0.44

Check Point :  /mnt_data/kp_40p_5e5_pretrained_Inspec/inspec_25pt/5.pt
num_proposed:2977
num_correct:1163
num_gold:2430
precision=0.39
recall=0.48
f1=0.43

Check Point :  /mnt_data/kp_40p_5e5_pretrained_Inspec/inspec_26pt/2.pt
num_proposed:3593
num_correct:1385
num_gold:2430
precision=0.39
recall=0.57
f1=0.46

Check Point :  /mnt_data/kp_40p_5e5_pretrained_Inspec/inspec_27pt/1.pt
num_proposed:3306
num_correct:1325
num_gold:2430
precision=0.40
recall=0.55
f1=0.46

Check Point :  /mnt_data/kp_40p_5e5_pretrained_Inspec/inspec_27pt/5.pt
num_proposed:3191
num_correct:1247
num_gold:2430
precision=0.39
recall=0.51
f1=0.44

Check Point :  /mnt_data/kp_40p_5e5_pretrained_Inspec/inspec_28pt/2.pt
num_proposed:4153
num_correct:1508
num_gold:2430
precision=0.36
recall=0.62
f1=0.46

Check Point :  /mnt_data/kp_40p_5e5_pretrained_Inspec/inspec_28pt/4.pt
num_proposed:2830
num_correct:1177
num_gold:2430
precision=0.42


In [18]:
import re
import glob

if __name__=="__main__":
    check_lists=[]
    dir_list = glob.glob("/mnt_data/kp_20p_5e5_pretrained_Inspec/*")
    dir_list.sort()
    dictionary = {}
    checkpoint = []
    print(dir_list)
    for directory in dir_list:
        check_list = glob.glob(directory+"/*")
        check_list.sort()
        max = 0
        for check in check_list:
            if check[-3:] == ".pt":
                check_list.remove(check)
        for check in check_list:
            try:
                if max < int(check[-2:]):
                    max = int(check[-2:])
            except:
                print(check)
                pass
        for check in check_list:
            if check[-2:] == str(max):
                checkpoint.append(check.replace(check[-17:],"pt"))
    print("="*80)
#         dictionary[directory] = max
#     print('='*80)
#     sorted(dictionary, key=lambda k : dictionary[k])
#     print(*['{} : {}'.format(k,v) for k,v in dictionary.items()], sep="\n")
    for i in checkpoint:
        print(i)
                
    testset = "/home/cilab/LabMembers/YS/Inspec/data/finetuning/test.txt"
    
    print("load check point of model...")

    model = Net(False, len(VOCAB), 'cpu', False)
    eval_dataset = NerDataset(testset)
    print(eval_dataset)
    eval_iter = data.DataLoader(dataset=eval_dataset,
                                     batch_size=8,
                                     shuffle=False,
                                     num_workers=4,
                                     collate_fn=pad)
    max_f1 = 0
    max_pt = ""
    f1_list = []
    for check in checkpoint:
        print("\nCheck Point : ",check)
        hp = Arg(check)
        checkpoint = torch.load(hp.checkpoint)
        model.load_state_dict(checkpoint['model_state_dict'],strict=False)

        precision, recall, f1 = eval(model, eval_iter)
        f1_list.append(format(f1, '.2f'))
        if max_f1<f1:
            max_f1 = f1
            max_pt = check
    print("\n\n{} : F1_Score : {}".format(max_pt.split('/')[-1], max_f1))
    print(f1_list)

    



['/mnt_data/kp_20p_5e5_pretrained_Inspec/inspec_10pt', '/mnt_data/kp_20p_5e5_pretrained_Inspec/inspec_11pt', '/mnt_data/kp_20p_5e5_pretrained_Inspec/inspec_12pt', '/mnt_data/kp_20p_5e5_pretrained_Inspec/inspec_13pt', '/mnt_data/kp_20p_5e5_pretrained_Inspec/inspec_14pt', '/mnt_data/kp_20p_5e5_pretrained_Inspec/inspec_15pt', '/mnt_data/kp_20p_5e5_pretrained_Inspec/inspec_16pt', '/mnt_data/kp_20p_5e5_pretrained_Inspec/inspec_17pt', '/mnt_data/kp_20p_5e5_pretrained_Inspec/inspec_18pt', '/mnt_data/kp_20p_5e5_pretrained_Inspec/inspec_19pt', '/mnt_data/kp_20p_5e5_pretrained_Inspec/inspec_1pt', '/mnt_data/kp_20p_5e5_pretrained_Inspec/inspec_20pt', '/mnt_data/kp_20p_5e5_pretrained_Inspec/inspec_21pt', '/mnt_data/kp_20p_5e5_pretrained_Inspec/inspec_22pt', '/mnt_data/kp_20p_5e5_pretrained_Inspec/inspec_23pt', '/mnt_data/kp_20p_5e5_pretrained_Inspec/inspec_24pt', '/mnt_data/kp_20p_5e5_pretrained_Inspec/inspec_25pt', '/mnt_data/kp_20p_5e5_pretrained_Inspec/inspec_26pt', '/mnt_data/kp_20p_5e5_pretra

num_proposed:2879
num_correct:1190
num_gold:2430
precision=0.41
recall=0.49
f1=0.45

Check Point :  /mnt_data/kp_20p_5e5_pretrained_Inspec/inspec_26pt/5.pt
num_proposed:3079
num_correct:1255
num_gold:2430
precision=0.41
recall=0.52
f1=0.46

Check Point :  /mnt_data/kp_20p_5e5_pretrained_Inspec/inspec_27pt/4.pt
num_proposed:2812
num_correct:1196
num_gold:2430
precision=0.43
recall=0.49
f1=0.46

Check Point :  /mnt_data/kp_20p_5e5_pretrained_Inspec/inspec_28pt/5.pt
num_proposed:2370
num_correct:1111
num_gold:2430
precision=0.47
recall=0.46
f1=0.46

Check Point :  /mnt_data/kp_20p_5e5_pretrained_Inspec/inspec_29pt/3.pt
num_proposed:3140
num_correct:1272
num_gold:2430
precision=0.41
recall=0.52
f1=0.46

Check Point :  /mnt_data/kp_20p_5e5_pretrained_Inspec/inspec_29pt/4.pt
num_proposed:3161
num_correct:1312
num_gold:2430
precision=0.42
recall=0.54
f1=0.47

Check Point :  /mnt_data/kp_20p_5e5_pretrained_Inspec/inspec_2pt/5.pt
num_proposed:2310
num_correct:1086
num_gold:2430
precision=0.47
r