In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils import data
from model_test import Net
from data_load import NerDataset, pad, VOCAB, tokenizer, tag2idx, idx2tag
import os
import numpy as np
import argparse

def eval(model, iterator):
    model.eval()

    Words, Is_heads, Tags, Y, Y_hat = [], [], [], [], []
    with torch.no_grad():
        for i, batch in enumerate(iterator):
            words, x, is_heads, tags, y, seqlens = batch

            _, _, y_hat = model(x, y)  # y_hat: (N, T)

            Words.extend(words)
            Is_heads.extend(is_heads)
            Tags.extend(tags)
            Y.extend(y.numpy().tolist())
            Y_hat.extend(y_hat.cpu().numpy().tolist())

    ## gets results and save
    with open("temp", 'w') as fout:
        for words, is_heads, tags, y_hat in zip(Words, Is_heads, Tags, Y_hat):
            y_hat = [hat for head, hat in zip(is_heads, y_hat) if head == 1]
            preds = [idx2tag[hat] for hat in y_hat]
            assert len(preds)==len(words.split())==len(tags.split())
            for w, t, p in zip(words.split()[1:-1], tags.split()[1:-1], preds[1:-1]):
                fout.write(f"{w} {t} {p}\n")
            fout.write("\n")

    ## calc metric
    y_true =  np.array([tag2idx[line.split()[1]] for line in open("temp", 'r').read().splitlines() if len(line) > 0])
    y_pred =  np.array([tag2idx[line.split()[2]] for line in open("temp", 'r').read().splitlines() if len(line) > 0])

    num_proposed = len(y_pred[y_pred>1])
    num_correct = (np.logical_and(y_true==y_pred, y_true>1)).astype(np.int).sum()
    num_gold = len(y_true[y_true>1])

    print(f"num_proposed:{num_proposed}")
    print(f"num_correct:{num_correct}")
    print(f"num_gold:{num_gold}")
    try:
        precision = num_correct / num_proposed
    except ZeroDivisionError:
        precision = 1.0

    try:
        recall = num_correct / num_gold
    except ZeroDivisionError:
        recall = 1.0

    try:
        f1 = 2*precision*recall / (precision + recall)
    except ZeroDivisionError:
        if precision*recall==0:
            f1=1.0
        else:
            f1=0

    os.remove("temp")

    print("precision=%.2f"%precision)
    print("recall=%.2f"%recall)
    print("f1=%.2f"%f1)
    return precision, recall, f1

class Arg():
    def __init__(self, check_path):
        self.testset = "/home/cilab/LabMembers/YS/WWW/finetuning/test.txt"
        self.checkpoint = check_path
        self.batch_size = 8
  
        
import glob
check_lists=[]
check_list = glob.glob("/home/cilab/LabMembers/YS/bert_ner/finetuned_by_WWW/*.pt")
check_len = len(check_list)
for i in range(1,check_len+1):
    check_lists.append("/home/cilab/LabMembers/YS/bert_ner/finetuned_by_WWW/{}.pt".format(i))

if __name__=="__main__":
    print("load check point of model...")
    
    print(checkpoint.keys())
    model = Net(False, len(VOCAB), 'cpu', False)
    eval_dataset = NerDataset(hp.testset)
    eval_iter = data.DataLoader(dataset=eval_dataset,
                                     batch_size=hp.batch_size,
                                     shuffle=False,
                                     num_workers=4,
                                     collate_fn=pad)
    max_f1 = 0
    max_pt = ""
    f1_list = []
    for check in check_lists:
        print("\nCheck Point : ",check)
        hp = Arg(check)
        checkpoint = torch.load(hp.checkpoint)
        model.load_state_dict(checkpoint['model_state_dict'],strict=False)

        precision, recall, f1 = eval(model, eval_iter)
        f1_list.append(format(f1, '.2f'))
        if max_f1<f1:
            max_f1 = f1
            max_pt = check
    print("\n\n{} : F1_Score : {}".format(max_pt.split('/')[-1], max_f1))
    print(f1_list)



load check point of model...


NameError: name 'checkpoint' is not defined

In [15]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils import data
from model_test import Net
from data_load import NerDataset, pad, VOCAB, tokenizer, tag2idx, idx2tag
import os
import numpy as np
import argparse

def eval(model, iterator):
    model.eval()

    Words, Is_heads, Tags, Y, Y_hat = [], [], [], [], []
    with torch.no_grad():
        for i, batch in enumerate(iterator):
            words, x, is_heads, tags, y, seqlens = batch

            _, _, y_hat = model(x, y)  # y_hat: (N, T)

            Words.extend(words)
            Is_heads.extend(is_heads)
            Tags.extend(tags)
            Y.extend(y.numpy().tolist())
            Y_hat.extend(y_hat.cpu().numpy().tolist())

    ## gets results and save
    with open("temp", 'w') as fout:
        for words, is_heads, tags, y_hat in zip(Words, Is_heads, Tags, Y_hat):
            y_hat = [hat for head, hat in zip(is_heads, y_hat) if head == 1]
            preds = [idx2tag[hat] for hat in y_hat]
            assert len(preds)==len(words.split())==len(tags.split())
            for w, t, p in zip(words.split()[1:-1], tags.split()[1:-1], preds[1:-1]):
                fout.write(f"{w} {t} {p}\n")
            fout.write("\n")

    ## calc metric
    y_true =  np.array([tag2idx[line.split()[1]] for line in open("temp", 'r').read().splitlines() if len(line) > 0])
    y_pred =  np.array([tag2idx[line.split()[2]] for line in open("temp", 'r').read().splitlines() if len(line) > 0])

    num_proposed = len(y_pred[y_pred>1])
    num_correct = (np.logical_and(y_true==y_pred, y_true>1)).astype(np.int).sum()
    num_gold = len(y_true[y_true>1])

    print(f"num_proposed:{num_proposed}")
    print(f"num_correct:{num_correct}")
    print(f"num_gold:{num_gold}")
    try:
        precision = num_correct / num_proposed
    except ZeroDivisionError:
        precision = 1.0

    try:
        recall = num_correct / num_gold
    except ZeroDivisionError:
        recall = 1.0

    try:
        f1 = 2*precision*recall / (precision + recall)
    except ZeroDivisionError:
        if precision*recall==0:
            f1=1.0
        else:
            f1=0

    os.remove("temp")

    print("precision=%.2f"%precision)
    print("recall=%.2f"%recall)
    print("f1=%.2f"%f1)
    return precision, recall, f1

class Arg():
    def __init__(self, check_path):
        self.testset = "/home/cilab/LabMembers/YS/KDD/finetuning/test.txt"
        self.checkpoint = check_path
        self.batch_size = 8
  
        
import glob
check_lists=[]
check_list = glob.glob("/home/cilab/LabMembers/YS/bert_ner/finetuned_by_KDD/*.pt")
check_len = len(check_list)
for i in range(1,check_len+1):
    check_lists.append("/home/cilab/LabMembers/YS/bert_ner/finetuned_by_KDD/{}.pt".format(i))

if __name__=="__main__":
    print("load check point of model...")
    
    print(checkpoint.keys())
    model = Net(False, len(VOCAB), 'cpu', False)
    eval_dataset = NerDataset(hp.testset)
    eval_iter = data.DataLoader(dataset=eval_dataset,
                                     batch_size=hp.batch_size,
                                     shuffle=False,
                                     num_workers=4,
                                     collate_fn=pad)
    max_f1 = 0
    max_pt = ""
    f1_list = []
    for check in check_lists:
        print("\nCheck Point : ",check)
        hp = Arg(check)
        checkpoint = torch.load(hp.checkpoint)
        model.load_state_dict(checkpoint['model_state_dict'],strict=False)

        precision, recall, f1 = eval(model, eval_iter)
        f1_list.append(format(f1, '.2f'))
        if max_f1<f1:
            max_f1 = f1
            max_pt = check
    print("\n\n{} : F1_Score : {}".format(max_pt.split('/')[-1], max_f1))
    print(f1_list)



load check point of model...
dict_keys(['epoch', 'model_state_dict', 'optimizer_state_dict'])

Check Point :  /home/cilab/LabMembers/YS/bert_ner/finetuned_by_KDD/1.pt
num_proposed:411
num_correct:225
num_gold:1121
precision=0.55
recall=0.20
f1=0.29

Check Point :  /home/cilab/LabMembers/YS/bert_ner/finetuned_by_KDD/2.pt
num_proposed:779
num_correct:387
num_gold:1121
precision=0.50
recall=0.35
f1=0.41

Check Point :  /home/cilab/LabMembers/YS/bert_ner/finetuned_by_KDD/3.pt
num_proposed:799
num_correct:363
num_gold:1121
precision=0.45
recall=0.32
f1=0.38

Check Point :  /home/cilab/LabMembers/YS/bert_ner/finetuned_by_KDD/4.pt
num_proposed:674
num_correct:318
num_gold:1121
precision=0.47
recall=0.28
f1=0.35

Check Point :  /home/cilab/LabMembers/YS/bert_ner/finetuned_by_KDD/5.pt
num_proposed:941
num_correct:417
num_gold:1121
precision=0.44
recall=0.37
f1=0.40

Check Point :  /home/cilab/LabMembers/YS/bert_ner/finetuned_by_KDD/6.pt
num_proposed:1024
num_correct:426
num_gold:1121
precision=

## kp20k 20% (pre-trained)

In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils import data
from model_test import Net
from data_load import NerDataset, pad, VOCAB, tokenizer, tag2idx, idx2tag
import os
import numpy as np
import argparse

def eval(model, iterator):
    model.eval()

    Words, Is_heads, Tags, Y, Y_hat = [], [], [], [], []
    with torch.no_grad():
        for i, batch in enumerate(iterator):
            words, x, is_heads, tags, y, seqlens = batch

            _, _, y_hat = model(x, y)  # y_hat: (N, T)

            Words.extend(words)
            Is_heads.extend(is_heads)
            Tags.extend(tags)
            Y.extend(y.numpy().tolist())
            Y_hat.extend(y_hat.cpu().numpy().tolist())

    ## gets results and save
    with open("temp", 'w') as fout:
        for words, is_heads, tags, y_hat in zip(Words, Is_heads, Tags, Y_hat):
            y_hat = [hat for head, hat in zip(is_heads, y_hat) if head == 1]
            preds = [idx2tag[hat] for hat in y_hat]
            assert len(preds)==len(words.split())==len(tags.split())
            for w, t, p in zip(words.split()[1:-1], tags.split()[1:-1], preds[1:-1]):
                fout.write(f"{w} {t} {p}\n")
            fout.write("\n")

    ## calc metric
    y_true =  np.array([tag2idx[line.split()[1]] for line in open("temp", 'r').read().splitlines() if len(line) > 0])
    y_pred =  np.array([tag2idx[line.split()[2]] for line in open("temp", 'r').read().splitlines() if len(line) > 0])

    num_proposed = len(y_pred[y_pred>1])
    num_correct = (np.logical_and(y_true==y_pred, y_true>1)).astype(np.int).sum()
    num_gold = len(y_true[y_true>1])

    print(f"num_proposed:{num_proposed}")
    print(f"num_correct:{num_correct}")
    print(f"num_gold:{num_gold}")
    try:
        precision = num_correct / num_proposed
    except ZeroDivisionError:
        precision = 1.0

    try:
        recall = num_correct / num_gold
    except ZeroDivisionError:
        recall = 1.0

    try:
        f1 = 2*precision*recall / (precision + recall)
    except ZeroDivisionError:
        if precision*recall==0:
            f1=1.0
        else:
            f1=0

    os.remove("temp")

    print("precision=%.2f"%precision)
    print("recall=%.2f"%recall)
    print("f1=%.2f"%f1)
    return precision, recall, f1

class Arg():
    def __init__(self, check_path):
        self.testset = "/home/cilab/LabMembers/YS/WWW/finetuning/test.txt"
        self.checkpoint = check_path
        self.batch_size = 8
  
        
import glob
check_lists=[]
check_list = glob.glob("/home/cilab/LabMembers/YS/bert_ner/new_new_new_model_by_kp40/*.pt")
check_len = len(check_list)
for i in range(1,check_len+1):
    check_lists.append("/home/cilab/LabMembers/YS/bert_ner/new_new_new_model_by_kp40/{}.pt".format(i))

if __name__=="__main__":
    print("load check point of model...")
    testset = "/home/cilab/LabMembers/YS/WWW/finetuning/test.txt"
    
    
    model = Net(False, len(VOCAB), 'cpu', False)
    eval_dataset = NerDataset(testset)
    eval_iter = data.DataLoader(dataset=eval_dataset,
                                     batch_size=8,
                                     shuffle=False,
                                     num_workers=4,
                                     collate_fn=pad)
    max_f1 = 0
    max_pt = ""
    f1_list = []
    for check in check_lists:
        print("\nCheck Point : ",check)
        hp = Arg(check)
        checkpoint = torch.load(hp.checkpoint)
        model.load_state_dict(checkpoint['model_state_dict'],strict=False)

        precision, recall, f1 = eval(model, eval_iter)
        f1_list.append(format(f1, '.2f'))
        if max_f1<f1:
            max_f1 = f1
            max_pt = check
    print("\n\n{} : F1_Score : {}".format(max_pt.split('/')[-1], max_f1))
    print(f1_list)



load check point of model...

Check Point :  /home/cilab/LabMembers/YS/bert_ner/new_new_new_model_by_kp40/1.pt
num_proposed:8399
num_correct:330
num_gold:1738
precision=0.04
recall=0.19
f1=0.07

Check Point :  /home/cilab/LabMembers/YS/bert_ner/new_new_new_model_by_kp40/2.pt
num_proposed:7168
num_correct:271
num_gold:1738
precision=0.04
recall=0.16
f1=0.06

Check Point :  /home/cilab/LabMembers/YS/bert_ner/new_new_new_model_by_kp40/3.pt
num_proposed:7006
num_correct:270
num_gold:1738
precision=0.04
recall=0.16
f1=0.06

Check Point :  /home/cilab/LabMembers/YS/bert_ner/new_new_new_model_by_kp40/4.pt
num_proposed:7394
num_correct:303
num_gold:1738
precision=0.04
recall=0.17
f1=0.07

Check Point :  /home/cilab/LabMembers/YS/bert_ner/new_new_new_model_by_kp40/5.pt
num_proposed:7068
num_correct:304
num_gold:1738
precision=0.04
recall=0.17
f1=0.07

Check Point :  /home/cilab/LabMembers/YS/bert_ner/new_new_new_model_by_kp40/6.pt
num_proposed:8585
num_correct:354
num_gold:1738
precision=0.04
r

## kp20k 20% (pre-trained) WWW


In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils import data
from model_test import Net
from data_load import NerDataset, pad, VOCAB, tokenizer, tag2idx, idx2tag
import os
import numpy as np
import argparse

def eval(model, iterator):
    model.eval()

    Words, Is_heads, Tags, Y, Y_hat = [], [], [], [], []
    with torch.no_grad():
        for i, batch in enumerate(iterator):
            words, x, is_heads, tags, y, seqlens = batch

            _, _, y_hat = model(x, y)  # y_hat: (N, T)

            Words.extend(words)
            Is_heads.extend(is_heads)
            Tags.extend(tags)
            Y.extend(y.numpy().tolist())
            Y_hat.extend(y_hat.cpu().numpy().tolist())

    ## gets results and save
    with open("temp", 'w') as fout:
        for words, is_heads, tags, y_hat in zip(Words, Is_heads, Tags, Y_hat):
            y_hat = [hat for head, hat in zip(is_heads, y_hat) if head == 1]
            preds = [idx2tag[hat] for hat in y_hat]
            assert len(preds)==len(words.split())==len(tags.split())
            for w, t, p in zip(words.split()[1:-1], tags.split()[1:-1], preds[1:-1]):
                fout.write(f"{w} {t} {p}\n")
            fout.write("\n")

    ## calc metric
    y_true =  np.array([tag2idx[line.split()[1]] for line in open("temp", 'r').read().splitlines() if len(line) > 0])
    y_pred =  np.array([tag2idx[line.split()[2]] for line in open("temp", 'r').read().splitlines() if len(line) > 0])

    num_proposed = len(y_pred[y_pred>1])
    num_correct = (np.logical_and(y_true==y_pred, y_true>1)).astype(np.int).sum()
    num_gold = len(y_true[y_true>1])

    print(f"num_proposed:{num_proposed}")
    print(f"num_correct:{num_correct}")
    print(f"num_gold:{num_gold}")
    try:
        precision = num_correct / num_proposed
    except ZeroDivisionError:
        precision = 1.0

    try:
        recall = num_correct / num_gold
    except ZeroDivisionError:
        recall = 1.0

    try:
        f1 = 2*precision*recall / (precision + recall)
    except ZeroDivisionError:
        if precision*recall==0:
            f1=1.0
        else:
            f1=0

    os.remove("temp")

    print("precision=%.2f"%precision)
    print("recall=%.2f"%recall)
    print("f1=%.2f"%f1)
    return precision, recall, f1

class Arg():
    def __init__(self, check_path):
        self.testset = "/home/cilab/LabMembers/YS/KDD/finetuning/test.txt"
        self.checkpoint = check_path
        self.batch_size = 8
  
        
import glob
check_lists=[]
check_list = glob.glob("/home/cilab/LabMembers/YS/bert_ner/new_new_new_model_by_kp40/*.pt")
check_len = len(check_list)
for i in range(1,check_len+1):
    check_lists.append("/home/cilab/LabMembers/YS/bert_ner/new_new_new_model_by_kp40/{}.pt".format(i))

if __name__=="__main__":
    print("load check point of model...")
    #testset = "/home/cilab/LabMembers/YS/KDD/finetuning/test.txt"
    testset = "/home/cilab/LabMembers/YS/kp20k/finetuning/valid_40.txt"
    
    model = Net(False, len(VOCAB), 'cpu', False)
    eval_dataset = NerDataset(testset)
    eval_iter = data.DataLoader(dataset=eval_dataset,
                                     batch_size=8,
                                     shuffle=False,
                                     num_workers=4,
                                     collate_fn=pad)
    max_f1 = 0
    max_pt = ""
    f1_list = []
    for check in check_lists:
        print("\nCheck Point : ",check)
        hp = Arg(check)
        checkpoint = torch.load(hp.checkpoint)
        model.load_state_dict(checkpoint['model_state_dict'],strict=False)

        precision, recall, f1 = eval(model, eval_iter)
        f1_list.append(format(f1, '.2f'))
        if max_f1<f1:
            max_f1 = f1
            max_pt = check
    print("\n\n{} : F1_Score : {}".format(max_pt.split('/')[-1], max_f1))
    print(f1_list)



load check point of model...

Check Point :  /home/cilab/LabMembers/YS/bert_ner/new_new_new_model_by_kp40/1.pt
num_proposed:3202617
num_correct:105395
num_gold:563559
precision=0.03
recall=0.19
f1=0.06

Check Point :  /home/cilab/LabMembers/YS/bert_ner/new_new_new_model_by_kp40/2.pt


KeyboardInterrupt: 