# scores

In [None]:
import glob
import os

from seqeval.metrics import classification_report
from seqeval.metrics import sequence_labeling

# bert
from tokenization import BertTokenizer
import tokenization as tokenization

# kobert tokenizer
import sentencepiece as spm
import six

# One entry per evaluated run: (vocabulary directory, prediction directory).
# The two lists derived below are consumed index-by-index, so entry i of
# voacb_paths always belongs to entry i of preds_paths.
_runs = (
    ("../otherberts/mbert_cased",
     "./finetuned/ver8.1.4_1142642_epoch2/test_pred"),
    ("../otherberts/bertbase_cased",
     "./finetuned/ver9.1.4_521121_epoch2/test_pred"),
    ("../otherberts/bioBERT/biobert_v1.1_pubmed",
     "./finetuned/ver11.1.4_521079_epoch2/test_pred"),
    ("../otherberts/kobert/models",
     "./finetuned/ver12.1.4_407013_epoch2/test_pred"),
)

voacb_paths = [vocab_dir for vocab_dir, _ in _runs]
preds_paths = [preds_dir for _, preds_dir in _runs]

# Sanity check: every prediction directory needs a matching vocabulary.
assert len(voacb_paths)==len(preds_paths)

# vocab_words
class Vocab_words(object):
    """Two-way lookup between token ids and token strings.

    The vocabulary file is expected to hold one token per line; the 0-based
    line number is used as the token id.
    """

    def __init__(self, vocab_file):
        """Create the lookup tables and fill them from `vocab_file`."""
        # i_to_w: token id (line index) -> token string
        self.i_to_w = {}
        # w_to_i: token string -> token id (last occurrence wins on duplicates)
        self.w_to_i = {}
        self.getvocab(vocab_file)

    def getvocab(self, vocab_file):
        """Read `vocab_file` and populate `i_to_w` and `w_to_i`.

        Args:
            vocab_file: path of a text file with one token per line.
        """
        # io.open accepts `encoding` on both Python 2 and Python 3.
        import io

        # Decode explicitly as UTF-8 (what convert_to_unicode assumes) instead
        # of relying on the platform default, and close the handle when done.
        with io.open(vocab_file, 'r', encoding='utf-8') as f:
            for index, raw_line in enumerate(f):
                term = convert_to_unicode(raw_line.strip("\n"))
                self.i_to_w[index] = term
                self.w_to_i[term] = index
            
def convert_to_unicode(text):
    """Return `text` as a unicode string, decoding byte strings as utf-8.

    Undecodable bytes are dropped ("ignore" error handler).

    Raises:
        ValueError: if `text` is neither a text nor a byte string, or if the
            interpreter is neither Python 2 nor Python 3.
    """
    # Pick the (already-unicode, needs-decoding) type pair for this interpreter.
    if six.PY3:
        text_type, encoded_type = str, bytes
    elif six.PY2:
        text_type, encoded_type = unicode, str
    else:
        raise ValueError("Not running on Python2 or Python 3?")

    if isinstance(text, text_type):
        return text
    if isinstance(text, encoded_type):
        return text.decode("utf-8", "ignore")
    raise ValueError("Unsupported string type: %s" % (type(text)))

In [None]:
# Row index of each field inside a prediction file (one field per line).
linemap = dict(
    tokens=0,
    preds=1,
    doc_ids=2,
    sent_ids=3,
    labels=4,
    out_logits_start=5,
    out_logits_end=6,
)
print("linemap: ", linemap)

In [None]:
# (marker, do_lower_case) pairs for vocabularies loaded through BertTokenizer.
# A marker matches when it is a substring of the lower-cased vocab path; the
# first matching entry wins, so the order below is significant.
# NOTE(review): only tokenizer.vocab is read afterwards, so do_lower_case is
# presumably irrelevant to the result; it is set to agree with the model name.
_BERT_VOCAB_MARKERS = (
    ("mbert_cased", False),
    ("mbert_uncased", True),
    ("bertbase_uncased", True),
    ("bertbase_cased", False),
    ("biobert", False),
)

# vocab_path -> id-to-token lookup, so a vocabulary is loaded once and reused
# for every prediction file of the same run.
_ID_TO_TOKEN_CACHE = {}


def _load_id_to_token(vocab_path):
    """Return a lookup `obj` such that `obj[token_id]` is the token string."""
    if vocab_path in _ID_TO_TOKEN_CACHE:
        return _ID_TO_TOKEN_CACHE[vocab_path]

    path_key = vocab_path.lower()
    id_to_token = None
    for marker, lower_case in _BERT_VOCAB_MARKERS:
        if marker in path_key:
            tokenizer = BertTokenizer(vocab_file=vocab_path+'/vocab.txt',
                                      do_lower_case=lower_case, max_len=512)
            # assumes tokenizer.vocab iterates in token-id order, so the list
            # index equals the token id -- TODO confirm against tokenization.py
            id_to_token = list(tokenizer.vocab.keys())
            break

    if id_to_token is None:
        if "kobert" in path_key:
            # NOTE(review): this location is hard-coded and ignores vocab_path
            # (and differs in letter case from the configured kobert path).
            vocab_file = "../otherberts/KoBERT/models/vocab.txt"
        else:
            vocab_file = vocab_path+'/vocab.txt'
        id_to_token = Vocab_words(vocab_file).i_to_w

    _ID_TO_TOKEN_CACHE[vocab_path] = id_to_token
    return id_to_token


def calc_score(lines, vocab_path):
    """Evaluate one prediction file at token level and at document level.

    Args:
        lines: the lines of a prediction file. Each line is a tab-separated
            field; `linemap` gives the line index of every field. The
            "tokens", "preds", "doc_ids" and "labels" fields are used.
        vocab_path: directory of the vocabulary belonging to the model that
            produced the predictions; substrings of the path select how the
            vocabulary is loaded.

    Returns:
        A 4-tuple `(y_true_one_pt, y_pred_one_pt, tp, document_preds)`:
        - y_true_one_pt / y_pred_one_pt: gold and predicted labels as ints for
          every token except the first one of each document.
        - tp: one entry per document, 1 if every prediction (first position
          included) equals its label, otherwise 0.
        - document_preds: for each document with at least two positions,
          three strings: space-joined tokens, labels and predictions.

    Raises:
        IndexError: if `lines` lacks a required field or the label/prediction
            fields are shorter than the token field.
        ValueError: if a token id, label or prediction is not an integer.
    """
    def read_field(name):
        return lines[linemap[name]].strip("\n").split("\t")

    tokens = read_field("tokens")
    preds = read_field("preds")
    doc_ids = read_field("doc_ids")
    labels = read_field("labels")

    id_to_token = _load_id_to_token(vocab_path)

    # The training sample contains one section of assessment per document.
    # A new document starts wherever the doc id differs from the previous one.
    starts = [0] + [d for d in range(1, len(doc_ids))
                    if doc_ids[d] != doc_ids[d - 1]]
    boundaries = starts + [len(tokens)]

    tp = []             # document level evaluation (accuracy)
    y_true_one_pt = []  # token level eval
    y_pred_one_pt = []  # token level eval
    document_preds = [] # predicted tokens
    for doc_start, doc_end in zip(boundaries[:-1], boundaries[1:]):
        tokens_doc = tokens[doc_start:doc_end]
        preds_doc = preds[doc_start:doc_end]
        labels_doc = labels[doc_start:doc_end]

        tokens_str = []
        label_str = []
        pred_str = []
        # Position 0 of each document is left out of the token-level data
        # (presumably the [CLS] token -- TODO confirm).
        for t in range(1, len(tokens_doc)):
            tokens_str.append(id_to_token[int(tokens_doc[t])])
            label_str.append(str(labels_doc[t]))
            pred_str.append(str(preds_doc[t]))
            y_true_one_pt.append(int(labels_doc[t]))
            y_pred_one_pt.append(int(preds_doc[t]))

        if tokens_str:
            # collect documents
            document_preds.append(" ".join(tokens_str))
            document_preds.append(" ".join(label_str))
            document_preds.append(" ".join(pred_str))

        # Document level exact matching: a document counts as correct only if
        # every position (including position 0) is predicted correctly. This
        # is a strict measure that tends to underestimate the model.
        all_match = all(preds_doc[t] == labels_doc[t]
                        for t in range(len(tokens_doc)))
        tp.append(1 if all_match else 0)

    return y_true_one_pt, y_pred_one_pt, tp, document_preds
        

In [None]:
def change_char(tag):
    """Turn a sequence of 0/1 labels into single-tag sequences.

    Each 1 becomes ["B-asmt"] and each 0 becomes ["B-else"]; any other value
    is skipped, so the result can be shorter than the input.
    """
    return [
        ["B-asmt"] if value == 1 else ["B-else"]
        for value in tag
        if value == 1 or value == 0
    ]


In [None]:
# Score every run: read all prediction files of the run, write the decoded
# documents per file, then write a token-level classification report plus the
# document-level exact-match accuracy.
outpath = "./data/scores/test"

for preds_path, voacb_path in zip(preds_paths, voacb_paths):
    # Name of the run = directory that contains the prediction folder.
    run_name = str(preds_path.split("/")[-2])

    y_true = []
    y_pred = []
    exact_match = []

    path = preds_path+"/*.txt"
    files = sorted(glob.glob(path))

    print("outpath: ", outpath)
    output_eval_preds = os.path.join(outpath, run_name)
    print("output_eval_preds: ", output_eval_preds)
    # makedirs also creates `outpath` itself when it does not exist yet.
    if not os.path.exists(output_eval_preds):
        os.makedirs(output_eval_preds)

    print("path: ", path)
    if not files:
        # Nothing to score: the accuracy below would divide by zero.
        print("no prediction files found, skipping: ", path)
        continue

    for f, pred_file in enumerate(files):
        if f%100==0:
            print(str(f)+"/"+str(len(files)))

        with open(pred_file, "r") as reader:
            lines = reader.readlines()

        # Unpack into per-file names so the run-level accumulators are kept.
        y_true_one_pt, y_pred_one_pt, exact_match_one_pt, outtext_file = calc_score(lines, voacb_path)
        y_true.extend(y_true_one_pt)
        y_pred.extend(y_pred_one_pt)
        exact_match.extend(exact_match_one_pt)

        filename = os.path.basename(pred_file)
        with open(os.path.join(output_eval_preds, filename), "w") as file_out:
            file_out.write("\n".join(outtext_file))

    # token level evaluation
    report = classification_report(change_char(y_true), change_char(y_pred), digits=4)

    # document level evaluation: share of documents predicted without error
    exactmat_accuracy = sum(exact_match)/len(exact_match)

    output_eval_file = os.path.join(outpath, run_name+".txt")
    with open(output_eval_file, "w") as writer:
        writer.write(report)
        # The spelling of this label is matched literally when the reports
        # are merged, so it must not be changed here alone.
        writer.write("\nexcat matching accuracy: "+str(exactmat_accuracy))

print("Done")

# merge results

In [None]:
import glob

# Merge the per-run report files into a single tab-separated table.
filepaths = glob.glob("./data/scores/test/*.txt")
filepaths.sort()

names = []
scores_asmt = []
scores_else = []
scores_macro = []
scores_doc_acc = []
for filepath in filepaths:
    with open(filepath, "r") as report_file:
        lines = report_file.readlines()

    # File name without directory and without its extension.
    filename = ".".join(filepath.split("/")[-1].split(".")[:-1])
    names.append(filename)

    # Values found in this report, keyed by row kind. Collecting them per
    # file keeps the table rows aligned even when a report lacks a row.
    found = {}
    for raw_line in lines:
        line = raw_line.strip()
        if "asmt" in line:
            row_kind = "asmt"
        elif "else" in line:
            row_kind = "else"
        elif "macro" in line:
            row_kind = "macro"
        elif "excat matching accuracy" in line:
            row_kind = "doc_acc"
        else:
            continue

        if row_kind == "doc_acc":
            value = line.split(":")[-1].strip()
        else:
            # A report row ends with precision, recall, f-score, support;
            # splitting on whitespace does not depend on the column padding.
            value = "\t".join(line.split()[-4:-1])
        # keep the first matching line of each kind
        found.setdefault(row_kind, value)

    missing = [kind for kind in ("asmt", "else", "macro", "doc_acc") if kind not in found]
    if missing:
        print("warning: " + filepath + " has no row for: " + ", ".join(missing))

    # Three empty columns stand in for a missing score row.
    scores_asmt.append(found.get("asmt", "\t\t"))
    scores_else.append(found.get("else", "\t\t"))
    scores_macro.append(found.get("macro", "\t\t"))
    scores_doc_acc.append(found.get("doc_acc", ""))

print(len(names))
print(len(scores_asmt))
print(len(scores_else))
print(len(scores_macro))
print(len(scores_doc_acc))

outtext = ["name\tp_asmt\tr_asmt\tf_asmt\tp_else\tr_else\tf_else\tp_macro\tr_macro\tf_macro\tscores_doc_acc"]
for n in range(len(names)):
    print(names[n]+"\t"+scores_asmt[n]+"\t"+scores_else[n]+"\t"+scores_macro[n])
    outtext.append(names[n]+"\t"+scores_asmt[n]+"\t"+scores_else[n]+"\t"+scores_macro[n]+"\t"+scores_doc_acc[n])

with open("./data/scores/test_score.txt", "w") as summary_file:
    summary_file.write("\n".join(outtext))