In [1]:
import json
import os
import parser
from src import dataio
import glob
from sklearn.metrics import accuracy_score
from seqeval.metrics import f1_score, precision_score, recall_score

import torch
# Let cuDNN benchmark the available algorithms and reuse the fastest one.
torch.backends.cudnn.benchmark = True

# Prefer the first GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
n_gpu = torch.cuda.device_count()
# A torch.device never compares equal to a plain string, so `device != "cpu"`
# was always true and set_device(0) was attempted on CPU-only hosts as well.
# Checking the device type only selects the GPU when one is actually in use.
if device.type == "cuda":
    torch.cuda.set_device(0)

Using TensorFlow backend.


### Korean FrameNet ###
	# contact: hahmyg@kaist, hahmyg@gmail.com #


	###DEVICE: cuda:0


In [2]:
# Directory containing this file; inside a notebook `__file__` is undefined,
# so fall back to the current working directory.
try:
    dir_path = os.path.dirname(os.path.abspath( __file__ ))
except NameError:
    # Only the missing `__file__` case is expected here; anything else
    # (including KeyboardInterrupt) should propagate instead of being swallowed.
    dir_path = '.'

In [3]:
# Helpers for measuring elapsed execution time
import time

_start_time = time.time()


def tic():
    """Restart the shared stopwatch at the current wall-clock time."""
    global _start_time
    _start_time = time.time()


def tac():
    """Return the time elapsed since the stopwatch was last (re)started.

    The elapsed time is rounded to whole seconds and formatted as
    '<hours>hour:<minutes>min:<seconds>sec'.
    """
    elapsed = round(time.time() - _start_time)
    hours, remainder = divmod(elapsed, 3600)
    minutes, seconds = divmod(remainder, 60)
    return '{}hour:{}min:{}sec'.format(hours, minutes, seconds)

In [4]:
def flat_accuracy(preds, labels):
    """Return the fraction of positions where the arg-max prediction equals the label.

    Args:
        preds: array of scores; the arg-max is taken over axis 2, so it must
            have at least three dimensions.
        labels: array of label ids with as many elements as `preds` has
            positions once axis 2 is reduced.

    Returns:
        The number of matching positions divided by the number of labels.
    """
    # numpy is not imported at the top of this notebook, so the original body
    # raised NameError on `np`; import it locally to keep the function usable.
    import numpy as np

    pred_flat = np.argmax(preds, axis=2).flatten()
    labels_flat = labels.flatten()
    return np.sum(pred_flat == labels_flat) / len(labels_flat)

# Load the per-frame lookup that weighting() consults; it is indexed by frame
# name and each entry is tested for membership of an argument label.
with open('./data/frame_coreFE_list.json','r') as core_fe_file:
    frame_coreFE = json.loads(core_fe_file.read())

def weighting(frame, args):
    """Return `args` with every entry listed under `frame` in frame_coreFE doubled.

    Each element of `args` is kept in order; elements found in
    `frame_coreFE[frame]` appear twice in a row, all others once.
    """
    weighted_args = []
    for label in args:
        # The lookup stays inside the loop so an empty `args` never touches
        # frame_coreFE, exactly as before.
        copies = 2 if label in frame_coreFE[frame] else 1
        weighted_args.extend([label] * copies)
    return weighted_args

In [19]:
def _epoch_sort_key(model_dir):
    """Sort key for checkpoint directories: numeric epoch names first, in numeric order."""
    name = model_dir.split('/')[-2]
    try:
        return (0, int(name), name)
    except ValueError:
        return (1, 0, name)


def test(srl=False, masking=False, viterbi=False, language=False, model_path=False, 
         result_dir=False, train_lang=False, tgt=False, 
         pretrained="bert-base-multilingual-cased"):
    """Evaluate every checkpoint directory under `model_path` on the test split.

    For each sub-directory matched by `model_path + '*/'` a
    `parser.ShallowSemanticParser` is built and run over the test instances
    returned by `dataio.load_data`. Sense accuracy and argument
    precision/recall/F1 are printed and collected; when `srl == 'framenet'`
    full-structure precision/recall/F1 are added. All rows are finally written
    as a tab-separated table to a result file.

    Args:
        srl: task mode, forwarded to the data loader and the parser; the
            full-structure scores are only computed for 'framenet'.
        masking: forwarded to the parser; also selects the
            '_with_masking_result.txt' file suffix.
        viterbi: forwarded to the parser.
        language: language of the evaluation data, used for loading the data
            and for naming the result file.
        model_path: directory prefix whose sub-directories are the checkpoints
            to evaluate. The glob pattern is `model_path + '*/'`, so it should
            normally end with '/'.
        result_dir: directory for the result file. When falsy it defaults to
            '/disk/data/models/<name>-result/', where <name> is the
            second-to-last '/'-separated component of `model_path`.
        train_lang: label of the training language used in the file name;
            defaults to `language`.
        tgt: forwarded to the parser.
        pretrained: forwarded to the parser.

    Returns:
        None. Results are printed and written to the result file.

    Raises:
        ValueError: if `model_path` is not given.
    """
    if model_path is False or model_path is None:
        raise ValueError('model_path must point to the directory that contains the model checkpoints')

    if not result_dir:
        # The original referenced an undefined `model_dir` here (NameError);
        # the default is derived from the checkpoint location.
        result_dir = '/disk/data/models/'+model_path.split('/')[-2]+'-result/'
    if result_dir[-1] != '/':
        result_dir = result_dir+'/'

    if not os.path.exists(result_dir):
        os.makedirs(result_dir)

    if not train_lang:
        train_lang = language

    fname = result_dir+train_lang+'_for_'+language
    if masking:
        fname = fname + '_with_masking_result.txt'
    else:
        fname = fname +'_result.txt'

    print('### Your result would be saved to:', fname)

    trn, dev, tst = dataio.load_data(srl=srl, language=language, exem=False)
    print('### EVALUATION')
    print('MODE:', srl)
    print('target LANGUAGE:', language)
    print('trained LANGUAGE:', train_lang)
    print('Viterbi:', viterbi)
    print('masking:', masking)
    print('using TGT token:', tgt)
    tic()

    # glob returns matches in arbitrary order; sort so that the rows of the
    # result file follow the epoch order and are reproducible between runs.
    models = sorted(glob.glob(model_path+'*/'), key=_epoch_sort_key)

    # torch.cuda.set_device rejects a CPU device, so only call it on CUDA.
    use_cuda = device.type == 'cuda'
    with_full_structure = srl == 'framenet'

    eval_result = []
    for m in models:
        print('### model dir:', m)
        print('### TARGET LANGUAGE:', language)
        if use_cuda:
            torch.cuda.set_device(device)
        # NOTE(review): the parser is always created with language='multilingual',
        # independent of the `language` argument - confirm this is intended.
        model = parser.ShallowSemanticParser(srl=srl,gold_pred=True, model_path=m, viterbi=viterbi, 
                                             masking=masking, language='multilingual', tgt=tgt,
                                             pretrained=pretrained)

        gold_senses, pred_senses, gold_args, pred_args = [],[],[],[]
        gold_full_all, pred_full_all = [],[]

        for instance in tst:
            if use_cuda:
                torch.cuda.set_device(device)
            result = model.parser(instance)

            # Index 2 holds the per-token frame column ('_' for non-target
            # tokens); the first non-'_' entry is the sense of the instance.
            gold_sense = [i for i in instance[2] if i != '_'][0]
            pred_sense = [i for i in result[0][2] if i != '_'][0]

            # Index 3 holds the argument tags; 'X' entries are dropped from the
            # gold side only. Presumably they mark the <tgt> marker tokens and
            # the parser output no longer contains them - TODO confirm.
            gold_arg = [i for i in instance[3] if i != 'X']
            pred_arg = list(result[0][3])

            gold_senses.append(gold_sense)
            pred_senses.append(pred_sense)

            gold_args.append(gold_arg)
            pred_args.append(pred_arg)

            if with_full_structure:
                # Full structure = the sense counted twice, followed by the
                # arguments with the entries selected by weighting() doubled.
                gold_full_all.append([gold_sense, gold_sense] + weighting(gold_sense, gold_arg))
                pred_full_all.append([pred_sense, pred_sense] + weighting(pred_sense, pred_arg))

        acc = accuracy_score(gold_senses, pred_senses)
        arg_f1 = f1_score(gold_args, pred_args)
        arg_precision = precision_score(gold_args, pred_args)
        arg_recall = recall_score(gold_args, pred_args)

        # Checkpoint directories end with '/', so the epoch name is the
        # second-to-last path component.
        epoch = m.split('/')[-2]
        print('# EPOCH:', epoch)
        print("SenseId Accuracy: {}".format(acc))
        print("ArgId Precision: {}".format(arg_precision))
        print("ArgId Recall: {}".format(arg_recall))
        print("ArgId F1: {}".format(arg_f1))

        model_result = [epoch, acc, arg_precision, arg_recall, arg_f1]
        if with_full_structure:
            full_f1 = f1_score(gold_full_all, pred_full_all)
            full_precision = precision_score(gold_full_all, pred_full_all)
            full_recall = recall_score(gold_full_all, pred_full_all)
            print("full-structure Precision: {}".format(full_precision))
            print("full-structure Recall: {}".format(full_recall))
            print("full-structure F1: {}".format(full_f1))
            model_result += [full_precision, full_recall, full_f1]
        print('-----processing time:', tac())
        print('')

        eval_result.append([str(i) for i in model_result])

    header = ['epoch', 'SenseID', 'Arg_P', 'Arg_R', 'ArgF1']
    if with_full_structure:
        header += ['full_P', 'full_R', 'full_F1']

    with open(fname,'w') as f:
        f.write('\t'.join(header)+'\n')
        for row in eval_result:
            f.write('\t'.join(row)+'\n')

    print('\n\t### Your result is saved at:', fname)

# eval for en for en

In [None]:
# print('\t###multilingual-for-en-masking')
# srl = 'framenet'
# language = 'en'
# model_path = '/disk/data/models/framenet/enModel-with-exemplar/'

# result_dir = '/disk/data/models/eval_result/'
# test(srl=srl, language=language, masking=True, viterbi=False, tgt=True, train_lang='en_with_exem', 
#      model_path=model_path, result_dir=result_dir)

# eval for ko for ko

In [6]:
# print('\t###multilingual-for-en-masking')
# srl = 'framenet'
# language = 'en'
# model_path = '/disk/data/models/framenet/enModel-with-exemplar/'

# result_dir = '/disk/data/models/results/framenet/enModel-with-exemplar/'
# test(srl=srl, language=language, masking=True, viterbi=False, tgt=True, train_lang='en_with_exem', 
#      model_path=model_path, result_dir=result_dir)

In [7]:
# print('\t###multilingual-for-ko-masking')
# srl = 'framenet'
# language = 'ko'
# model_path = '/disk/data/models/framenet/koModel/'
# result_dir = '/disk/data/models/results/framenet/koModel/'
# test(srl=srl, language=language, masking=True, viterbi=False, tgt=True, train_lang='ko', 
#      model_path=model_path, result_dir=result_dir)

In [18]:
# print('\t###multilingual-for-en-without-masking')
# srl = 'framenet'
# language = 'en'
# model_path = '/disk/data/models/framenet/enModel-with-exemplar/'
# result_dir = '/disk/data/models/results/framenet/enModel-with-exemplar/'
# test(srl=srl, language=language, masking=False, viterbi=False, tgt=True, train_lang='en_with_exem', 
#      model_path=model_path, result_dir=result_dir)

	###multilingual-for-en-without-masking
### Your result would be saved to: /disk/data/models/results/framenet/enModel-with-exemplar/en_with_exem_for_en_result.txt
# of instances in trn: 19391
# of instances in dev: 2272
# of instances in tst: 6714
data example: [['Greece', 'wildfires', 'force', 'thousands', 'to', '<tgt>', 'evacuate', '</tgt>'], ['_', '_', '_', '_', '_', '_', 'evacuate.v', '_'], ['_', '_', '_', '_', '_', '_', 'Escaping', '_'], ['O', 'O', 'O', 'B-Escapee', 'O', 'X', 'O', 'X']]
### EVALUATION
MODE: framenet
target LANGUAGE: en
trained LANGUAGE: en_with_exem
Viterbi: False
masking: False
using TGT token: True
### model dir: /disk/data/models/framenet_old/mulModel-100/39/
### TARGET LANGUAGE: en
srl model: framenet
language: multilingual
version: 1.1
using viterbi: False
using masking: False
pretrained BERT: bert-base-multilingual-cased
using TGT special token: True
used dictionary:
	 /disk/kaiser/kaiser/src/../koreanframenet/resource/info/mul_lu2idx.json
	 /disk/kaiser/kai

  pred_logits = sm(masked_logit).view(1,-1)


Calendric_unit Calendric_unit
Likelihood Likelihood
Giving Revenge
Awareness Awareness
Increment Increment
Purpose Statement
Goal Goal
Grasp Grasp
Giving Revenge
Assistance Assistance
Causation Means
People People
Desiring Desiring
Awareness Awareness
Degree Revenge
Importance Revenge
Frequency Frequency
People People
Needing Needing
Being_employed Work
Familiarity Awareness
Importance Importance
Causation Causation
Giving Giving
Goal Locative_relation
Motion Motion
Temporal_collocation Temporal_collocation
Capability Capability
Emotion_directed Emotion_directed
Commerce_buy Commerce_buy
Temporal_collocation Temporal_collocation
Capability Capability
Commerce_buy Commerce_buy
Money Earnings_and_losses
Being_employed Work
Earnings_and_losses Earnings_and_losses
Money Commerce_scenario
Possession Possession
Text_creation Text_creation
Being_named Being_named
Capability Capability
Emotion_directed Experiencer_focus
Locative_relation Locative_relation
Being_employed Work
Cause_change Under

KeyboardInterrupt: 

In [20]:
# print('\t###ko-for-ko-without-masking')
# srl = 'framenet'
# language = 'ko'
# model_path = '/disk/data/models/framenet/koModel/'
# result_dir = '/disk/data/models/results/framenet/koModel/'
# test(srl=srl, language=language, masking=False, viterbi=False, tgt=True, train_lang='ko', 
#      model_path=model_path, result_dir=result_dir)

	###ko-for-ko-without-masking
### Your result would be saved to: /disk/data/models/results/framenet/koModel/ko_for_ko_result.txt

### loading Korean FrameNet 1.1 data...
	# of instances in training data: 17838
	# of instances in dev data: 2548
	# of instances in test data: 5097
# of instances in trn: 17838
# of instances in dev: 2548
# of instances in tst: 5097
data example: [['태풍', 'Hugo가', '남긴', '피해들과', '회사', '내', '몇몇', '주요', '부서들의', '저조한', '실적들을', '반영하여,', 'Aetna', 'Life', 'and', 'Casualty', 'Co.의', '3분기', '<tgt>', '순이익이', '</tgt>', '182.6', '백만', '달러', '또는', '주당', '1.63', '달러로', '22', '%', '하락하였다.'], ['_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '이익.n', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', 'Earnings_and_losses', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_'], ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O

  pred_logits = sm(masked_logit).view(1,-1)


Businesses Businesses
Calendric_unit Calendric_unit
Objective_influence Objective_influence
Calendric_unit Separating
Measure_duration Measure_duration
Change_position_on_a_scale Change_position_on_a_scale
Earnings_and_losses Change_position_on_a_scale
Earnings_and_losses Contingency
Commerce_sell Commerce_sell
Craft Artifact
Fluidic_motion Fluidic_motion
Locale_by_use Locale_by_use
Partitive Partitive
Evoking Arriving
People_by_jurisdiction Education_teaching
Chatting Social_interaction_evaluation
Connecting_architecture Connecting_architecture
First_experience Ordinal_numbers
Building_subparts Building_subparts
Noise_makers Noise_makers
History History
Noise_makers Noise_makers
People_by_origin People_by_origin
Noise_makers Type
Measure_mass Measure_mass
Cause_to_make_noise Cause_to_make_noise
Building_subparts Building_subparts
Statement Statement
Noise_makers Noise_makers
Locative_relation Locative_relation
Artifact Containers
Process_end Process_end
Noise_makers Noise_makers
Using

Travel Touring
Becoming Becoming
Becoming_a_member Becoming_a_member
Membership Membership
Infrastructure Locale_by_use
Dimension Dimension
Statement Statement
Roadways Experiencer_focus
Political_locales Relational_political_locales
Architectural_part Architectural_part
Political_locales Political_locales
Building Building
People Commerce_buy
Buildings Buildings


KeyboardInterrupt: 

In [None]:
# print('\t###en-for-ko-masking')
# srl = 'framenet'
# language = 'ko'
# model_path = ''
# result_dir = '/disk/data/models/results/framenet/enModel-with-exemplar/'
# test(srl=srl, language=language, masking=True, viterbi=False, tgt=True, train_lang='en_with_exem', 
#      model_path=model_path, result_dir=result_dir)

In [None]:
# print('\t###en-for-ko-without-masking')
# srl = 'framenet'
# language = 'ko'
# model_path = ''
# result_dir = '/disk/data/models/results/framenet/enModel-with-exemplar/'
# test(srl=srl, language=language, masking=False, viterbi=False, tgt=True, train_lang='en_with_exem', 
#      model_path=model_path, result_dir=result_dir)

# eval for KFN

In [None]:
# print('\t###multilingual-for-ko-masking')
# srl = 'framenet'
# language = 'ko'
# model_path = '/disk/data/models/framenet/mulModel-100/'

# result_dir = '/disk/data/models/eval_result/'
# test(srl=srl, language=language, masking=True, viterbi=False, tgt=True, train_lang='mul', 
#      model_path=model_path, result_dir=result_dir)

In [None]:
# print('\t###multilingual-for-ko-without-masking')
# srl = 'framenet'
# language = 'ko'
# model_path = '/disk/data/models/framenet/mulModel-100/'
# model_path = '/disk/data/models/framenet/mulModel-100/'

# result_dir = '/disk/data/models/results/framenet/mulModel-100/'
# test(srl=srl, language=language, masking=False, viterbi=False, tgt=True, train_lang='en_ko', 
#      model_path=model_path, result_dir=result_dir)

# eval for En again using mulModel

In [None]:
# print('\t###multilingual-for-en-masking')
# srl = 'framenet'
# language = 'en'
# model_path = '/disk/data/models/framenet/mulModel-100/'

# result_dir = '/disk/data/models/eval_result/'
# test(srl=srl, language=language, masking=True, viterbi=False, tgt=True, train_lang='mul', 
#      model_path=model_path, result_dir=result_dir)

In [None]:
# print('\t###multilingual-for-en-without-masking')
# srl = 'framenet'
# language = 'en'
# model_path = '/disk/data/models/framenet/mulModel-100/'
# model_path = '/disk/data/models/framenet/mulModel-100/'

# result_dir = '/disk/data/models/results/framenet/mulModel-100-for-en/'
# test(srl=srl, language=language, masking=False, viterbi=False, tgt=True, train_lang='en_ko', 
#      model_path=model_path, result_dir=result_dir)

# eval for distilling

In [None]:
# Evaluate the checkpoints under the distilling model directory on the Korean
# test data with masking enabled; results go to the distilling result folder.
print('\t###multilingual-for-ko-masking')
srl, language = 'framenet', 'ko'
model_path = '/disk/data/models/framenet/distilling/'
result_dir = '/disk/data/models/distilling/'

test(
    srl=srl,
    language=language,
    masking=True,
    viterbi=False,
    tgt=True,
    train_lang='distilling',
    model_path=model_path,
    result_dir=result_dir,
)