In [1]:
import sys
import glob
import torch
sys.path.append('../')
import os
from transformers import *
from kaiser.src import utils
from kaiser.src import dataio
from kaiser.src.modeling import BertForJointShallowSemanticParsing
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
import torch
from torch import nn
from torch.optim import Adam
from tqdm import tqdm, trange
from sklearn.metrics import accuracy_score
from seqeval.metrics import f1_score, precision_score, recall_score

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
n_gpu = torch.cuda.device_count()
if device != "cpu":
    torch.cuda.set_device(0)
# device = torch.device('cpu')
# torch.cuda.set_device(device)
# torch.backends.cudnn.deterministic = True
# torch.backends.cudnn.benchmark = True

import numpy as np
import random
np.random.seed(0)   
random.seed(0)

from torch import autograd
torch.cuda.empty_cache()

### Korean FrameNet ###
	# contact: hahmyg@kaist, hahmyg@gmail.com #



Using TensorFlow backend.


In [2]:
import random

In [3]:
# 실행시간 측정 함수
import time

_start_time = time.time()

def tic():
    global _start_time 
    _start_time = time.time()

def tac():
    t_sec = round(time.time() - _start_time)
    (t_min, t_sec) = divmod(t_sec,60)
    (t_hour,t_min) = divmod(t_min,60)
    
    result = '{}hour:{}min:{}sec'.format(t_hour,t_min,t_sec)
    return result

In [4]:
try:
    dir_path = os.path.dirname(os.path.abspath( __file__ ))
except:
    dir_path = '.'

In [5]:
def train(retrain=False, pretrained_dir=False):
    if pretrained_dir:
        print('original model:', pretrained_dir)
    else:
        print('original model:', 'BERT-multilingual-base')
    print('\n\tyour model would be saved at', model_dir)
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    # load a model first
    if retrain:
#         model_saved_path = pretrained_dir
        model = BertForJointShallowSemanticParsing.from_pretrained(pretrained_dir, 
                                                                   num_senses = len(bert_io.sense2idx), 
                                                                   num_args = len(bert_io.bio_arg2idx),
                                                                   lufrmap=bert_io.lufrmap, 
                                                                   frargmap = bert_io.bio_frargmap)
    else:
#         model_saved_path = PRETRAINED_MODEL
        model = BertForJointShallowSemanticParsing.from_pretrained(PRETRAINED_MODEL, 
                                                                   num_senses = len(bert_io.sense2idx), 
                                                                   num_args = len(bert_io.bio_arg2idx),
                                                                   lufrmap=bert_io.lufrmap, 
                                                                   frargmap = bert_io.bio_frargmap)
    model.to(device)
    
    print('retrain:', retrain)
    tic()
    print('\n### converting data to BERT input...')
    trn_data = bert_io.convert_to_bert_input_JointShallowSemanticParsing(trn)
    print('\t ...is done:', tac())
    print('\t#of instance:', len(trn), len(trn_data))
    sampler = RandomSampler(trn)
    trn_dataloader = DataLoader(trn_data, sampler=sampler, batch_size=batch_size)
    
    # load optimizer
    FULL_FINETUNING = True
    if FULL_FINETUNING:
        param_optimizer = list(model.named_parameters())
        no_decay = ['bias', 'gamma', 'beta']
        optimizer_grouped_parameters = [
            {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
             'weight_decay_rate': 0.01},
            {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
             'weight_decay_rate': 0.0}
        ]
    else:
        param_optimizer = list(model.classifier.named_parameters()) 
        optimizer_grouped_parameters = [{"params": [p for n, p in param_optimizer]}]
    optimizer = Adam(optimizer_grouped_parameters, lr=3e-5)

#     lr = 5e-5
#     lr =3e-5
#     optimizer = AdamW(model.parameters(), lr=lr, eps=1e-8)
#     num_training_steps = len(trn_dataloader) // epochs
#     scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=num_training_steps)  # PyTorch scheduler
    
    max_grad_norm = 1.0
#     global_step = 0
#     num_of_epoch = 1
    num_of_epoch = 0
    accuracy_result = []
    for _ in trange(epochs, desc="Epoch"):
        # load a fine-tuned model
#         print('epoch:', num_of_epoch)
#         print('epoch-1 model:', model_saved_path)
#         model = BertForJointShallowSemanticParsing.from_pretrained(model_saved_path, 
#                                                                    num_senses = len(bert_io.sense2idx), 
#                                                                    num_args = len(bert_io.bio_arg2idx),
#                                                                    lufrmap=bert_io.lufrmap, 
#                                                                    frargmap = bert_io.bio_frargmap)
#         model.to(device)
        
#         lr = 5e-5
#         optimizer = AdamW(model.parameters(), lr=lr, eps=1e-8)
#         num_training_steps = len(trn_dataloader) // epochs
#         scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=num_training_steps)  # PyTorch scheduler
        
        # TRAIN loop
        tr_loss = 0
        nb_tr_examples, nb_tr_steps = 0, 0
        for step, batch in enumerate(trn_dataloader):
            model.train()
            # add batch to gpu
            torch.cuda.set_device(0)
#             torch.cuda.set_device(device)
            batch = tuple(t.to(device) for t in batch)
            b_input_ids, b_input_orig_tok_to_maps, b_input_lus, b_input_senses, b_input_args, b_token_type_ids, b_input_masks = batch            
#             print(b_token_type_ids[0])
            # forward pass
#             with autograd.detect_anomaly():
            loss = model(b_input_ids, lus=b_input_lus, senses=b_input_senses, args=b_input_args,
                     token_type_ids=b_token_type_ids, attention_mask=b_input_masks)
            # backward pass


            loss.backward()
            # track train loss
            tr_loss += loss.item()
            nb_tr_examples += b_input_ids.size(0)
            nb_tr_steps += 1
            # gradient clipping
            torch.nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=max_grad_norm)
            # update parameters
            optimizer.step()
#             scheduler.step()
            model.zero_grad()
    
#             break

        print("Train loss: {}".format(tr_loss/nb_tr_steps))
        
        # save your model
        model_saved_path = model_dir+str(num_of_epoch)+'/'
        print('\n\tyour model is saved:', model_saved_path)
        if not os.path.exists(model_saved_path):
            os.makedirs(model_saved_path)
        model.save_pretrained(model_saved_path)
        
        # load a fine-tuned model
#         model = BertForJointShallowSemanticParsing.from_pretrained(model_saved_path, 
#                                                                    num_senses = len(bert_io.sense2idx), 
#                                                                    num_args = len(bert_io.bio_arg2idx),
#                                                                    lufrmap=bert_io.lufrmap, 
#                                                                    frargmap = bert_io.bio_frargmap)
#         model.to(device)
        
        num_of_epoch += 1

        
#         break
    print('...training is done')

In [6]:
srl = 'framenet'
masking = True
MAX_LEN = 256
batch_size = 6
PRETRAINED_MODEL = "bert-base-multilingual-cased"
fnversion = '1.7'
language = 'multi'

# (1) train En-FN with exemplars model

In [7]:
# model_dir = '/disk/data/models/framenet/enModel-with-exemplar/'
# trn, dev, tst = dataio.load_data(srl=srl, language='en')
# # trn = random.sample(trn, k=50)
# # epochs = 10
# epochs = 20

# print('')
# print('### TRAINING')
# print('MODEL:', srl)
# print('LANGUAGE:', language)
# print('PRETRAINED BERT:', PRETRAINED_MODEL)
# print('training data:')
# print('\t(en):', len(trn))
# print('BATCH_SIZE:', batch_size)
# print('MAX_LEN:', MAX_LEN)
# print('')

# bert_io = utils.for_BERT(mode='train', srl=srl, language=language, masking=masking, fnversion=fnversion, pretrained=PRETRAINED_MODEL)
# train()
# train(retrain=True, pretrained_dir='/disk/data/models/framenet/enModel-with-exemplar/0/')

# (2) train KFN

In [8]:
# model_dir = '/disk/data/models/framenet/koModel/'
# trn, dev, tst = dataio.load_data(srl=srl, language='ko')
# # trn = random.sample(trn, k=50)
# epochs = 50

# print('\nFrameBERT(ko)')
# print('### TRAINING')
# print('MODEL:', srl)
# print('LANGUAGE:', language)
# print('PRETRAINED BERT:', PRETRAINED_MODEL)
# print('training data:')
# print('\t(ko):', len(trn))
# print('BATCH_SIZE:', batch_size)
# print('MAX_LEN:', MAX_LEN)
# print('')

# bert_io = utils.for_BERT(mode='train', srl=srl, language=language, masking=masking, fnversion=fnversion, pretrained=PRETRAINED_MODEL)
# train()

# (3) fine-tuning by Korean FrameNet

In [9]:
# by 100%

model_dir = '/disk/data/models/dict_framenet/mulModel-100/'
trn, dev, tst = dataio.load_data(srl=srl, language='ko')
# trn = random.sample(trn, k=20)
epochs = 50


print('\nFineTuning Multilingual')
print('### TRAINING')
print('MODEL:', srl)
print('LANGUAGE:', language)
print('PRETRAINED BERT:', PRETRAINED_MODEL)
print('training data:')
print('\t(ko):', len(trn))
print('BATCH_SIZE:', batch_size)
print('MAX_LEN:', MAX_LEN)
print('')

bert_io = utils.for_BERT(mode='train', srl=srl, language=language, masking=masking, fnversion=fnversion, pretrained=PRETRAINED_MODEL)
train(retrain=True, pretrained_dir='/disk/data/models/dict_framenet/enModel-with-exemplar/9/')


### loading Korean FrameNet 1.1 data...
	# of instances in training data: 17838
	# of instances in dev data: 2548
	# of instances in test data: 5097
# of instances in trn: 17838
# of instances in dev: 2548
# of instances in tst: 5097
data example: [['태풍', 'Hugo가', '남긴', '피해들과', '회사', '내', '몇몇', '주요', '부서들의', '저조한', '실적들을', '반영하여,', 'Aetna', 'Life', 'and', 'Casualty', 'Co.의', '3분기', '<tgt>', '순이익이', '</tgt>', '182.6', '백만', '달러', '또는', '주당', '1.63', '달러로', '22', '%', '하락하였다.'], ['_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '이익.n', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', 'Earnings_and_losses', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_'], ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-Earner', 'I-Earner', 'I-Earner', 'I-Earner', 'I-Earner', 'B-Time', 'X', 'O', 'X', 'O', 'O', 'O', 'O', 'O', 'O', 'O', '

Epoch:   0%|          | 0/50 [00:00<?, ?it/s]

retrain: True

### converting data to BERT input...
	 ...is done: 0hour:0min:0sec
	#of instance: 20 20


  pred_logits = sm(masked_logit).view(1,-1)


Train loss: 4.220431804656982

	your model is saved: /disk/data/models/dict_framenet/mulModel-100/0/


Epoch:   2%|▏         | 1/50 [00:01<01:19,  1.63s/it]

Train loss: 2.73818176984787

	your model is saved: /disk/data/models/dict_framenet/mulModel-100/1/


Epoch:   4%|▍         | 2/50 [00:03<01:16,  1.59s/it]

Train loss: 1.7915591299533844

	your model is saved: /disk/data/models/dict_framenet/mulModel-100/2/


Epoch:   6%|▌         | 3/50 [00:04<01:13,  1.56s/it]

Train loss: 0.9227814972400665

	your model is saved: /disk/data/models/dict_framenet/mulModel-100/3/


Epoch:   8%|▊         | 4/50 [00:06<01:10,  1.54s/it]

Train loss: 0.6022640839219093

	your model is saved: /disk/data/models/dict_framenet/mulModel-100/4/


Epoch:  10%|█         | 5/50 [00:07<01:08,  1.53s/it]

Train loss: 0.44140859693288803

	your model is saved: /disk/data/models/dict_framenet/mulModel-100/5/


Epoch:  12%|█▏        | 6/50 [00:09<01:06,  1.52s/it]

Train loss: 0.26365745812654495

	your model is saved: /disk/data/models/dict_framenet/mulModel-100/6/


Epoch:  14%|█▍        | 7/50 [00:10<01:04,  1.51s/it]

Train loss: 0.1456884320359677

	your model is saved: /disk/data/models/dict_framenet/mulModel-100/7/


Epoch:  16%|█▌        | 8/50 [00:12<01:03,  1.51s/it]

Train loss: 0.12089874967932701

	your model is saved: /disk/data/models/dict_framenet/mulModel-100/8/


Epoch:  18%|█▊        | 9/50 [00:13<01:01,  1.50s/it]

Train loss: 0.09935624059289694

	your model is saved: /disk/data/models/dict_framenet/mulModel-100/9/


Epoch:  20%|██        | 10/50 [00:15<01:00,  1.50s/it]

Train loss: 0.07744738459587097

	your model is saved: /disk/data/models/dict_framenet/mulModel-100/10/


Epoch:  22%|██▏       | 11/50 [00:16<00:58,  1.50s/it]

Train loss: 0.044082353299017996

	your model is saved: /disk/data/models/dict_framenet/mulModel-100/11/


Epoch:  24%|██▍       | 12/50 [00:18<00:56,  1.50s/it]

Train loss: 0.04266034346073866

	your model is saved: /disk/data/models/dict_framenet/mulModel-100/12/


Epoch:  26%|██▌       | 13/50 [00:20<01:07,  1.82s/it]

Train loss: 0.04869956523180008

	your model is saved: /disk/data/models/dict_framenet/mulModel-100/13/


Epoch:  28%|██▊       | 14/50 [00:22<01:02,  1.72s/it]

Train loss: 0.023333578370511532

	your model is saved: /disk/data/models/dict_framenet/mulModel-100/14/


Epoch:  30%|███       | 15/50 [00:23<01:01,  1.75s/it]

Train loss: 0.020641659619286656

	your model is saved: /disk/data/models/dict_framenet/mulModel-100/15/


Epoch:  32%|███▏      | 16/50 [00:27<01:15,  2.23s/it]

Train loss: 0.00989191813278012

	your model is saved: /disk/data/models/dict_framenet/mulModel-100/16/


Epoch:  34%|███▍      | 17/50 [00:29<01:13,  2.23s/it]

Train loss: 0.013524946494726464

	your model is saved: /disk/data/models/dict_framenet/mulModel-100/17/


Epoch:  36%|███▌      | 18/50 [00:32<01:19,  2.48s/it]

Train loss: 0.03522271430119872

	your model is saved: /disk/data/models/dict_framenet/mulModel-100/18/


Epoch:  38%|███▊      | 19/50 [00:36<01:25,  2.76s/it]

Train loss: 0.011423524701967835

	your model is saved: /disk/data/models/dict_framenet/mulModel-100/19/


Epoch:  40%|████      | 20/50 [00:39<01:28,  2.96s/it]

Train loss: 0.02787267998792231

	your model is saved: /disk/data/models/dict_framenet/mulModel-100/20/


Epoch:  42%|████▏     | 21/50 [00:44<01:41,  3.50s/it]

Train loss: 0.0070113234396558255

	your model is saved: /disk/data/models/dict_framenet/mulModel-100/21/


Epoch:  44%|████▍     | 22/50 [00:48<01:42,  3.68s/it]

Train loss: 0.004456451497389935

	your model is saved: /disk/data/models/dict_framenet/mulModel-100/22/


Epoch:  46%|████▌     | 23/50 [00:51<01:37,  3.60s/it]

Train loss: 0.010074212506879121

	your model is saved: /disk/data/models/dict_framenet/mulModel-100/23/


Epoch:  48%|████▊     | 24/50 [00:55<01:32,  3.55s/it]

Train loss: 0.0027217671449761838

	your model is saved: /disk/data/models/dict_framenet/mulModel-100/24/


Epoch:  50%|█████     | 25/50 [00:58<01:27,  3.51s/it]

Train loss: 0.0026895536138908938

	your model is saved: /disk/data/models/dict_framenet/mulModel-100/25/


Epoch:  52%|█████▏    | 26/50 [01:04<01:42,  4.29s/it]

Train loss: 0.010963961336528882

	your model is saved: /disk/data/models/dict_framenet/mulModel-100/26/


Epoch:  54%|█████▍    | 27/50 [01:09<01:39,  4.32s/it]

Train loss: 0.0011904256243724376

	your model is saved: /disk/data/models/dict_framenet/mulModel-100/27/


Epoch:  56%|█████▌    | 28/50 [01:15<01:49,  4.99s/it]

Train loss: 0.004111765447305515

	your model is saved: /disk/data/models/dict_framenet/mulModel-100/28/


Epoch:  58%|█████▊    | 29/50 [01:19<01:37,  4.66s/it]

Train loss: 0.008325210153998341

	your model is saved: /disk/data/models/dict_framenet/mulModel-100/29/


Epoch:  60%|██████    | 30/50 [01:21<01:18,  3.93s/it]

Train loss: 0.0007576140778837726

	your model is saved: /disk/data/models/dict_framenet/mulModel-100/30/


Epoch:  62%|██████▏   | 31/50 [01:24<01:09,  3.65s/it]

Train loss: 0.008243121090345085

	your model is saved: /disk/data/models/dict_framenet/mulModel-100/31/


Epoch:  64%|██████▍   | 32/50 [01:29<01:13,  4.06s/it]

Train loss: 0.006486934049462434

	your model is saved: /disk/data/models/dict_framenet/mulModel-100/32/


Epoch:  66%|██████▌   | 33/50 [01:32<01:03,  3.72s/it]

Train loss: 0.0037227150787657592

	your model is saved: /disk/data/models/dict_framenet/mulModel-100/33/


Epoch:  68%|██████▊   | 34/50 [01:35<00:57,  3.59s/it]

Train loss: 0.0009305440689786337

	your model is saved: /disk/data/models/dict_framenet/mulModel-100/34/


Epoch:  70%|███████   | 35/50 [01:39<00:54,  3.61s/it]

Train loss: 0.0009307972504757345

	your model is saved: /disk/data/models/dict_framenet/mulModel-100/35/


KeyboardInterrupt: 

In [None]:
# by 25% (4460)

# model_dir = '/disk/data/models/framenet/mulModel-25/'
# epochs = 50

# trn, dev, tst = dataio.load_data(srl=srl, language='ko')

# # trn = random.sample(trn, k=4460)
# trn = random.sample(trn, k=50)
# language = 'multi'

# print('')
# print('### TRAINING')
# print('MODEL:', srl)
# print('LANGUAGE:', language)
# print('PRETRAINED BERT:', PRETRAINED_MODEL)
# print('training data:')
# print('\t(ko):', len(trn))
# print('BATCH_SIZE:', batch_size)
# print('MAX_LEN:', MAX_LEN)
# print('')

# bert_io = utils.for_BERT(mode='train', srl=srl, language=language, masking=masking, fnversion=fnversion, pretrained=PRETRAINED_MODEL)
# # train(retrain=True, pretrained_dir='/disk/data/models/framenet/enModel-with-exemplar/0/')

In [None]:
# by 50% (8919)

# model_dir = '/disk/data/models/framenet/mulModel-50/'
# epochs = 20

# trn, dev, tst = dataio.load_data(srl=srl, language='ko')

# trn = random.sample(trn, k=8919)
# language = 'multi'

# print('')
# print('### TRAINING')
# print('MODEL:', srl)
# print('LANGUAGE:', language)
# print('PRETRAINED BERT:', PRETRAINED_MODEL)
# print('training data:')
# print('\t(ko):', len(trn))
# print('BATCH_SIZE:', batch_size)
# print('MAX_LEN:', MAX_LEN)
# print('')

# bert_io = utils.for_BERT(mode='train', srl=srl, language=language, masking=masking, fnversion=fnversion, pretrained=PRETRAINED_MODEL)
# train(retrain=True, pretrained_dir='/disk/data/models/dict_framenet/enModel-with-exemplar/6/')

In [None]:
# by 75% (13378)

# model_dir = '/disk/data/models/framenet/mulModel-75/'
# epochs = 20

# trn, dev, tst = dataio.load_data(srl=srl, language='ko')

# trn = random.sample(trn, k=13378)
# language = 'multi'

# print('')
# print('### TRAINING')
# print('MODEL:', srl)
# print('LANGUAGE:', language)
# print('PRETRAINED BERT:', PRETRAINED_MODEL)
# print('training data:')
# print('\t(ko):', len(trn))
# print('BATCH_SIZE:', batch_size)
# print('MAX_LEN:', MAX_LEN)
# print('')

# bert_io = utils.for_BERT(mode='train', srl=srl, language=language, masking=masking, fnversion=fnversion, pretrained=PRETRAINED_MODEL)
# train(retrain=True, pretrained_dir='/disk/data/models/dict_framenet/enModel-with-exemplar/6/')