In [1]:
import sys
import glob
import torch
sys.path.append('../')
import os
from transformers import *
from kaiser.src import utils
from kaiser.src import dataio
from kaiser.src.modeling import BertForJointShallowSemanticParsing
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
import torch
from torch import nn
from torch.optim import Adam
from tqdm import tqdm, trange
from sklearn.metrics import accuracy_score
from seqeval.metrics import f1_score, precision_score, recall_score

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
n_gpu = torch.cuda.device_count()
if device != "cpu":
    torch.cuda.set_device(0)
# device = torch.device('cpu')
# torch.cuda.set_device(device)
# torch.backends.cudnn.deterministic = True
# torch.backends.cudnn.benchmark = True

import numpy as np
import random
np.random.seed(0)   
random.seed(0)

from torch import autograd
torch.cuda.empty_cache()

Using TensorFlow backend.


### Korean FrameNet ###
	# contact: hahmyg@kaist, hahmyg@gmail.com #



In [2]:
import random

In [3]:
# 실행시간 측정 함수
import time

_start_time = time.time()

def tic():
    global _start_time 
    _start_time = time.time()

def tac():
    t_sec = round(time.time() - _start_time)
    (t_min, t_sec) = divmod(t_sec,60)
    (t_hour,t_min) = divmod(t_min,60)
    
    result = '{}hour:{}min:{}sec'.format(t_hour,t_min,t_sec)
    return result

In [4]:
try:
    dir_path = os.path.dirname(os.path.abspath( __file__ ))
except:
    dir_path = '.'

In [5]:
def train(retrain=False, pretrained_dir=False):
    if pretrained_dir:
        print('original model:', pretrained_dir)
    else:
        print('original model:', 'BERT-multilingual-base')
    print('\n\tyour model would be saved at', model_dir)
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    # load a model first
    if retrain:
#         model_saved_path = pretrained_dir
        model = BertForJointShallowSemanticParsing.from_pretrained(pretrained_dir, 
                                                                   num_senses = len(bert_io.sense2idx), 
                                                                   num_args = len(bert_io.bio_arg2idx),
                                                                   lufrmap=bert_io.lufrmap, 
                                                                   frargmap = bert_io.bio_frargmap)
    else:
#         model_saved_path = PRETRAINED_MODEL
        model = BertForJointShallowSemanticParsing.from_pretrained(PRETRAINED_MODEL, 
                                                                   num_senses = len(bert_io.sense2idx), 
                                                                   num_args = len(bert_io.bio_arg2idx),
                                                                   lufrmap=bert_io.lufrmap, 
                                                                   frargmap = bert_io.bio_frargmap)
#         model.to(device)
    
    print('retrain:', retrain)
    tic()
    print('\n### converting data to BERT input...')
    trn_data = bert_io.convert_to_bert_input_JointShallowSemanticParsing(trn)
    print('\t ...is done:', tac())
    print('\t#of instance:', len(trn), len(trn_data))
    sampler = RandomSampler(trn)
    trn_dataloader = DataLoader(trn_data, sampler=sampler, batch_size=batch_size)
    
    # load optimizer
#     FULL_FINETUNING = True
#     if FULL_FINETUNING:
#         param_optimizer = list(model.named_parameters())
#         no_decay = ['bias', 'gamma', 'beta']
#         optimizer_grouped_parameters = [
#             {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
#              'weight_decay_rate': 0.01},
#             {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
#              'weight_decay_rate': 0.0}
#         ]
#     else:
#         param_optimizer = list(model.classifier.named_parameters()) 
#         optimizer_grouped_parameters = [{"params": [p for n, p in param_optimizer]}]
#     optimizer = Adam(optimizer_grouped_parameters, lr=3e-5)

    lr = 1e-3    
    optimizer = AdamW(model.parameters(), lr=lr, eps=1e-8)
    num_training_steps = len(trn_dataloader) // epochs
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=num_training_steps)  # PyTorch scheduler
    
    max_grad_norm = 1.0
#     global_step = 0
#     num_of_epoch = 1
    num_of_epoch = 1
    accuracy_result = []
    for _ in trange(epochs, desc="Epoch"):
        # load a fine-tuned model
#         print('epoch:', num_of_epoch)
#         print('epoch-1 model:', model_saved_path)
#         model = BertForJointShallowSemanticParsing.from_pretrained(model_saved_path, 
#                                                                    num_senses = len(bert_io.sense2idx), 
#                                                                    num_args = len(bert_io.bio_arg2idx),
#                                                                    lufrmap=bert_io.lufrmap, 
#                                                                    frargmap = bert_io.bio_frargmap)
#         model.to(device)
        
#         lr = 5e-5
#         optimizer = AdamW(model.parameters(), lr=lr, eps=1e-8)
#         num_training_steps = len(trn_dataloader) // epochs
#         scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=num_training_steps)  # PyTorch scheduler
        
        # TRAIN loop
        tr_loss = 0
        nb_tr_examples, nb_tr_steps = 0, 0
        for step, batch in enumerate(trn_dataloader):
            model.train()
            # add batch to gpu
            torch.cuda.set_device(0)
#             torch.cuda.set_device(device)
            batch = tuple(t.to(device) for t in batch)
            b_input_ids, b_input_orig_tok_to_maps, b_input_lus, b_input_senses, b_input_args, b_token_type_ids, b_input_masks = batch            
#             print(b_token_type_ids[0])
            # forward pass
#             with autograd.detect_anomaly():
            loss = model(b_input_ids, lus=b_input_lus, senses=b_input_senses, args=b_input_args,
                     token_type_ids=b_token_type_ids, attention_mask=b_input_masks)
            # backward pass


            loss.backward()
            # track train loss
            tr_loss += loss.item()
            nb_tr_examples += b_input_ids.size(0)
            nb_tr_steps += 1
            # gradient clipping
            torch.nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=max_grad_norm)
            # update parameters
            optimizer.step()
            scheduler.step()
            model.zero_grad()

        print("Train loss: {}".format(tr_loss/nb_tr_steps))
        
        # save your model
        model_saved_path = model_dir+str(num_of_epoch)+'/'
        print('\n\tyour model is saved:', model_saved_path)
        if not os.path.exists(model_saved_path):
            os.makedirs(model_saved_path)
        model.save_pretrained(model_saved_path)
        
        # load a fine-tuned model
#         model = BertForJointShallowSemanticParsing.from_pretrained(model_saved_path, 
#                                                                    num_senses = len(bert_io.sense2idx), 
#                                                                    num_args = len(bert_io.bio_arg2idx),
#                                                                    lufrmap=bert_io.lufrmap, 
#                                                                    frargmap = bert_io.bio_frargmap)
#         model.to(device)
        
        num_of_epoch += 1

        
#         break
    print('...training is done')

In [6]:
srl = 'framenet'
masking = True
MAX_LEN = 256
batch_size = 6
PRETRAINED_MODEL = "bert-base-multilingual-cased"
fnversion = '1.7'
language = 'multi'

# (1) train En-FN with exemplars model

In [7]:
model_dir = '/disk/data/models/framenet/enModel-with-exemplar/'
trn, dev, tst = dataio.load_data(srl=srl, language='en')
# epochs = 10
epochs = 9

print('')
print('### TRAINING')
print('MODEL:', srl)
print('LANGUAGE:', language)
print('PRETRAINED BERT:', PRETRAINED_MODEL)
print('training data:')
print('\t(en):', len(trn))
print('BATCH_SIZE:', batch_size)
print('MAX_LEN:', MAX_LEN)
print('')

# bert_io = utils.for_BERT(mode='train', srl=srl, language=language, masking=masking, fnversion=fnversion, pretrained=PRETRAINED_MODEL)
# train()
# train(retrain=True, pretrained_dir='/disk/data/models/framenet/enModel-with-exemplar/0/')

# of instances in trn: 211812
# of instances in dev: 2272
# of instances in tst: 6714
data example: [['Greece', 'wildfires', 'force', 'thousands', 'to', '<tgt>', 'evacuate', '</tgt>'], ['_', '_', '_', '_', '_', '_', 'evacuate.v', '_'], ['_', '_', '_', '_', '_', '_', 'Escaping', '_'], ['O', 'O', 'O', 'B-Escapee', 'O', 'X', 'O', 'X']]

### TRAINING
MODEL: framenet
LANGUAGE: multi
PRETRAINED BERT: bert-base-multilingual-cased
training data:
	(en): 211812
BATCH_SIZE: 6
MAX_LEN: 256



# (2) fine-tuning by Korean FrameNet

In [8]:
# by 100%

model_dir = '/disk/data/models/framenet/mulModel-100/'
epochs = 20

trn, dev, tst = dataio.load_data(srl=srl, language='ko')

language = 'multi'

print('')
print('### TRAINING')
print('MODEL:', srl)
print('LANGUAGE:', language)
print('PRETRAINED BERT:', PRETRAINED_MODEL)
print('training data:')
print('\t(ko):', len(trn))
print('BATCH_SIZE:', batch_size)
print('MAX_LEN:', MAX_LEN)
print('')

# bert_io = utils.for_BERT(mode='train', srl=srl, language=language, masking=masking, fnversion=fnversion, pretrained=PRETRAINED_MODEL)
# train(retrain=True, pretrained_dir='/disk/data/models/dict_framenet/enModel-with-exemplar/6/')


### loading Korean FrameNet 1.1 data...
	# of instances in training data: 17838
	# of instances in dev data: 2548
	# of instances in test data: 5097
# of instances in trn: 17838
# of instances in dev: 2548
# of instances in tst: 5097
data example: [['태풍', 'Hugo가', '남긴', '피해들과', '회사', '내', '몇몇', '주요', '부서들의', '저조한', '실적들을', '반영하여,', 'Aetna', 'Life', 'and', 'Casualty', 'Co.의', '3분기', '<tgt>', '순이익이', '</tgt>', '182.6', '백만', '달러', '또는', '주당', '1.63', '달러로', '22', '%', '하락하였다.'], ['_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '이익.n', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', 'Earnings_and_losses', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_'], ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-Earner', 'I-Earner', 'I-Earner', 'I-Earner', 'I-Earner', 'B-Time', 'X', 'O', 'X', 'O', 'O', 'O', 'O', 'O', 'O', 'O', '

In [9]:
# by 25% (4460)

model_dir = '/disk/data/models/framenet/mulModel-25/'
epochs = 50

trn, dev, tst = dataio.load_data(srl=srl, language='ko')

# trn = random.sample(trn, k=4460)
trn = random.sample(trn, k=50)
language = 'multi'

print('')
print('### TRAINING')
print('MODEL:', srl)
print('LANGUAGE:', language)
print('PRETRAINED BERT:', PRETRAINED_MODEL)
print('training data:')
print('\t(ko):', len(trn))
print('BATCH_SIZE:', batch_size)
print('MAX_LEN:', MAX_LEN)
print('')

bert_io = utils.for_BERT(mode='train', srl=srl, language=language, masking=masking, fnversion=fnversion, pretrained=PRETRAINED_MODEL)
train(retrain=True, pretrained_dir='/disk/data/models/framenet/enModel-with-exemplar/0/')


### loading Korean FrameNet 1.1 data...
	# of instances in training data: 17838
	# of instances in dev data: 2548
	# of instances in test data: 5097
# of instances in trn: 17838
# of instances in dev: 2548
# of instances in tst: 5097
data example: [['태풍', 'Hugo가', '남긴', '피해들과', '회사', '내', '몇몇', '주요', '부서들의', '저조한', '실적들을', '반영하여,', 'Aetna', 'Life', 'and', 'Casualty', 'Co.의', '3분기', '<tgt>', '순이익이', '</tgt>', '182.6', '백만', '달러', '또는', '주당', '1.63', '달러로', '22', '%', '하락하였다.'], ['_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '이익.n', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', 'Earnings_and_losses', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_'], ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-Earner', 'I-Earner', 'I-Earner', 'I-Earner', 'I-Earner', 'B-Time', 'X', 'O', 'X', 'O', 'O', 'O', 'O', 'O', 'O', 'O', '

Epoch:   0%|          | 0/50 [00:00<?, ?it/s]

	 ...is done: 0hour:0min:0sec
	#of instance: 50 50
epoch: 1
epoch-1 model: /disk/data/models/framenet/enModel-with-exemplar/0/


  pred_logits = sm(masked_logit).view(1,-1)


Train loss: 1.4148908919758267

	your model is saved: /disk/data/models/framenet/mulModel-25/1/


Epoch:   2%|▏         | 1/50 [00:12<10:28, 12.82s/it]

epoch: 2
epoch-1 model: /disk/data/models/framenet/mulModel-25/1/
Train loss: 1.3843992749849956

	your model is saved: /disk/data/models/framenet/mulModel-25/2/


Epoch:   4%|▍         | 2/50 [00:24<09:53, 12.36s/it]

epoch: 3
epoch-1 model: /disk/data/models/framenet/mulModel-25/2/
Train loss: 1.3214147090911865

	your model is saved: /disk/data/models/framenet/mulModel-25/3/


Epoch:   6%|▌         | 3/50 [00:35<09:21, 11.95s/it]

epoch: 4
epoch-1 model: /disk/data/models/framenet/mulModel-25/3/
Train loss: 1.3318035470114813

	your model is saved: /disk/data/models/framenet/mulModel-25/4/


Epoch:   8%|▊         | 4/50 [00:46<09:01, 11.77s/it]

epoch: 5
epoch-1 model: /disk/data/models/framenet/mulModel-25/4/
Train loss: 1.3758887582355075

	your model is saved: /disk/data/models/framenet/mulModel-25/5/


Epoch:  10%|█         | 5/50 [00:57<08:40, 11.57s/it]

epoch: 6
epoch-1 model: /disk/data/models/framenet/mulModel-25/5/
Train loss: 1.317187786102295

	your model is saved: /disk/data/models/framenet/mulModel-25/6/


Epoch:  12%|█▏        | 6/50 [01:09<08:28, 11.55s/it]

epoch: 7
epoch-1 model: /disk/data/models/framenet/mulModel-25/6/
Train loss: 1.3251810404989455

	your model is saved: /disk/data/models/framenet/mulModel-25/7/


Epoch:  14%|█▍        | 7/50 [01:20<08:09, 11.38s/it]

epoch: 8
epoch-1 model: /disk/data/models/framenet/mulModel-25/7/
Train loss: 1.3111643857426114

	your model is saved: /disk/data/models/framenet/mulModel-25/8/


Epoch:  16%|█▌        | 8/50 [01:31<07:54, 11.31s/it]

epoch: 9
epoch-1 model: /disk/data/models/framenet/mulModel-25/8/
Train loss: 1.3505793147616916

	your model is saved: /disk/data/models/framenet/mulModel-25/9/


Epoch:  18%|█▊        | 9/50 [01:42<07:42, 11.28s/it]

epoch: 10
epoch-1 model: /disk/data/models/framenet/mulModel-25/9/
Train loss: 1.3792431950569153

	your model is saved: /disk/data/models/framenet/mulModel-25/10/


Epoch:  20%|██        | 10/50 [01:53<07:29, 11.24s/it]

epoch: 11
epoch-1 model: /disk/data/models/framenet/mulModel-25/10/
Train loss: 1.3404770559734769

	your model is saved: /disk/data/models/framenet/mulModel-25/11/


Epoch:  22%|██▏       | 11/50 [02:04<07:16, 11.20s/it]

epoch: 12
epoch-1 model: /disk/data/models/framenet/mulModel-25/11/
Train loss: 1.3467484845055475

	your model is saved: /disk/data/models/framenet/mulModel-25/12/


Epoch:  24%|██▍       | 12/50 [02:15<07:04, 11.16s/it]

epoch: 13
epoch-1 model: /disk/data/models/framenet/mulModel-25/12/
Train loss: 1.3739228314823575

	your model is saved: /disk/data/models/framenet/mulModel-25/13/


Epoch:  26%|██▌       | 13/50 [02:26<06:51, 11.13s/it]

epoch: 14
epoch-1 model: /disk/data/models/framenet/mulModel-25/13/
Train loss: 1.4698355860180325

	your model is saved: /disk/data/models/framenet/mulModel-25/14/


Epoch:  28%|██▊       | 14/50 [02:37<06:40, 11.13s/it]

epoch: 15
epoch-1 model: /disk/data/models/framenet/mulModel-25/14/
Train loss: 1.4548249708281622

	your model is saved: /disk/data/models/framenet/mulModel-25/15/


Epoch:  30%|███       | 15/50 [02:49<06:30, 11.15s/it]

epoch: 16
epoch-1 model: /disk/data/models/framenet/mulModel-25/15/


KeyboardInterrupt: 

In [None]:
# by 50% (8919)

model_dir = '/disk/data/models/framenet/mulModel-50/'
epochs = 20

trn, dev, tst = dataio.load_data(srl=srl, language='ko')

trn = random.sample(trn, k=8919)
language = 'multi'

print('')
print('### TRAINING')
print('MODEL:', srl)
print('LANGUAGE:', language)
print('PRETRAINED BERT:', PRETRAINED_MODEL)
print('training data:')
print('\t(ko):', len(trn))
print('BATCH_SIZE:', batch_size)
print('MAX_LEN:', MAX_LEN)
print('')

# bert_io = utils.for_BERT(mode='train', srl=srl, language=language, masking=masking, fnversion=fnversion, pretrained=PRETRAINED_MODEL)
# train(retrain=True, pretrained_dir='/disk/data/models/dict_framenet/enModel-with-exemplar/6/')

In [None]:
# by 75% (13378)

model_dir = '/disk/data/models/framenet/mulModel-75/'
epochs = 20

trn, dev, tst = dataio.load_data(srl=srl, language='ko')

trn = random.sample(trn, k=13378)
language = 'multi'

print('')
print('### TRAINING')
print('MODEL:', srl)
print('LANGUAGE:', language)
print('PRETRAINED BERT:', PRETRAINED_MODEL)
print('training data:')
print('\t(ko):', len(trn))
print('BATCH_SIZE:', batch_size)
print('MAX_LEN:', MAX_LEN)
print('')

# bert_io = utils.for_BERT(mode='train', srl=srl, language=language, masking=masking, fnversion=fnversion, pretrained=PRETRAINED_MODEL)
# train(retrain=True, pretrained_dir='/disk/data/models/dict_framenet/enModel-with-exemplar/6/')