In [1]:
import sys
import glob
import torch
sys.path.append('../')
import os
from transformers import *
from kaiser.src import utils
from kaiser.src import dataio
from kaiser.src.modeling import BertForJointShallowSemanticParsing, FrameBERT
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
import torch
from torch import nn
from torch.optim import Adam
from tqdm import tqdm, trange
from sklearn.metrics import accuracy_score
from seqeval.metrics import f1_score, precision_score, recall_score

import torch.nn.functional as F
from torch.nn import CrossEntropyLoss

# Prefer the first CUDA GPU when one is available; otherwise run on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
n_gpu = torch.cuda.device_count()
# Test ``device.type`` rather than comparing the device object with a string:
# a torch.device never compares equal to the plain string "cpu", so a
# ``device != "cpu"`` check is always true and would call
# torch.cuda.set_device(0) on CPU-only machines as well, where it fails.
if device.type == "cuda":
    torch.cuda.set_device(0)
# device = torch.device('cpu')
# torch.cuda.set_device(device)
# torch.backends.cudnn.deterministic = True
# torch.backends.cudnn.benchmark = True

import numpy as np
import random
# Seed every random number generator this notebook relies on so that data
# sampling and model weight initialisation are repeatable across runs.
np.random.seed(0)
random.seed(0)
# PyTorch keeps its own generator; without this call the randomly
# initialised layers created later differ on every kernel restart.
torch.manual_seed(0)
import random

from torch import autograd
torch.cuda.empty_cache()

### Korean FrameNet ###
	# contact: hahmyg@kaist, hahmyg@gmail.com #



Using TensorFlow backend.


In [2]:
# Helpers for measuring elapsed wall-clock time.
import time

# Reference timestamp shared by tic() and tac(); set when this cell runs
# and reset on every call to tic().
_start_time = time.time()

def tic():
    """Restart the stopwatch by storing the current time in ``_start_time``.

    Returns nothing; tac() measures elapsed time from the stored timestamp.
    """
    global _start_time
    now = time.time()
    _start_time = now

def tac():
    """Return the time elapsed since ``_start_time`` as a formatted string.

    The elapsed time is rounded to whole seconds and rendered as
    ``'<h>hour:<m>min:<s>sec'``.
    """
    elapsed = round(time.time() - _start_time)
    # Peel off whole hours first, then split the remainder into min/sec.
    hours, remainder = divmod(elapsed, 3600)
    minutes, seconds = divmod(remainder, 60)
    return '{}hour:{}min:{}sec'.format(hours, minutes, seconds)

In [3]:
# Resolve the directory that contains this code.  ``__file__`` is not defined
# inside a notebook kernel, which raises NameError; in that case fall back to
# the current working directory.  Only NameError is caught so that unrelated
# failures are not silently swallowed.
try:
    dir_path = os.path.dirname(os.path.abspath(__file__))
except NameError:
    dir_path = '.'

In [4]:
# --- Experiment configuration ---------------------------------------------
# Settings forwarded to the BERT input helper below.
PRETRAINED_MODEL = "bert-base-multilingual-cased"
language = 'multi'
fnversion = '1.7'
srl = 'framenet'
masking = True

# Settings not consumed in this cell.
MAX_LEN = 256
batch_size = 3

# Build the BERT input helper from the settings above.
bert_io = utils.for_BERT(
    mode='train',
    srl=srl,
    language=language,
    masking=masking,
    fnversion=fnversion,
    pretrained=PRETRAINED_MODEL,
)

used dictionary:
	 /disk/kaiser/kaiser/src/../koreanframenet/resource/info/mul_lu2idx.json
	 /disk/kaiser/kaiser/src/../koreanframenet/resource/info/mul_lufrmap.json
	 /disk/kaiser/kaiser/src/../koreanframenet/resource/info/mul_bio_frargmap.json


In [5]:
# Directory holding the saved FrameBERT weights.
# NOTE(review): absolute, machine-specific path; consider making it configurable.
frameBERT_dir = '/disk/data/models/frameBERT/frameBERT_en'

# Restore the model. The label-space sizes and the two lookup maps are taken
# from the bert_io helper so the model matches the data it will be fed.
frameBERT = FrameBERT.from_pretrained(
    frameBERT_dir,
    num_senses=len(bert_io.sense2idx),
    num_args=len(bert_io.bio_arg2idx),
    lufrmap=bert_io.lufrmap,
    frargmap=bert_io.bio_frargmap,
)

In [6]:
# Move the model to the selected device and switch it to evaluation mode.
# Both calls return the module itself, so the cell still displays its repr.
frameBERT.to(device).eval()

FrameBERT(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(119547, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
 

In [7]:
# Load the train / dev / test splits of the Korean ('ko') data for the
# annotation scheme selected by `srl`.
trn, dev, tst = dataio.load_data(srl=srl, language='ko')


### loading Korean FrameNet 1.1 data...
	# of instances in training data: 17838
	# of instances in dev data: 2548
	# of instances in test data: 5097
# of instances in trn: 17838
# of instances in dev: 2548
# of instances in tst: 5097
data example: [['태풍', 'Hugo가', '남긴', '피해들과', '회사', '내', '몇몇', '주요', '부서들의', '저조한', '실적들을', '반영하여,', 'Aetna', 'Life', 'and', 'Casualty', 'Co.의', '3분기', '<tgt>', '순이익이', '</tgt>', '182.6', '백만', '달러', '또는', '주당', '1.63', '달러로', '22', '%', '하락하였다.'], ['_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '이익.n', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', 'Earnings_and_losses', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_'], ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-Earner', 'I-Earner', 'I-Earner', 'I-Earner', 'I-Earner', 'B-Time', 'X', 'O', 'X', 'O', 'O', 'O', 'O', 'O', 'O', 'O', '

In [8]:
# Keep a random 20-instance subset of the training data.
# NOTE(review): this overwrites `trn`, so the full training split is no longer
# available until the data-loading cell is run again.
trn = random.sample(trn, k=20)
# Convert the sampled instances into the dataset later fed to the DataLoader.
trn_data = bert_io.convert_to_bert_input_JointShallowSemanticParsing(trn)

In [10]:
class MLP(nn.Module):
    """Two-layer feed-forward network: Linear -> ReLU -> Linear.

    Args:
        input_dim: Size of each flattened input vector (default 768).
        hidden_dim: Width of the hidden layer (default 768).
        output_dim: Size of each output vector (default 768).

    Called with no arguments, the module is the same 768 -> 768 -> 768
    network as before, and its parameter names (``layers.0.*``,
    ``layers.2.*``) are unchanged.
    """

    def __init__(self, input_dim=768, hidden_dim=768, output_dim=768):
        super(MLP, self).__init__()
        self.layers = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim),
        )

    def forward(self, x):
        """Flatten each sample to a vector and apply the two linear layers.

        Args:
            x: Tensor whose first dimension is the batch; all remaining
                dimensions are flattened and must total ``input_dim``.

        Returns:
            Tensor of shape ``(batch, output_dim)``.
        """
        # reshape (not view) so non-contiguous inputs, e.g. transposed
        # tensors, are accepted instead of raising a RuntimeError.
        x = x.reshape(x.size(0), -1)
        return self.layers(x)

In [15]:
# Push one batch of frameBERT pooled outputs through a freshly created MLP
# and print the tensors and their sizes before and after the projection.
mlp_model = MLP()
mlp_model.to(device)
mlp_model.train()

trn_dataloader = DataLoader(trn_data, batch_size=6)
for step, batch in enumerate(trn_dataloader):
    # Move every tensor of the batch onto the compute device.
    batch = tuple(tensor.to(device) for tensor in batch)
    (b_input_ids, b_input_orig_tok_to_maps, b_lus, b_input_senses,
     b_input_args, b_token_type_ids, b_masks) = batch

    sequence_output, pooled_output = frameBERT(
        b_input_ids,
        lus=b_lus,
        token_type_ids=b_token_type_ids,
        attention_mask=b_masks,
    )

    print(pooled_output)
    print(pooled_output.size())
    output = mlp_model(pooled_output)
    print(output)
    print(output.size())

    # Only the first batch is inspected.
    break
    
    

tensor([[-0.5731,  0.9070,  0.8567,  ..., -0.1372,  0.9671,  0.9165],
        [ 0.9999,  0.6408, -0.9773,  ...,  0.8550, -0.9510,  1.0000],
        [ 0.3481, -0.5574, -0.9956,  ..., -0.9161,  0.5484,  0.9990],
        [ 0.9818, -0.0512, -0.4125,  ...,  0.9833, -0.0416, -0.9976],
        [ 0.9046,  0.9750, -0.9326,  ...,  0.9987, -0.9924, -0.9237],
        [ 0.9997, -0.8732,  0.7551,  ...,  0.9945, -0.9858,  0.1174]],
       device='cuda:0', grad_fn=<TanhBackward>)
torch.Size([6, 768])
tensor([[-0.1444, -0.1439,  0.1133,  ...,  0.1005, -0.0068, -0.1732],
        [-0.2427,  0.1414,  0.0127,  ...,  0.1931, -0.0739,  0.0942],
        [ 0.2843,  0.4282, -0.1285,  ..., -0.1522,  0.0396,  0.4727],
        [ 0.3577,  0.2650, -0.2974,  ...,  0.1314, -0.2005,  0.3429],
        [-0.1845, -0.2367,  0.0964,  ..., -0.1168,  0.0016,  0.0440],
        [ 0.0022,  0.3373,  0.3122,  ..., -0.1678,  0.1141, -0.0481]],
       device='cuda:0', grad_fn=<AddmmBackward>)
torch.Size([6, 768])


support : frame 별 예문 5개
query: definition 1개

(1) 각각을 frameBERT 통과시킴

(2) transformer encoder 통과시킴

(3) prototype 학습시킴

(4) transformer encoder --> model.save_dict

train:

    (5) frameBERT_en + transformer 에 frame definition 통과시킴 --> prototype
    
    (6) frameBERT_ko + transformer 에 예문 통과시킴 --> prototype ko --> loss distilling
    
    (7) frame identification, arg identification 

꼭 transformer 아님
그냥 단순한 MLP 로 linear map 2개 쌓아서 하면 될 것 같음

MLP 통과시킨 support 의 평균과
MLP 통과시킨 query의 거리가 최소화 되도록 

일단 해볼것

frameBERT로 각 definition

In [19]:
# Smoke-test a stock PyTorch Transformer encoder on random input.
encoder_layer = nn.TransformerEncoderLayer(512, 8)  # d_model=512, nhead=8
transformer_encoder = nn.TransformerEncoder(encoder_layer, 6)  # num_layers=6
src = torch.rand((10, 32, 512))
# The encoder returns a tensor with the same shape as its input.
out = transformer_encoder(src)