## Entity and Intent Classification 

- 인텐트와 엔티티의 멀티태스크 러닝(multi-task learning)을 수행한다.

In [1]:
import pandas as pd
import numpy as np
from mxnet.gluon import nn, rnn
from mxnet import gluon, autograd
import gluonnlp as nlp
from mxnet import nd 
import mxnet as mx
import time
import itertools
from tqdm import tqdm
import multiprocessing as mp

In [2]:
# Both files are tab-separated and read with explicit column names:
# intent label, per-character entity tag string, and the raw sentence.
train_raw = pd.read_csv("data/trainset.txt",names=['intent', 'entity', 'sentence'], sep='\t')
# NOTE: the "validation" frame is loaded from the hidden test file; the
# validation.txt alternative is kept commented out below.
validation_raw = pd.read_csv("data/test_hidden.txt",names=['intent', 'entity', 'sentence'], sep='\t')
#validation_raw = pd.read_csv("data/validation.txt",names=['intent', 'entity', 'sentence'], sep='\t')

In [3]:
train_raw.head(10)

Unnamed: 0,intent,entity,sentence
0,area,EECCCCCCCCCCCCCCCCCCC,자강의 면적은 얼마 정도되는지 알려줄래
1,birth_date,CCCCCCCCCCCCEEECCCCCCCCCCCC,WIKI PEDIA로 변재일 생년월일을 알고 싶어
2,age,EEEEEEEEEEECCCCCCCCCCCCCCCCC,남쪽 물고기자리 알파 나이가 위키백과사전으로 얼마야
3,length,EEEECCCCCCCCCCCCCCCCCC,삼양터널의 총 길이 위키백과사전에서 뭐야
4,birth_place,EEEEEECCCCCCCCCCC,코니 윌리스의 태어난 곳은 뭐지
5,weight,CCCCCCCCCCCCEEEECCCCCCCCCCCCC,WIKI백과사전 검색 AA12의 무게가 얼만지 찾아봐
6,definition,CCCCCCCCCCCCCEEECCCCCCCC,WIKIPEDIA백과로 라이프 찾아서 말해줘
7,height,EEEEEEEECCCCCCCCCCCCCCCCCCC,송파 헬리오시티 구조물 높이 위키 피디아에서 뭐야
8,birth_date,CCCEEEEEECCCCCCCCCCCCCCC,검색 HLKVAM 언제 출생했는지를 검색해라
9,height,CCCCCCCCEEEEEECCCCCCCC,위키 피디아에 푸조 508 전고가 몇이야


### Intent Classification

#### 데이터 전처리

In [4]:
# Rows are reordered to (sentence, intent, entity), the layout preprocess() unpacks.
train_dataset = list(zip(train_raw['sentence'], train_raw['intent'], train_raw['entity']))
valid_dataset = list(zip(validation_raw['sentence'], validation_raw['intent'], validation_raw['entity']))

In [5]:
# Fixed number of character positions fed to the model per sentence.
seq_len = 32

# Pads a token list with "<pad>" (or clips it) to exactly seq_len entries.
length_clip = nlp.data.PadSequence(seq_len, pad_val="<pad>")

def preprocess(data):
    """Convert one (sentence, intent, entity) row into fixed-length character features.

    Args:
        data: Tuple ``(sent, intent, entity)``. Each element is coerced with
            ``str`` before use.

    Returns:
        Tuple ``(chars, sent_len, intent, tags)`` where ``chars`` and ``tags``
        are the sentence characters and entity tag characters passed through
        ``length_clip``, ``sent_len`` is the number of sentence characters
        capped at ``seq_len``, and ``intent`` is the intent label string.
    """
    sent, intent, entity = data
    char_sent = list(str(sent))
    char_entity = list(str(entity))
    char_intent = str(intent)
    # Measure the coerced character list, not the raw cell: a non-string value
    # (e.g. a number or NaN parsed by pandas) has no len() and would raise.
    sent_len = min(len(char_sent), seq_len)
    return (length_clip(char_sent), sent_len, char_intent, length_clip(char_entity))

def preprocess_dataset(dataset):
    """Apply ``preprocess`` to every row using a process pool.

    Args:
        dataset: Sequence of ``(sentence, intent, entity)`` rows.

    Returns:
        A ``gluon.data.SimpleDataset`` holding one ``preprocess`` result per
        row. The elapsed time and row count are printed as a side effect.
    """
    started_at = time.time()
    with mp.Pool() as workers:
        rows = workers.map(preprocess, dataset)
        processed = gluon.data.SimpleDataset(rows)
    elapsed = time.time() - started_at
    print('Done! Tokenizing Time={:.2f}s, #Sentences={}'
          .format(elapsed, len(processed)))
    return processed


In [6]:
# Character-level preprocessing of both splits; each call prints its timing.
train_preprocessed  = preprocess_dataset(train_dataset)
valid_preprocessed  = preprocess_dataset(valid_dataset)

Done! Tokenizing Time=0.15s, #Sentences=9000
Done! Tokenizing Time=0.13s, #Sentences=1000


In [7]:
# Frequencies are taken from the training split only: sentence characters,
# intent labels, and entity tag characters (padding entries included).
counter_sent = nlp.data.count_tokens(
    ch for chars, _, _, _ in train_preprocessed for ch in chars)
counter_intent = nlp.data.count_tokens(
    [label for _, _, label, _ in train_preprocessed])
counter_entity = nlp.data.count_tokens(
    tag for _, _, _, tags in train_preprocessed for tag in tags)

In [8]:
counter_intent

Counter({'age': 900,
         'area': 900,
         'belong_to': 900,
         'birth_date': 900,
         'birth_place': 900,
         'definition': 900,
         'height': 900,
         'length': 900,
         'weight': 900,
         'width': 900})

In [9]:
# Character vocabulary: keeps the default unknown/padding tokens and applies
# min_freq=15, so rarer characters are not indexed individually.
vocab_sent = nlp.Vocab(counter_sent, bos_token=None, eos_token=None, min_freq=15)
# Label vocabularies are built without any special tokens so indices can be
# used directly as class ids. The "<pad>" entry of vocab_entity is an ordinary
# counted token coming from the padded tag sequences.
vocab_intent = nlp.Vocab(counter_intent, bos_token=None, eos_token=None, unknown_token=None, padding_token=None)
vocab_entity = nlp.Vocab(counter_entity, bos_token=None, eos_token=None, unknown_token=None, padding_token=None)

In [10]:
vocab_sent.idx_to_token[:10], vocab_intent.idx_to_token[:10], vocab_entity.idx_to_token[:10]

(['<unk>', '<pad>', ' ', 'I', '이', '색', '검', '의', '지', '아'],
 ['age',
  'area',
  'belong_to',
  'birth_date',
  'birth_place',
  'definition',
  'height',
  'length',
  'weight',
  'width'],
 ['C', '<pad>', 'E'])

In [11]:
# Map characters, intent labels and entity tags to their vocabulary indices;
# the sentence length is carried through unchanged.
train_preprocessed_encoded = [
    (vocab_sent[chars], n_chars, vocab_intent[label], vocab_entity[tags])
    for chars, n_chars, label, tags in train_preprocessed
]
valid = [
    (vocab_sent[chars], n_chars, vocab_intent[label], vocab_entity[tags])
    for chars, n_chars, label, tags in valid_preprocessed
]

In [12]:
# Hold out 10% of the encoded training rows. This hold-out is called `test`,
# whereas `valid` is built from the separately loaded validation_raw file.
train, test = nlp.data.train_valid_split(train_preprocessed_encoded, valid_ratio=0.1)

In [13]:
# Mini-batch size shared by all three loaders.
nbatch = 30
# One Stack per sample field, in order: token ids, length, intent id, tag ids.
# The length field is stacked as float32.
# NOTE(review): presumably float32 is used because the model divides by the
# length and passes it to SequenceMask — confirm.
batchify_fn = nlp.data.batchify.Tuple(nlp.data.batchify.Stack(),
                                      nlp.data.batchify.Stack('float32'),
                                      nlp.data.batchify.Stack(),
                                      nlp.data.batchify.Stack())

# NOTE(review): the evaluation loaders (test/valid) are shuffled as well;
# shuffling is not needed for evaluation.
train_dataloader  = gluon.data.DataLoader(train, batch_size=nbatch, batchify_fn=batchify_fn, shuffle=True)
test_dataloader  = gluon.data.DataLoader(test, batch_size=nbatch, batchify_fn=batchify_fn, shuffle=True)
valid_dataloader  = gluon.data.DataLoader(valid, batch_size=nbatch, batchify_fn=batchify_fn, shuffle=True)

#### 모델링 

In [28]:
class IntentEntityMultiTask(gluon.HybridBlock):
    """Joint intent classifier and per-position entity tagger.

    Both heads read the same dropout-regularised embeddings but use separate
    LSTM encoders: a unidirectional LSTM whose length-masked outputs are
    averaged over time for the intent head, and a bidirectional LSTM whose
    per-step outputs feed two dense layers for the entity head.

    Args:
        vocab_size: Number of rows in the embedding table.
        intent_class_size: Number of intent classes (intent head width).
        entity_class_size: Number of entity tag classes (entity head width).
        num_embed: Embedding dimension.
        seq_len: Stored on the instance; not otherwise used by this class.
        hidden_size: Hidden units passed to each LSTM.
        **kwargs: Forwarded to ``gluon.HybridBlock``.
    """

    def __init__(self, vocab_size, intent_class_size, entity_class_size, num_embed, seq_len, hidden_size, **kwargs):
        super(IntentEntityMultiTask, self).__init__(**kwargs)
        self.seq_len = seq_len
        self.hidden_size = hidden_size 
        self.intent_class_size = intent_class_size
        self.entity_class_size = entity_class_size
        
        with self.name_scope():
            self.embed = nn.Embedding(input_dim=vocab_size, output_dim=num_embed)
            self.dropout = nn.Dropout(0.3)
            # NOTE(review): despite its name this encoder is created without
            # bidirectional=True. Its dropout argument presumably has no
            # effect on a single-layer LSTM — confirm against the rnn.LSTM docs.
            self.bilstm = rnn.LSTM(self.hidden_size, dropout=0.3)
            self.out_intent = nn.Dense(self.intent_class_size)
            # Separate bidirectional encoder used only by the entity head.
            self.bilstm_last = rnn.LSTM(self.hidden_size, dropout=0.3, bidirectional=True)
            
            # flatten=False keeps the per-position axis so the dense layers
            # are applied to every time step independently.
            self.dense_en = nn.Dense(50, flatten=False)
            self.out_entity = nn.Dense(self.entity_class_size, flatten=False)
            
    def hybrid_forward(self, F ,inputs, length):
        """Compute intent scores and per-position entity scores.

        Args:
            F: ``mxnet.nd`` or ``mxnet.sym`` namespace supplied by HybridBlock.
            inputs: Token indices. Callers in this file pass ``data.T``, i.e.
                presumably (seq_len, batch) — TODO confirm.
            length: Valid length of each sequence in the batch.

        Returns:
            Tuple ``(intent, entity)``: the intent head output and the entity
            head output after the transpose to ``(1, 0, 2)`` axis order.
        """
        em_out = self.dropout(self.embed(inputs))
        bilstm = self.bilstm(em_out)
        # Mask steps beyond each sequence's length before pooling over time.
        masked_encoded_intent = F.SequenceMask(bilstm,
                                sequence_length=length,
                                use_sequence_length=True)
        # Mean over valid steps: sum over axis 0 divided by the length.
        agg_intent = F.broadcast_div(F.sum(masked_encoded_intent, axis=0), 
                            F.expand_dims(length, axis=1))
        intent = self.out_intent(agg_intent) 
        
        # Entity branch: re-encode the same embeddings with the bidirectional LSTM.
        bilstm_last = self.bilstm_last(em_out)
        
        # Mask padded steps, then swap the first two axes for the dense layers.
        masked_encoded_entity = F.SequenceMask(bilstm_last,
                                sequence_length=length,
                                use_sequence_length=True).transpose((1,0,2))
        fc_entity = self.dense_en(masked_encoded_entity)
        entity = self.out_entity(fc_entity) 
        return(intent, entity)

![](model-architecture.png)

In [29]:
# Use the GPU when MXNet reports one, otherwise fall back to the CPU so the
# notebook still runs on machines without a usable GPU.
ctx = mx.gpu() if mx.context.num_gpus() > 0 else mx.cpu()

# Head widths come from the label vocabularies; the embedding table covers the
# whole character vocabulary (including its special tokens).
model = IntentEntityMultiTask(vocab_size = len(vocab_sent.idx_to_token), 
                              intent_class_size=len(vocab_intent.idx_to_token), 
                              entity_class_size=len(vocab_entity.idx_to_token), num_embed=50, seq_len=seq_len, hidden_size=30)

In [30]:
# Initialise all parameters with the Xavier initializer on the chosen context.
model.initialize(mx.initializer.Xavier(), ctx=ctx)

In [31]:
# Adam over all model parameters, with no optimizer arguments overridden.
trainer = gluon.Trainer(model.collect_params(),"Adam")
# A single softmax cross-entropy object; the training loop applies it to both
# the intent output and the entity output.
loss = gluon.loss.SoftmaxCELoss()


In [32]:
# Hybridize the block; the model is exported with model.export() later on.
model.hybridize()

In [33]:
print(model)

IntentEntityMultiTask(
  (out_entity): Dense(None -> 3, linear)
  (out_intent): Dense(None -> 10, linear)
  (embed): Embedding(481 -> 50, float32)
  (dropout): Dropout(p = 0.3, axes=())
  (dense_en): Dense(None -> 50, linear)
  (bilstm): LSTM(None -> 30, TNC, dropout=0.3)
  (bilstm_last): LSTM(None -> 30, TNC, dropout=0.3, bidirectional)
)


In [34]:
def evaluate_accuracy(model, data_iter, ctx=ctx):
    """Compute intent accuracy and exact-match entity accuracy.

    Args:
        model: Callable returning ``(intent_scores, entity_scores)`` when
            called as ``model(data.T, length)``.
        data_iter: Iterable of ``(data, length, intent, entity)`` batches.
        ctx: Context every batch is copied to before the forward pass.

    Returns:
        Tuple ``(intent_accuracy, entity_accuracy)``. A sentence counts as
        correct for the entity score only if every predicted tag within its
        valid length matches the label. ``entity_accuracy`` is NaN when
        ``data_iter`` yields no samples.
    """
    acc_intent = mx.metric.Accuracy()
    corrected = 0
    n = 0
    for data, length, intent, entity in data_iter:
        data = data.as_in_context(ctx)
        intent = intent.as_in_context(ctx)
        entity = entity.as_in_context(ctx)
        length = length.as_in_context(ctx)
        intent_output, entity_output = model(data.T, length)
        intent_pred = nd.argmax(intent_output, axis=1)
        acc_intent.update(preds=intent_pred, labels=intent)
        entity_pred = nd.argmax(entity_output, axis=2)
        # Copy the match matrix and the lengths to the host once per batch
        # instead of synchronising with the device for every sample.
        matches = (entity_pred.astype('int64') == entity).asnumpy()
        valid_lens = length.asnumpy().astype('int64')
        for row, valid_len in zip(matches, valid_lens):
            # Only the first valid_len positions are real characters.
            corrected += int(row[:valid_len].sum() == valid_len)
            n += 1
    # Avoid ZeroDivisionError on an empty iterator.
    entity_acc = corrected / n if n else float('nan')
    return (acc_intent.get()[1], entity_acc)

In [35]:
def calculate_loss(model, data_iter, loss_obj, ctx=ctx):
    """Average the intent loss and the entity loss over all batches.

    Args:
        model: Callable returning ``(intent_scores, entity_scores)`` when
            called as ``model(data.T, length)``.
        data_iter: Iterable of ``(data, length, intent, entity)`` batches.
        loss_obj: Loss callable applied to ``(scores, labels)`` for each head.
        ctx: Context every batch is copied to before the forward pass.

    Returns:
        Tuple ``(mean_intent_loss, mean_entity_loss)``, each the mean of the
        per-batch mean losses.
    """
    intent_batch_means = []
    entity_batch_means = []
    for batch in data_iter:
        tokens, lengths, intents, entities = batch
        tokens = tokens.as_in_context(ctx)
        entities = entities.as_in_context(ctx)
        intents = intents.as_in_context(ctx)
        lengths = lengths.as_in_context(ctx)
        intent_scores, entity_scores = model(tokens.T, lengths)
        # Reduce each head's per-sample losses to one scalar per batch.
        intent_batch_means.append(
            nd.mean(loss_obj(intent_scores, intents)).asscalar())
        entity_batch_means.append(
            nd.mean(loss_obj(entity_scores, entities)).asscalar())
    return (np.mean(intent_batch_means), np.mean(entity_batch_means))

In [None]:
epochs = 200

# Metric history; one entry is appended every 10th epoch.
tot_test_loss = []
tot_test_accu = []
tot_train_loss = []
tot_train_accu = []
tot_valid_accu = []
for e in range(epochs):
    # One pass over the training batches.
    for batch in tqdm(train_dataloader):
        data, length, intent, entity = batch
        data = data.as_in_context(ctx)
        intent = intent.as_in_context(ctx)
        entity = entity.as_in_context(ctx)
        length = length.as_in_context(ctx)
        with autograd.record():
            out_intent, out_entity = model(data.T, length)
            loss_intent = loss(out_intent, intent)
            loss_entity = loss(out_entity, entity)
            # Multi-task objective: weighted sum of the two head losses.
            loss_ = loss_intent * 0.4 + loss_entity * 0.6
        loss_.backward()
        trainer.step(data.shape[0])

    # Evaluate and log only on every 10th epoch.
    if e % 10 != 0:
        continue

    test_loss = calculate_loss(model, test_dataloader, loss_obj=loss, ctx=ctx)
    train_loss = calculate_loss(model, train_dataloader, loss_obj=loss, ctx=ctx)
    test_accu = evaluate_accuracy(model, test_dataloader, ctx=ctx)
    train_accu = evaluate_accuracy(model, train_dataloader, ctx=ctx)
    valid_accu = evaluate_accuracy(model, valid_dataloader, ctx=ctx)

    report = ("Epoch %s. Train Loss: %s, Test Loss : %s,"
              " Test Accuracy : %s,"
              " Train Accuracy : %s : Valid Accuracy : %s")
    print(report % (e, train_loss, test_loss, test_accu, train_accu, valid_accu))
    tot_test_loss.append(test_loss)
    tot_train_loss.append(train_loss)
    tot_test_accu.append(test_accu)
    tot_train_accu.append(train_accu)
    tot_valid_accu.append(valid_accu)

100%|██████████| 270/270 [00:01<00:00, 188.26it/s]
  6%|▋         | 17/270 [00:00<00:01, 163.93it/s]

Epoch 0. Train Loss: (1.1306442, 0.045056757), Test Loss : (1.1324457, 0.04624194), Test Accuracy : (0.7777777777777778, 0.7711111111111111), Train Accuracy : (0.7785185185185185, 0.7685185185185185) : Valid Accuracy : (0.676, 0.641)


100%|██████████| 270/270 [00:01<00:00, 179.87it/s]
100%|██████████| 270/270 [00:01<00:00, 193.02it/s]
100%|██████████| 270/270 [00:01<00:00, 190.53it/s]
100%|██████████| 270/270 [00:01<00:00, 191.45it/s]
100%|██████████| 270/270 [00:01<00:00, 185.98it/s]
100%|██████████| 270/270 [00:01<00:00, 188.84it/s]
100%|██████████| 270/270 [00:01<00:00, 187.27it/s]
100%|██████████| 270/270 [00:01<00:00, 188.98it/s]
100%|██████████| 270/270 [00:01<00:00, 179.83it/s]
100%|██████████| 270/270 [00:01<00:00, 191.66it/s]
  7%|▋         | 20/270 [00:00<00:01, 197.66it/s]

Epoch 10. Train Loss: (0.03729426, 0.0034014084), Test Loss : (0.06959214, 0.005860069), Test Accuracy : (0.9877777777777778, 0.9677777777777777), Train Accuracy : (0.9974074074074074, 0.977037037037037) : Valid Accuracy : (0.846, 0.924)


100%|██████████| 270/270 [00:01<00:00, 190.61it/s]
100%|██████████| 270/270 [00:01<00:00, 189.35it/s]
100%|██████████| 270/270 [00:01<00:00, 187.37it/s]
100%|██████████| 270/270 [00:01<00:00, 190.48it/s]
100%|██████████| 270/270 [00:01<00:00, 190.43it/s]
100%|██████████| 270/270 [00:01<00:00, 181.15it/s]
100%|██████████| 270/270 [00:01<00:00, 184.80it/s]
100%|██████████| 270/270 [00:01<00:00, 191.92it/s]
100%|██████████| 270/270 [00:01<00:00, 185.29it/s]
100%|██████████| 270/270 [00:01<00:00, 190.29it/s]
  6%|▌         | 16/270 [00:00<00:01, 159.55it/s]

Epoch 20. Train Loss: (0.011029284, 0.0017870525), Test Loss : (0.03375423, 0.0058157165), Test Accuracy : (0.9922222222222222, 0.9733333333333334), Train Accuracy : (0.9991358024691358, 0.9854320987654321) : Valid Accuracy : (0.839, 0.922)


100%|██████████| 270/270 [00:01<00:00, 186.84it/s]
100%|██████████| 270/270 [00:01<00:00, 182.28it/s]
100%|██████████| 270/270 [00:01<00:00, 188.72it/s]
100%|██████████| 270/270 [00:01<00:00, 184.81it/s]
100%|██████████| 270/270 [00:01<00:00, 189.76it/s]
100%|██████████| 270/270 [00:01<00:00, 184.54it/s]
100%|██████████| 270/270 [00:01<00:00, 189.72it/s]
100%|██████████| 270/270 [00:01<00:00, 186.03it/s]
100%|██████████| 270/270 [00:01<00:00, 187.88it/s]
100%|██████████| 270/270 [00:01<00:00, 191.77it/s]
  7%|▋         | 18/270 [00:00<00:01, 172.51it/s]

Epoch 30. Train Loss: (0.0037759773, 0.00064132095), Test Loss : (0.023643184, 0.004492641), Test Accuracy : (0.9955555555555555, 0.9766666666666667), Train Accuracy : (1.0, 0.9950617283950617) : Valid Accuracy : (0.852, 0.935)


100%|██████████| 270/270 [00:01<00:00, 183.02it/s]
100%|██████████| 270/270 [00:01<00:00, 189.03it/s]
100%|██████████| 270/270 [00:01<00:00, 185.15it/s]
100%|██████████| 270/270 [00:01<00:00, 187.53it/s]
100%|██████████| 270/270 [00:01<00:00, 186.19it/s]
100%|██████████| 270/270 [00:01<00:00, 184.28it/s]
100%|██████████| 270/270 [00:01<00:00, 186.70it/s]
100%|██████████| 270/270 [00:01<00:00, 185.89it/s]
100%|██████████| 270/270 [00:01<00:00, 185.35it/s]
100%|██████████| 270/270 [00:01<00:00, 184.85it/s]
  7%|▋         | 18/270 [00:00<00:01, 176.19it/s]

Epoch 40. Train Loss: (0.0017887043, 0.00026370824), Test Loss : (0.018721739, 0.0044044647), Test Accuracy : (0.9955555555555555, 0.9811111111111112), Train Accuracy : (1.0, 0.9979012345679013) : Valid Accuracy : (0.844, 0.929)


100%|██████████| 270/270 [00:01<00:00, 183.79it/s]
100%|██████████| 270/270 [00:01<00:00, 184.49it/s]
100%|██████████| 270/270 [00:01<00:00, 181.98it/s]
100%|██████████| 270/270 [00:01<00:00, 188.84it/s]
100%|██████████| 270/270 [00:01<00:00, 182.81it/s]
100%|██████████| 270/270 [00:01<00:00, 184.42it/s]
100%|██████████| 270/270 [00:01<00:00, 190.78it/s]
100%|██████████| 270/270 [00:01<00:00, 188.87it/s]
100%|██████████| 270/270 [00:01<00:00, 189.25it/s]
100%|██████████| 270/270 [00:01<00:00, 187.68it/s]
  7%|▋         | 20/270 [00:00<00:01, 195.95it/s]

Epoch 50. Train Loss: (0.00090198487, 0.00013875663), Test Loss : (0.020827971, 0.003547297), Test Accuracy : (0.9966666666666667, 0.9822222222222222), Train Accuracy : (1.0, 0.9992592592592593) : Valid Accuracy : (0.845, 0.914)


100%|██████████| 270/270 [00:01<00:00, 182.18it/s]
100%|██████████| 270/270 [00:01<00:00, 186.77it/s]
100%|██████████| 270/270 [00:01<00:00, 189.63it/s]
100%|██████████| 270/270 [00:01<00:00, 187.83it/s]
100%|██████████| 270/270 [00:01<00:00, 190.59it/s]
100%|██████████| 270/270 [00:01<00:00, 187.32it/s]
100%|██████████| 270/270 [00:01<00:00, 187.42it/s]
100%|██████████| 270/270 [00:01<00:00, 189.50it/s]
100%|██████████| 270/270 [00:01<00:00, 186.80it/s]
100%|██████████| 270/270 [00:01<00:00, 186.94it/s]
  7%|▋         | 20/270 [00:00<00:01, 195.14it/s]

Epoch 60. Train Loss: (0.000828967, 9.077938e-05), Test Loss : (0.008679804, 0.0045302208), Test Accuracy : (0.9977777777777778, 0.9822222222222222), Train Accuracy : (1.0, 0.9993827160493827) : Valid Accuracy : (0.854, 0.94)


100%|██████████| 270/270 [00:01<00:00, 185.46it/s]
100%|██████████| 270/270 [00:01<00:00, 186.97it/s]
100%|██████████| 270/270 [00:01<00:00, 190.38it/s]
100%|██████████| 270/270 [00:01<00:00, 187.48it/s]
100%|██████████| 270/270 [00:01<00:00, 187.26it/s]
100%|██████████| 270/270 [00:01<00:00, 188.51it/s]
100%|██████████| 270/270 [00:01<00:00, 184.71it/s]
100%|██████████| 270/270 [00:01<00:00, 189.23it/s]
100%|██████████| 270/270 [00:01<00:00, 187.00it/s]
100%|██████████| 270/270 [00:01<00:00, 185.16it/s]
  7%|▋         | 20/270 [00:00<00:01, 192.96it/s]

Epoch 70. Train Loss: (0.00036503948, 2.7286307e-05), Test Loss : (0.0069011147, 0.002193475), Test Accuracy : (0.9977777777777778, 0.9877777777777778), Train Accuracy : (1.0, 0.9998765432098765) : Valid Accuracy : (0.862, 0.915)


100%|██████████| 270/270 [00:01<00:00, 185.39it/s]
100%|██████████| 270/270 [00:01<00:00, 186.09it/s]
100%|██████████| 270/270 [00:01<00:00, 185.57it/s]
100%|██████████| 270/270 [00:01<00:00, 189.87it/s]
100%|██████████| 270/270 [00:01<00:00, 182.50it/s]
100%|██████████| 270/270 [00:01<00:00, 186.34it/s]
100%|██████████| 270/270 [00:01<00:00, 188.67it/s]
100%|██████████| 270/270 [00:01<00:00, 183.65it/s]
100%|██████████| 270/270 [00:01<00:00, 186.69it/s]
100%|██████████| 270/270 [00:01<00:00, 186.38it/s]
  6%|▋         | 17/270 [00:00<00:01, 166.01it/s]

Epoch 80. Train Loss: (0.0003008305, 1.7000959e-05), Test Loss : (0.015655918, 0.0034545886), Test Accuracy : (0.9966666666666667, 0.9822222222222222), Train Accuracy : (1.0, 0.9998765432098765) : Valid Accuracy : (0.862, 0.943)


100%|██████████| 270/270 [00:01<00:00, 185.59it/s]
100%|██████████| 270/270 [00:01<00:00, 183.07it/s]
100%|██████████| 270/270 [00:01<00:00, 185.73it/s]
100%|██████████| 270/270 [00:01<00:00, 188.95it/s]
100%|██████████| 270/270 [00:01<00:00, 187.92it/s]
100%|██████████| 270/270 [00:01<00:00, 187.04it/s]
100%|██████████| 270/270 [00:01<00:00, 186.51it/s]
100%|██████████| 270/270 [00:01<00:00, 187.14it/s]
100%|██████████| 270/270 [00:01<00:00, 188.63it/s]
100%|██████████| 270/270 [00:01<00:00, 187.24it/s]
  7%|▋         | 20/270 [00:00<00:01, 194.58it/s]

Epoch 90. Train Loss: (0.00017531229, 8.08034e-06), Test Loss : (0.012422423, 0.0017884199), Test Accuracy : (0.9977777777777778, 0.9933333333333333), Train Accuracy : (1.0, 1.0) : Valid Accuracy : (0.846, 0.946)


 85%|████████▌ | 230/270 [00:01<00:00, 185.55it/s]

In [37]:
tot_valid_accu

[(0.676, 0.641),
 (0.846, 0.924),
 (0.839, 0.922),
 (0.852, 0.935),
 (0.844, 0.929),
 (0.845, 0.914),
 (0.854, 0.94),
 (0.862, 0.915),
 (0.862, 0.943),
 (0.846, 0.946),
 (0.845, 0.95),
 (0.844, 0.942),
 (0.822, 0.913),
 (0.858, 0.942),
 (0.838, 0.935),
 (0.844, 0.957),
 (0.855, 0.953),
 (0.852, 0.955),
 (0.858, 0.947),
 (0.851, 0.955)]

#### Model export and Visualize 

In [25]:
# Export the hybridized model under the "model" prefix; the resulting
# model-symbol.json / model-0000.params files are loaded back further down.
model.export("model")

Netron으로 네트워크 시각화 

- https://lutzroeder.github.io/netron/
- 저장된 `model-symbol.json`을 입력해 시각화 

In [38]:
# Rebuild the exported network from its symbol and parameter files.
# NOTE(review): 'data0' and 'data1' presumably correspond to the token input
# and the length input, in hybrid_forward order — confirm.
load_model = gluon.nn.SymbolBlock.imports("model-symbol.json", ['data0', 'data1'], 
                                          "model-0000.params")

In [39]:
def get_entitytag(sent):
    """Predict the entity tag sequence for a raw sentence.

    Args:
        sent: Input sentence as a string; it is split into characters and
            padded/clipped by ``length_clip``.

    Returns:
        The predicted tag tokens for every model position joined into one
        string. Positions after the end of the sentence are included as well.
    """
    # Clip the reported length to seq_len so it never exceeds the number of
    # positions actually fed to the model (preprocess() clips the same way).
    sent_len = min(len(sent), seq_len)
    coded_sent = vocab_sent[length_clip(list(sent))]
    # Add a batch axis of size one after the position axis.
    co = nd.array(coded_sent).expand_dims(axis=1)
    _, ret_code = load_model(co, nd.array([sent_len,]))
    ret_seq = vocab_entity.to_tokens(ret_code.argmax(axis=2)[0].asnumpy().astype('int').tolist())
    return(''.join(ret_seq))

In [40]:
get_entitytag("아이유가 신곡을 낸 이유")

'EEECCCCCCCCCC<pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>'

In [41]:
def get_intent(sent):
    """Predict the intent label for a raw sentence.

    Args:
        sent: Input sentence as a string; it is split into characters and
            padded/clipped by ``length_clip``.

    Returns:
        The predicted intent label as a string.
    """
    # Clip the reported length to seq_len: the model averages over this many
    # steps, so a larger value would divide by more positions than were fed
    # (preprocess() clips the same way).
    sent_len = min(len(sent), seq_len)
    coded_sent = vocab_sent[length_clip(list(sent))]
    # Add a batch axis of size one after the position axis.
    co = nd.array(coded_sent).expand_dims(axis=1)
    ret_code,_ = load_model(co, nd.array([sent_len,]))
    ret_seq = vocab_intent.to_tokens(ret_code.argmax(axis=1).asnumpy().astype('int').tolist())
    return(''.join(ret_seq))

In [42]:
get_intent("모두의 연구소에 대해서 찾아줘")

'definition'

### TODO
- 개별 Intent와 Entity 모형을 하나의 모형으로 구축해본다. (Multi-Task Learning) 
  - 분류 성능이 좋아지는가? 학습 수렴 속도는 어떠한가?