## Multi-task learning: Entity Tagging & Intent Classification

In [56]:
import pandas as pd
import numpy as np
from mxnet.gluon import nn, rnn
from mxnet import gluon, autograd
import gluonnlp as nlp
from mxnet import nd 
import mxnet as mx
import time
import itertools
from tqdm import tqdm
import multiprocessing as mp

In [57]:
# Read the tab-separated training file; the three column names are supplied explicitly via `names`.
train_raw = pd.read_csv("trainset.txt",names=['intent', 'entity', 'sentence'], sep='\t')
# The frame called "validation" is loaded from the hidden test file (test_hidden.txt).
validation_raw = pd.read_csv("test_hidden.txt",names=['intent', 'entity', 'sentence'], sep='\t')
# Alternative validation source (currently disabled):
#validation_raw = pd.read_csv("validation.txt",names=['intent', 'entity', 'sentence'], sep='\t')

In [58]:
train_raw.head(30)

Unnamed: 0,intent,entity,sentence
0,area,EECCCCCCCCCCCCCCCCCCC,자강의 면적은 얼마 정도되는지 알려줄래
1,birth_date,CCCCCCCCCCCCEEECCCCCCCCCCCC,WIKI PEDIA로 변재일 생년월일을 알고 싶어
2,age,EEEEEEEEEEECCCCCCCCCCCCCCCCC,남쪽 물고기자리 알파 나이가 위키백과사전으로 얼마야
3,length,EEEECCCCCCCCCCCCCCCCCC,삼양터널의 총 길이 위키백과사전에서 뭐야
4,birth_place,EEEEEECCCCCCCCCCC,코니 윌리스의 태어난 곳은 뭐지
5,weight,CCCCCCCCCCCCEEEECCCCCCCCCCCCC,WIKI백과사전 검색 AA12의 무게가 얼만지 찾아봐
6,definition,CCCCCCCCCCCCCEEECCCCCCCC,WIKIPEDIA백과로 라이프 찾아서 말해줘
7,height,EEEEEEEECCCCCCCCCCCCCCCCCCC,송파 헬리오시티 구조물 높이 위키 피디아에서 뭐야
8,birth_date,CCCEEEEEECCCCCCCCCCCCCCC,검색 HLKVAM 언제 출생했는지를 검색해라
9,height,CCCCCCCCEEEEEECCCCCCCC,위키 피디아에 푸조 508 전고가 몇이야


#### 데이터 전처리

In [59]:
# Re-pack every row as a (sentence, intent, entity) tuple, the order in which `preprocess` unpacks it.
train_dataset = [(s, i, e) for i,e,s in zip(train_raw['intent'], train_raw['entity'], train_raw['sentence'])]
valid_dataset = [(s, i, e) for i,e,s in zip(validation_raw['intent'], validation_raw['entity'], validation_raw['sentence'])]

In [60]:
train_dataset[0]

('자강의 면적은 얼마 정도되는지 알려줄래', 'area', 'EECCCCCCCCCCCCCCCCCCC')

In [61]:
# Fixed number of character positions per example.
seq_len = 32

# Brings a sequence to exactly `seq_len` items, filling short ones with "<pad>".
# NOTE(review): truncation of longer sequences relies on PadSequence's default clipping — confirm.
length_clip = nlp.data.PadSequence(seq_len, pad_val="<pad>")

def preprocess(data):
    """Turn one raw record into character-level features.

    Args:
        data: A ``(sentence, intent, entity)`` tuple. Each field is converted
            with ``str()`` before use, so non-string cells are tolerated.

    Returns:
        A 4-tuple ``(chars, length, intent, entity_tags)``:
        ``chars`` and ``entity_tags`` are the sentence characters and the
        entity tag characters after ``length_clip``; ``length`` is the number
        of real (non-padding) characters, capped at ``seq_len``; ``intent`` is
        the intent label as a string.
    """
    sent, intent, entity = data
    char_sent = list(str(sent))
    char_entity = list(str(entity))
    char_intent = str(intent)
    # Measure the stringified sentence (the raw value may not support len())
    # and cap it at seq_len so the length never points past the clipped
    # sequence that is fed to the model.
    valid_len = min(len(char_sent), seq_len)
    return (length_clip(char_sent), valid_len, char_intent, length_clip(char_entity))

def preprocess_dataset(dataset):
    """Apply `preprocess` to every record using a process pool.

    Args:
        dataset: Iterable of records accepted by `preprocess`.

    Returns:
        A ``gluon.data.SimpleDataset`` holding the preprocessed records.
        The elapsed wall-clock time and the number of records are printed.
    """
    began = time.time()
    with mp.Pool() as workers:
        processed = workers.map(preprocess, dataset)
        dataset = gluon.data.SimpleDataset(processed)
    # The clock is stopped only after the pool has been shut down.
    elapsed = time.time() - began
    message = 'Done! Tokenizing Time={:.2f}s, #Sentences={}'
    print(message.format(elapsed, len(dataset)))
    return dataset


In [62]:
# Character-level preprocessing of both splits (each call prints its timing).
train_preprocessed  = preprocess_dataset(train_dataset)
valid_preprocessed  = preprocess_dataset(valid_dataset)

Done! Tokenizing Time=0.24s, #Sentences=9000
Done! Tokenizing Time=0.25s, #Sentences=1000


In [63]:
# Frequency of every character in the preprocessed (padded) training sentences.
counter_sent   = nlp.data.count_tokens(itertools.chain.from_iterable([c for c, _, _, _ in train_preprocessed]))
# Frequency of every intent label (one label per sentence).
counter_intent = nlp.data.count_tokens([c for _,_,c,_ in train_preprocessed])
# Frequency of every entity tag character in the padded tag sequences.
counter_entity = nlp.data.count_tokens(itertools.chain.from_iterable([c for _,_,_,c in train_preprocessed]))


In [64]:
counter_intent

Counter({'age': 900,
         'area': 900,
         'belong_to': 900,
         'birth_date': 900,
         'birth_place': 900,
         'definition': 900,
         'height': 900,
         'length': 900,
         'weight': 900,
         'width': 900})

In [65]:
# Character vocabulary: no BOS/EOS tokens; characters seen fewer than 15 times are not indexed (min_freq=15).
vocab_sent = nlp.Vocab(counter_sent, bos_token=None, eos_token=None, min_freq=15)
# Label vocabularies are built without any special tokens, so every index corresponds to a counted label.
vocab_intent = nlp.Vocab(counter_intent, bos_token=None, eos_token=None, unknown_token=None, padding_token=None)
vocab_entity = nlp.Vocab(counter_entity, bos_token=None, eos_token=None, unknown_token=None , padding_token=None)


In [66]:
vocab_sent.idx_to_token[:10], vocab_entity.idx_to_token[:10], vocab_intent.idx_to_token[:10], 

(['<unk>', '<pad>', ' ', 'I', '이', '색', '검', '의', '지', '아'],
 ['C', '<pad>', 'E'],
 ['age',
  'area',
  'belong_to',
  'birth_date',
  'birth_place',
  'definition',
  'height',
  'length',
  'weight',
  'width'])

In [67]:
# Replace characters, intent labels and entity tags by their vocabulary indices; the length is kept as is.
train_preprocessed_encoded  = [(vocab_sent[sent], length,vocab_intent[intent] ,vocab_entity[entity] )  
                               for sent, length ,intent, entity in train_preprocessed ]
# Same encoding for the validation records, using the vocabularies built from the training data.
valid  = [(vocab_sent[sent], length ,vocab_intent[intent],vocab_entity[entity])  for sent, length , intent,entity in valid_preprocessed ]

In [68]:
# Hold out 10% of the encoded training data; the held-out part is named `test` in this notebook.
train, test = nlp.data.train_valid_split(train_preprocessed_encoded, valid_ratio=0.1)

In [69]:
# Mini-batch size shared by all three loaders.
nbatch = 30
# One stacking function per tuple field, in order: token ids, length, intent id, entity tag ids.
# Lengths are stacked as float32; the model uses them as `sequence_length` and as a divisor.
batchify_fn = nlp.data.batchify.Tuple(nlp.data.batchify.Stack(),
                                      nlp.data.batchify.Stack('float32'),
                                      nlp.data.batchify.Stack(),
                                      nlp.data.batchify.Stack())

# NOTE(review): the two evaluation loaders are shuffled as well; the metrics computed
# in this notebook are aggregates over the whole loader, so order should not matter.
train_dataloader  = gluon.data.DataLoader(train, batch_size=nbatch, batchify_fn=batchify_fn, shuffle=True)
test_dataloader  = gluon.data.DataLoader(test, batch_size=nbatch, batchify_fn=batchify_fn, shuffle=True)
valid_dataloader  = gluon.data.DataLoader(valid, batch_size=nbatch, batchify_fn=batchify_fn, shuffle=True)

#### 모델링 

In [82]:
class EntityTagger_IntentClassification(gluon.HybridBlock):
    """Joint model with two output heads over a shared character embedding.

    One head classifies the intent of the whole sentence, the other predicts
    an entity tag for every character position. Each head has its own LSTM
    that reads the same embedding output.
    """
    def __init__(self, vocab_size, intent_class_size, entity_class_size, num_embed, seq_len, hidden_size, **kwargs):
        """Create the layers.

        Args:
            vocab_size: Number of rows of the embedding table.
            intent_class_size: Number of units of the intent output layer.
            entity_class_size: Number of units of the entity output layer.
            num_embed: Embedding dimension.
            seq_len: Stored on the instance; not read by `hybrid_forward`.
            hidden_size: Hidden size of both LSTMs.
            **kwargs: Forwarded to ``gluon.HybridBlock``.
        """
        super(EntityTagger_IntentClassification, self).__init__(**kwargs)
        self.seq_len = seq_len
        self.hidden_size = hidden_size 
        self.intent_class_size = intent_class_size
        self.entity_class_size = entity_class_size
        with self.name_scope():
            self.embed = nn.Embedding(input_dim=vocab_size, output_dim=num_embed)
            # Created here but never called in hybrid_forward.
            self.dropout = nn.Dropout(0.3)
            # Despite its name this LSTM is unidirectional (no bidirectional=True); it feeds the intent head.
            # NOTE(review): with the default single layer the LSTM `dropout` argument
            # presumably has no effect — confirm against the MXNet documentation.
            self.bilstm = rnn.LSTM(self.hidden_size, dropout=0.3)
            self.out_intent =nn.Dense(self.intent_class_size)
            # Bidirectional LSTM feeding the entity head.
            self.bilstm_last = rnn.LSTM(self.hidden_size, dropout=0.3, bidirectional=True)
            
            # Disabled alternative encoder:
            #self.bigru = rnn.GRU(self.hidden_size, dropout=0.2, bidirectional=True)
            # flatten=False keeps the per-position axis so the layers act on every position.
            self.dense_en = nn.Dense(50, flatten=False)
            self.out_entity = nn.Dense(self.entity_class_size, flatten=False)
            #self.dense = nn.Dense(self.vocab_out_size, flatten=False)        
                        
    def hybrid_forward(self, F ,inputs, length):
        """Compute the scores of both heads.

        Args:
            F: Backend namespace supplied by Gluon.
            inputs: Token indices. The callers in this notebook pass ``data.T``,
                i.e. time-major (seq_len, batch), matching the LSTMs' TNC layout.
            length: Number of valid positions per sentence; used as
                ``sequence_length`` for masking and as the divisor of the mean.

        Returns:
            ``(intent, entity)``: the intent scores per sentence and the entity
            scores per sentence and position (batch-major after the transpose).
        """
        em_out = self.embed(inputs)
        #bigruout = self.bigru(em_out)
        bilstm = self.bilstm(em_out)
        # Mask the LSTM outputs at time steps beyond each sentence's length.
        masked_encoded_intent = F.SequenceMask(bilstm,
                                        sequence_length=length,
                                        use_sequence_length=True)
        # Sum over the time axis divided by the length: the mean over the valid
        # steps, assuming masked steps contribute zero (SequenceMask default — TODO confirm).
        agg_intent = F.broadcast_div(F.sum(masked_encoded_intent, axis=0), F.expand_dims(length, axis=1))
        intent = self.out_intent(agg_intent)
        
        bilstm_last = self.bilstm_last(em_out)        
        # Mask padded steps, then swap the first two axes so the batch axis comes first.
        masked_encoded_entity = F.SequenceMask(bilstm_last,
                                        sequence_length=length,
                                        use_sequence_length=True).transpose((1,0,2))
        fc_entity = self.dense_en(masked_encoded_entity)
        entity = self.out_entity(fc_entity) 
        return(intent, entity)

In [83]:
# Device used for parameters and batches.
# NOTE(review): mx.gpu() presumably requires a GPU build of MXNet; use mx.cpu() otherwise.
ctx = mx.gpu()

# Layer sizes are derived from the three vocabularies; seq_len is the padded sentence length.
model = EntityTagger_IntentClassification(vocab_size = len(vocab_sent.idx_to_token), 
                                          entity_class_size=len(vocab_entity.idx_to_token),
                                          intent_class_size=len(vocab_intent.idx_to_token),
                                          num_embed=50, seq_len=seq_len, hidden_size=30)

In [84]:
# Initialise all parameters with the Xavier initializer on the chosen device.
model.initialize(mx.initializer.Xavier(), ctx=ctx)

In [85]:
# Adam optimizer with its default hyper-parameters over every model parameter.
trainer = gluon.Trainer(model.collect_params(),"Adam")
# The same softmax cross-entropy object is used for both the intent and the entity head.
loss = gluon.loss.SoftmaxCELoss() 

In [86]:
# Switch the block to hybrid (symbolic) execution; the model is exported to a symbol file later on.
model.hybridize()

In [87]:
model

EntityTagger_IntentClassification(
  (out_intent): Dense(None -> 10, linear)
  (out_entity): Dense(None -> 3, linear)
  (bilstm_last): LSTM(None -> 30, TNC, dropout=0.3, bidirectional)
  (embed): Embedding(481 -> 50, float32)
  (dropout): Dropout(p = 0.3, axes=())
  (bilstm): LSTM(None -> 30, TNC, dropout=0.3)
  (dense_en): Dense(None -> 50, linear)
)

In [88]:
def evaluate_accuracy(model, data_iter, ctx=ctx):
    """Compute sentence-level entity accuracy and intent accuracy.

    Args:
        model: Callable taking ``(tokens, lengths)`` and returning
            ``(intent_scores, entity_scores)``; tokens are passed transposed
            (``data.T``).
        data_iter: Iterable of ``(data, length, intent, entity)`` batches.
        ctx: Device the batches are copied to before the forward pass.

    Returns:
        ``(entity_accuracy, intent_accuracy)``. A sentence counts as correct
        for the entity task only if every one of its non-padding positions is
        tagged correctly. ``entity_accuracy`` is ``nan`` when ``data_iter``
        yields no sentence.
    """
    exact_matches = 0
    n_sentences = 0
    acc_intent = mx.metric.Accuracy()
    for data, length, intent, entity in data_iter:
        data = data.as_in_context(ctx)
        intent = intent.as_in_context(ctx)
        entity = entity.as_in_context(ctx)
        length = length.as_in_context(ctx)
        intent_output, entity_output = model(data.T, length)

        intent_predictions = nd.argmax(intent_output, axis=1)
        acc_intent.update(preds=intent_predictions, labels=intent)

        entity_predictions = nd.argmax(entity_output, axis=2)
        # Copy the per-position hits and the lengths to the host once per
        # batch instead of synchronising with the device for every sentence.
        hits = (entity_predictions.astype('int64') == entity).asnumpy()
        valid_lengths = length.asnumpy()
        for row_hits, raw_len in zip(hits, valid_lengths):
            # A length can never exceed the padded width of the tag sequence.
            valid_len = min(int(raw_len), row_hits.shape[0])
            exact_matches += int(row_hits[:valid_len].sum() == valid_len)
            n_sentences += 1

    # Avoid a ZeroDivisionError on an empty iterator; mirror the metric's nan.
    entity_accuracy = exact_matches / n_sentences if n_sentences else float('nan')
    return entity_accuracy, acc_intent.get()[1]

In [91]:
def calculate_loss(model, data_iter, loss_obj, ctx=ctx):
    """Average the intent loss and the entity loss over a data iterator.

    Args:
        model: Callable taking ``(tokens, lengths)`` and returning
            ``(intent_scores, entity_scores)``; tokens are passed transposed.
        data_iter: Iterable of ``(data, length, intent, entity)`` batches.
        loss_obj: Loss callable applied as ``loss_obj(scores, labels)``.
        ctx: Device the batches are copied to before the forward pass.

    Returns:
        ``(mean_intent_loss, mean_entity_loss)``, each the mean of the
        per-batch mean losses.
    """
    batch_intent_losses, batch_entity_losses = [], []
    for tokens, lengths, intent_gold, entity_gold in data_iter:
        tokens = tokens.as_in_context(ctx)
        intent_gold = intent_gold.as_in_context(ctx)
        entity_gold = entity_gold.as_in_context(ctx)
        lengths = lengths.as_in_context(ctx)

        intent_scores, entity_scores = model(tokens.T, lengths)

        # Reduce each batch to a single scalar per task.
        intent_value = nd.mean(loss_obj(intent_scores, intent_gold)).asscalar()
        batch_intent_losses.append(intent_value)

        entity_value = nd.mean(loss_obj(entity_scores, entity_gold)).asscalar()
        batch_entity_losses.append(entity_value)

    return np.mean(batch_intent_losses), np.mean(batch_entity_losses)

In [92]:
epochs = 100

# Metric histories; one entry is appended for every evaluated epoch.
tot_test_loss = []
tot_train_loss = []
tot_test_accu = []
tot_train_accu = []
tot_valid_accu = []

for e in range(epochs):
    # Mini-batch training over the whole training set.
    for data, length, intent, entity in tqdm(train_dataloader):
        data = data.as_in_context(ctx)
        intent = intent.as_in_context(ctx)
        entity = entity.as_in_context(ctx)
        length = length.as_in_context(ctx)
        with autograd.record():
            out_intent, out_entity = model(data.T, length)
            loss_intent = loss(out_intent, intent)
            loss_entity = loss(out_entity, entity)
            # Weighted sum of the two task losses.
            loss_ = 0.4 * loss_intent + 0.6 * loss_entity
            loss_.backward()
        trainer.step(data.shape[0])

    # Losses and accuracies are only computed on every 10th epoch.
    if e % 10 != 0:
        continue

    test_loss = calculate_loss(model, test_dataloader, loss_obj=loss, ctx=ctx)
    train_loss = calculate_loss(model, train_dataloader, loss_obj=loss, ctx=ctx)
    test_accu = evaluate_accuracy(model, test_dataloader, ctx=ctx)
    train_accu = evaluate_accuracy(model, train_dataloader, ctx=ctx)
    valid_accu = evaluate_accuracy(model, valid_dataloader, ctx=ctx)

    report_values = (e, train_loss, test_loss, test_accu, train_accu, valid_accu)
    print("Epoch %s. Train Loss: %s, Test Loss : %s,"
          " Test Accuracy : %s,"
          " Train Accuracy : %s : Valid Accuracy : %s" % report_values)

    tot_test_loss.append(test_loss)
    tot_train_loss.append(train_loss)
    tot_test_accu.append(test_accu)
    tot_train_accu.append(train_accu)
    tot_valid_accu.append(valid_accu)


100%|██████████| 270/270 [00:01<00:00, 266.13it/s]
 11%|█         | 30/270 [00:00<00:00, 291.67it/s]

Epoch 0. Train Loss: (0.35134175, 0.02359023), Test Loss : (0.40839735, 0.027739711), Test Accuracy : (0.8233333333333334, 0.9366666666666666), Train Accuracy : (0.8524691358024692, 0.9637037037037037) : Valid Accuracy : (0.748, 0.81)


100%|██████████| 270/270 [00:00<00:00, 292.67it/s]
100%|██████████| 270/270 [00:00<00:00, 274.60it/s]
100%|██████████| 270/270 [00:00<00:00, 283.13it/s]
100%|██████████| 270/270 [00:00<00:00, 274.78it/s]
100%|██████████| 270/270 [00:00<00:00, 282.03it/s]
100%|██████████| 270/270 [00:00<00:00, 280.42it/s]
100%|██████████| 270/270 [00:00<00:00, 277.56it/s]
100%|██████████| 270/270 [00:00<00:00, 272.10it/s]
100%|██████████| 270/270 [00:00<00:00, 281.00it/s]
100%|██████████| 270/270 [00:00<00:00, 284.94it/s]
 11%|█         | 29/270 [00:00<00:00, 288.48it/s]

Epoch 10. Train Loss: (0.021320323, 0.0019323159), Test Loss : (0.11305806, 0.007870059), Test Accuracy : (0.9633333333333334, 0.9777777777777777), Train Accuracy : (0.9861728395061728, 0.9996296296296296) : Valid Accuracy : (0.901, 0.824)


100%|██████████| 270/270 [00:00<00:00, 292.07it/s]
100%|██████████| 270/270 [00:00<00:00, 287.43it/s]
100%|██████████| 270/270 [00:00<00:00, 292.04it/s]
100%|██████████| 270/270 [00:00<00:00, 292.67it/s]
100%|██████████| 270/270 [00:00<00:00, 296.06it/s]
100%|██████████| 270/270 [00:00<00:00, 291.24it/s]
100%|██████████| 270/270 [00:00<00:00, 288.33it/s]
100%|██████████| 270/270 [00:00<00:00, 290.72it/s]
100%|██████████| 270/270 [00:00<00:00, 289.58it/s]
100%|██████████| 270/270 [00:00<00:00, 288.87it/s]
 11%|█         | 29/270 [00:00<00:00, 284.38it/s]

Epoch 20. Train Loss: (0.0046512173, 0.001162052), Test Loss : (0.09036876, 0.00788207), Test Accuracy : (0.9588888888888889, 0.9811111111111112), Train Accuracy : (0.9872839506172839, 1.0) : Valid Accuracy : (0.893, 0.822)


100%|██████████| 270/270 [00:00<00:00, 287.70it/s]
100%|██████████| 270/270 [00:00<00:00, 285.75it/s]
100%|██████████| 270/270 [00:00<00:00, 294.69it/s]
100%|██████████| 270/270 [00:00<00:00, 295.46it/s]
100%|██████████| 270/270 [00:00<00:00, 297.20it/s]
100%|██████████| 270/270 [00:00<00:00, 295.11it/s]
100%|██████████| 270/270 [00:00<00:00, 290.48it/s]
100%|██████████| 270/270 [00:00<00:00, 289.62it/s]
100%|██████████| 270/270 [00:00<00:00, 290.08it/s]
100%|██████████| 270/270 [00:00<00:00, 292.50it/s]
 11%|█         | 30/270 [00:00<00:00, 293.63it/s]

Epoch 30. Train Loss: (0.0021207987, 6.422827e-05), Test Loss : (0.06327557, 0.007953479), Test Accuracy : (0.9722222222222222, 0.9844444444444445), Train Accuracy : (0.9997530864197531, 1.0) : Valid Accuracy : (0.939, 0.835)


100%|██████████| 270/270 [00:00<00:00, 291.75it/s]
100%|██████████| 270/270 [00:00<00:00, 286.78it/s]
100%|██████████| 270/270 [00:00<00:00, 287.20it/s]
100%|██████████| 270/270 [00:00<00:00, 291.99it/s]
100%|██████████| 270/270 [00:00<00:00, 290.05it/s]
100%|██████████| 270/270 [00:00<00:00, 290.06it/s]
100%|██████████| 270/270 [00:00<00:00, 295.29it/s]
100%|██████████| 270/270 [00:00<00:00, 294.93it/s]
100%|██████████| 270/270 [00:00<00:00, 290.22it/s]
100%|██████████| 270/270 [00:00<00:00, 290.22it/s]
 11%|█         | 29/270 [00:00<00:00, 289.32it/s]

Epoch 40. Train Loss: (0.0005918092, 4.9288956e-06), Test Loss : (0.12045789, 0.010654632), Test Accuracy : (0.97, 0.9766666666666667), Train Accuracy : (1.0, 1.0) : Valid Accuracy : (0.933, 0.822)


100%|██████████| 270/270 [00:00<00:00, 285.58it/s]
100%|██████████| 270/270 [00:00<00:00, 291.53it/s]
100%|██████████| 270/270 [00:00<00:00, 290.84it/s]
100%|██████████| 270/270 [00:00<00:00, 292.18it/s]
100%|██████████| 270/270 [00:00<00:00, 287.04it/s]
100%|██████████| 270/270 [00:00<00:00, 288.48it/s]
100%|██████████| 270/270 [00:00<00:00, 290.66it/s]
100%|██████████| 270/270 [00:00<00:00, 291.48it/s]
100%|██████████| 270/270 [00:00<00:00, 293.22it/s]
100%|██████████| 270/270 [00:00<00:00, 288.60it/s]
 11%|█         | 29/270 [00:00<00:00, 284.67it/s]

Epoch 50. Train Loss: (0.0020211546, 0.00012501831), Test Loss : (0.07394106, 0.008759798), Test Accuracy : (0.9744444444444444, 0.9822222222222222), Train Accuracy : (0.9988888888888889, 0.9993827160493827) : Valid Accuracy : (0.942, 0.844)


100%|██████████| 270/270 [00:00<00:00, 288.03it/s]
100%|██████████| 270/270 [00:00<00:00, 287.78it/s]
100%|██████████| 270/270 [00:00<00:00, 287.30it/s]
100%|██████████| 270/270 [00:00<00:00, 288.83it/s]
100%|██████████| 270/270 [00:00<00:00, 292.02it/s]
100%|██████████| 270/270 [00:00<00:00, 294.62it/s]
100%|██████████| 270/270 [00:00<00:00, 296.39it/s]
100%|██████████| 270/270 [00:00<00:00, 295.70it/s]
100%|██████████| 270/270 [00:00<00:00, 295.36it/s]
100%|██████████| 270/270 [00:00<00:00, 290.36it/s]
 11%|█         | 29/270 [00:00<00:00, 282.64it/s]

Epoch 60. Train Loss: (0.00021131491, 3.6445895e-06), Test Loss : (0.09383799, 0.010895292), Test Accuracy : (0.9777777777777777, 0.9777777777777777), Train Accuracy : (1.0, 1.0) : Valid Accuracy : (0.939, 0.842)


100%|██████████| 270/270 [00:00<00:00, 283.94it/s]
100%|██████████| 270/270 [00:00<00:00, 290.93it/s]
100%|██████████| 270/270 [00:00<00:00, 287.27it/s]
100%|██████████| 270/270 [00:00<00:00, 288.66it/s]
100%|██████████| 270/270 [00:00<00:00, 286.94it/s]
100%|██████████| 270/270 [00:00<00:00, 290.31it/s]
100%|██████████| 270/270 [00:00<00:00, 285.40it/s]
100%|██████████| 270/270 [00:00<00:00, 290.26it/s]
100%|██████████| 270/270 [00:00<00:00, 290.25it/s]
100%|██████████| 270/270 [00:00<00:00, 287.72it/s]
 10%|█         | 28/270 [00:00<00:00, 273.66it/s]

Epoch 70. Train Loss: (8.8357236e-05, 7.0657944e-07), Test Loss : (0.13797253, 0.013243537), Test Accuracy : (0.9755555555555555, 0.9755555555555555), Train Accuracy : (1.0, 1.0) : Valid Accuracy : (0.938, 0.828)


100%|██████████| 270/270 [00:00<00:00, 281.14it/s]
100%|██████████| 270/270 [00:00<00:00, 286.65it/s]
100%|██████████| 270/270 [00:00<00:00, 282.98it/s]
100%|██████████| 270/270 [00:00<00:00, 289.75it/s]
100%|██████████| 270/270 [00:00<00:00, 290.31it/s]
100%|██████████| 270/270 [00:00<00:00, 285.65it/s]
100%|██████████| 270/270 [00:00<00:00, 291.05it/s]
100%|██████████| 270/270 [00:00<00:00, 293.80it/s]
100%|██████████| 270/270 [00:00<00:00, 294.64it/s]
100%|██████████| 270/270 [00:00<00:00, 286.82it/s]
 11%|█         | 29/270 [00:00<00:00, 283.20it/s]

Epoch 80. Train Loss: (2.9432895e-05, 1.3592586e-07), Test Loss : (0.17839178, 0.016036982), Test Accuracy : (0.9744444444444444, 0.9744444444444444), Train Accuracy : (1.0, 1.0) : Valid Accuracy : (0.936, 0.823)


100%|██████████| 270/270 [00:00<00:00, 286.43it/s]
100%|██████████| 270/270 [00:01<00:00, 251.95it/s]
100%|██████████| 270/270 [00:00<00:00, 293.48it/s]
100%|██████████| 270/270 [00:00<00:00, 287.21it/s]
100%|██████████| 270/270 [00:01<00:00, 265.63it/s]
100%|██████████| 270/270 [00:00<00:00, 287.14it/s]
100%|██████████| 270/270 [00:00<00:00, 292.87it/s]
100%|██████████| 270/270 [00:00<00:00, 286.61it/s]
100%|██████████| 270/270 [00:01<00:00, 267.91it/s]
100%|██████████| 270/270 [00:01<00:00, 260.14it/s]
  9%|▉         | 24/270 [00:00<00:01, 230.75it/s]

Epoch 90. Train Loss: (0.000113624585, 9.535835e-06), Test Loss : (0.06316081, 0.008536507), Test Accuracy : (0.9744444444444444, 0.9855555555555555), Train Accuracy : (1.0, 1.0) : Valid Accuracy : (0.918, 0.835)


100%|██████████| 270/270 [00:01<00:00, 256.62it/s]
100%|██████████| 270/270 [00:00<00:00, 283.31it/s]
100%|██████████| 270/270 [00:01<00:00, 269.47it/s]
100%|██████████| 270/270 [00:00<00:00, 284.30it/s]
100%|██████████| 270/270 [00:00<00:00, 274.53it/s]
100%|██████████| 270/270 [00:00<00:00, 274.23it/s]
100%|██████████| 270/270 [00:01<00:00, 255.14it/s]
100%|██████████| 270/270 [00:01<00:00, 257.04it/s]
100%|██████████| 270/270 [00:01<00:00, 254.27it/s]


#### Model Export and Visualization

In [None]:
# Export the hybridized model under the prefix "model"; the files are re-imported further down
# as model-symbol.json and model-0000.params.
model.export("model")

Netron으로 네트워크 시각화 

- https://lutzroeder.github.io/netron/
- 저장된 `model-symbol.json`을 입력해 시각화 

In [93]:
# Re-import the exported network; 'data0' and 'data1' name its two inputs
# (presumably the token indices and the lengths, in call order — confirm against model-symbol.json).
load_model = gluon.nn.SymbolBlock.imports("model-symbol.json", ['data0', 'data1'], "model-0000.params")

In [94]:
def get_entitytag(sent):
    """Predict the entity tags of one sentence with the re-imported model.

    Args:
        sent: The sentence, as a string or as a list of characters.

    Returns:
        The predicted tag tokens joined into one string, one token per
        position of the padded ``seq_len``-long input (positions past the end
        of the sentence are included).
    """
    # Split into characters exactly as preprocess() does; length_clip is
    # applied to a list of characters there, not to a raw string.
    chars = list(sent)
    # The length handed to the model must not exceed the padded width.
    sent_len = min(len(chars), seq_len)
    coded_sent = vocab_sent[length_clip(chars)]
    # Add a batch axis of size 1 behind the time axis (time-major input).
    co = nd.array(coded_sent).expand_dims(axis=1)
    # The multi-task network returns (intent_scores, entity_scores); only the
    # entity head is needed here.
    _, entity_scores = load_model(co, nd.array([sent_len,]))
    tag_ids = entity_scores.argmax(axis=2)[0].asnumpy().astype('int').tolist()
    ret_seq = vocab_entity.to_tokens(tag_ids)
    return ''.join(ret_seq)

### TODO
- Test Accuracy 95% 이상 올리기
- test_hidden 셋의 성능 90% 이상 올리기 
- Entity Tagging과 Intent Classification을 MultiTask Learning으로 통합해보기(성능이 좋아지나? 나빠지나?)



## entity tagging
Epoch 90. Train Loss: 7.2252215e-10, Test Loss : 0.01954503, Test Accuracy : 0.9722222222222222, Train Accuracy : 1.0 : Valid Accuracy : 0.976

## intent classification 
Epoch 90. Train Loss: 2.0868972e-08, Test Loss : 0.033646293, Test Accuracy : 0.9966666666666667, Train Accuracy : 1.0 : Valid Accuracy : 0.992

## multi-task-learning
Epoch 90. Train Loss: (0.000113624585, 9.535835e-06), Test Loss : (0.06316081, 0.008536507), Test Accuracy : (0.9744444444444444, 0.9855555555555555), Train Accuracy : (1.0, 1.0) : Valid Accuracy : (0.918, 0.835)