In [1]:
import tensorflow as tf

from gensim.models.keyedvectors import KeyedVectors
from konlpy.tag import Mecab

# from googletrans import Translator
from models.transformer import * 

import time
import numpy as np

import os
import sys
import urllib.request
import requests
import datetime
import pickle
import json

In [2]:
# Load the pre-built Korean and English noun dictionaries from disk.
# NOTE(review): pickle.load can execute arbitrary code, so only point these
# paths at files that were produced by a trusted source.
with open('./data/ko_noun_dict.pkl', "rb") as ko_file:
    ko_dict = pickle.load(ko_file)

with open('./data/en_noun_dict.pkl', "rb") as en_file:
    en_dict = pickle.load(en_file)

In [27]:
def get_noun_data(ko_dict, en_dict):
    """Yield position-aligned (Korean vector, English vector) pairs.

    The i-th value of ``ko_dict`` is paired with the i-th value of
    ``en_dict``; the keys are not consulted, so both mappings are expected
    to be stored in the same order.

    Args:
        ko_dict: mapping whose values are the Korean-side vectors.
        en_dict: mapping whose values are the English-side vectors. Its
            length decides how many pairs are produced.

    Yields:
        ``(ko_vector, en_vector)`` tuples, in insertion order.

    Raises:
        IndexError: when ``ko_dict`` holds fewer values than ``en_dict`` and
            iteration reaches the first missing index.
    """
    # Build each value list once. Rebuilding them inside the loop made the
    # generator quadratic in the number of entries.
    ko_vectors = list(ko_dict.values())
    en_vectors = list(en_dict.values())

    for i in range(len(en_vectors)):
        yield (ko_vectors[i], en_vectors[i])
    
def get_noun_data_2(ko_vec, en_vec):
    """Yield position-aligned (Korean vector, English vector) pairs.

    Args:
        ko_vec: indexable sequence of Korean-side vectors.
        en_vec: indexable sequence of English-side vectors. Its length
            decides how many pairs are produced.

    Yields:
        ``(ko_vector, en_vector)`` tuples, one per index of ``en_vec``.

    Raises:
        IndexError: when ``ko_vec`` is shorter than ``en_vec`` and iteration
            reaches the first missing index.
    """
    # Bound the loop by the argument itself. The previous version read the
    # notebook-level ``en_dict``, so the generator only worked when that
    # global existed and happened to have the same length as ``en_vec``.
    for i in range(len(en_vec)):
        yield ko_vec[i], en_vec[i]

In [38]:
# Stream (Korean vector, English vector) pairs through get_noun_data_2.
# Each element is declared as a pair of float64 vectors of length 300, and
# the stream is grouped into batches of 128 with any final short batch dropped.
dataset = tf.data.Dataset.from_generator(
    get_noun_data_2,
    output_types=(tf.float64, tf.float64),
    output_shapes=(tf.TensorShape([300]), tf.TensorShape([300])),
    args=(list(ko_dict.values()), list(en_dict.values())),
).batch(128, drop_remainder=True)

In [39]:
# Number of entries in the English dictionary.
len(en_dict)

576

In [40]:
# Sanity check: walk the dataset once, printing a 1-based batch counter
# followed by the Korean-side input tensor of that batch.
for i, (inp, real) in enumerate(dataset, start=1):
    print(i)
    print(inp)

1
tf.Tensor(
[[-2.34799996e-01  1.66319996e-01 -4.29280013e-01 ... -3.31140012e-01
   1.89950004e-01 -2.73939997e-01]
 [ 1.88099995e-01  1.99169993e-01 -3.89319986e-01 ...  8.55629966e-02
  -1.69309992e-02  4.76790011e-01]
 [ 1.88069999e-01 -4.09130007e-04  1.67890005e-02 ... -4.35310006e-01
  -1.13710001e-01  3.76199991e-01]
 ...
 [-2.30690002e-01  1.01310000e-01 -6.93660021e-01 ... -3.70799989e-01
   2.50369996e-01  1.08249998e+00]
 [-1.72539994e-01  2.10670009e-02  9.37939994e-03 ...  5.34860007e-02
   3.27100009e-02 -3.39320004e-01]
 [-4.60009992e-01  1.11610003e-01 -7.62679994e-01 ... -2.70570010e-01
  -3.07330012e-01 -1.95759997e-01]], shape=(128, 300), dtype=float64)
2
tf.Tensor(
[[ 0.20111001  0.047298    0.19909    ... -0.17775001  0.27785
  -0.51533002]
 [-0.054022    0.27379    -0.54825997 ... -0.37169001  0.35732001
  -0.11435   ]
 [ 0.22186001  0.54045999 -0.20812    ... -0.28387001 -0.10013
   1.10239995]
 ...
 [-0.24336     0.18984     0.12082    ...  0.12925     0.09642

In [41]:
# Build the model that is trained below. Encoder is presumably supplied by
# the star import from models.transformer -- TODO confirm.
# NOTE(review): input_vocab_size and maximum_position_encoding are both 0;
# confirm that Encoder accepts these values for already-embedded float input.
encoder = Encoder(
    num_layers=1,
    d_model=1,
    num_heads=1,
    dff=512,
    input_vocab_size=0,
    maximum_position_encoding=0,
)

In [42]:
# Number of full passes over the dataset performed by the training loop.
EPOCHS = 20

# Model hyperparameters. The first four repeat the literal values passed to
# the Encoder(...) constructor call.
# NOTE(review): none of these five names is referenced by the visible cells --
# confirm whether they are still needed.
num_layers = 1
d_model = 1
num_head = 1
dff = 512
dropout_rate = 0.1


In [43]:
# Huber loss instance shared by every call to loss_function.
loss_object = tf.keras.losses.Huber()


def loss_function(real, pred):
    """Return the Huber loss between the targets and the predictions.

    Args:
        real: target values.
        pred: model predictions for the same batch.

    Returns:
        The value produced by calling ``loss_object(real, pred)``.
    """
    return loss_object(real, pred)

In [44]:
# Adam optimizer with its default settings.
optimizer = tf.keras.optimizers.Adam()

# Running mean of the per-batch loss values fed to it during training.
train_loss = tf.keras.metrics.Mean(name='train_loss')

In [49]:
checkpoint_path = "./checkpoints/train"

# Track the encoder and the optimizer together so both are saved and
# restored as one unit; at most the 5 newest checkpoints are kept.
ckpt = tf.train.Checkpoint(encoder=encoder, optimizer=optimizer)
ckpt_manager = tf.train.CheckpointManager(
    ckpt,
    checkpoint_path,
    max_to_keep=5,
)

# Resume from the newest checkpoint when the manager reports one.
latest_checkpoint = ckpt_manager.latest_checkpoint
if latest_checkpoint:
    ckpt.restore(latest_checkpoint)
    print('Latest checkpoint restored!!')

In [50]:
@tf.function()
def train_step(inp, real):
    """Run one optimisation step of the encoder on a single batch.

    Args:
        inp: input batch passed to ``encoder``.
        real: target batch compared against the encoder output.

    Side effects:
        Applies one optimizer update to ``encoder.trainable_variables`` and
        feeds the batch loss into the ``train_loss`` metric.
    """
    # Record the forward pass so gradients can be taken afterwards.
    with tf.GradientTape() as tape:
        prediction = encoder(inp, training=True, mask=None)
        batch_loss = loss_function(real, prediction)

    variables = encoder.trainable_variables
    grads = tape.gradient(batch_loss, variables)
    optimizer.apply_gradients(zip(grads, variables))

    train_loss(batch_loss)

In [51]:
for epoch in range(EPOCHS):
    # Wall-clock start of this epoch, used by the timing line at the end.
    tic = time.time()

    # Start every epoch with an empty running-mean loss.
    train_loss.reset_states()

    for batch, (inp, real) in enumerate(dataset):
        train_step(inp, real)

        # Report the running mean loss on every 50th batch, batch 0 included.
        if not batch % 50:
            print(f'Epoch {epoch + 1} Batch {batch} Loss {train_loss.result() : .4f}')

    # Save a checkpoint once every 5 completed epochs.
    if not (epoch + 1) % 5:
        ckpt_save_path = ckpt_manager.save()
        print(f'Saving checkpoint for epoch {epoch + 1} at {ckpt_save_path}')

    print(f'Time taken for 1 epoch: {time.time() - tic:.2f} secs\n')

Model Input shape (128, 300, 1)
Scaled_attention Shape :  (128, 1, 300, 1)
Scaled_attention Shape :  (128, 300, 1, 1)
Concat attention Shape : (128, 300, 1)
Epoch 1 Batch 0 Loss  0.0385
Time taken for 1 epoch: 0.61 secs

Epoch 2 Batch 0 Loss  0.0384
Time taken for 1 epoch: 0.07 secs

Epoch 3 Batch 0 Loss  0.0384
Time taken for 1 epoch: 0.07 secs

Epoch 4 Batch 0 Loss  0.0384
Time taken for 1 epoch: 0.07 secs

Epoch 5 Batch 0 Loss  0.0384
Saving checkpoint for epoch 5 at ./checkpoints/train/ckpt-1
Time taken for 1 epoch: 0.09 secs

Epoch 6 Batch 0 Loss  0.0384
Time taken for 1 epoch: 0.06 secs

Epoch 7 Batch 0 Loss  0.0384
Time taken for 1 epoch: 0.06 secs

Epoch 8 Batch 0 Loss  0.0384
Time taken for 1 epoch: 0.07 secs

Epoch 9 Batch 0 Loss  0.0384
Time taken for 1 epoch: 0.08 secs

Epoch 10 Batch 0 Loss  0.0384
Saving checkpoint for epoch 10 at ./checkpoints/train/ckpt-2
Time taken for 1 epoch: 0.09 secs

Epoch 11 Batch 0 Loss  0.0384
Time taken for 1 epoch: 0.09 secs

Epoch 12 Batch 0