In [1]:
import tensorflow as tf

from gensim.models.keyedvectors import KeyedVectors
from konlpy.tag import Mecab

# from googletrans import Translator
from models.transformer import * 

import time
import numpy as np

import os
import sys
import urllib.request
import requests
import datetime
import pickle
import json

In [2]:
def _load_pickle(path):
    """Read a single pickled object from ``path`` and return it.

    NOTE(review): ``pickle.load`` can execute arbitrary code while
    deserialising, so only point this at files produced by a trusted source.
    """
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Korean / English noun dictionaries. Later cells only consume their
# ``values()`` (declared there as 300-element float vectors); the key type is
# not visible in this notebook -- presumably the noun strings, TODO confirm.
ko_dict = _load_pickle('./data/ko_noun_dict.pkl')
en_dict = _load_pickle('./data/en_noun_dict.pkl')

In [3]:
def get_noun_data(ko_dict, en_dict):
    """Yield ``(ko_vector, en_vector)`` pairs aligned by insertion order.

    The i-th value of ``ko_dict`` is paired with the i-th value of
    ``en_dict``; only the values are yielded, never the keys.

    Args:
        ko_dict: mapping whose values are the Korean-side vectors.
        en_dict: mapping whose values are the English-side vectors. Its
            length determines how many pairs are produced.

    Yields:
        Tuples ``(ko_vector, en_vector)``.

    Raises:
        IndexError: if ``ko_dict`` has fewer entries than ``en_dict``.
    """
    # Materialise each value list once. Rebuilding them inside the loop made
    # the generator quadratic in the number of entries.
    ko_vectors = list(ko_dict.values())
    en_vectors = list(en_dict.values())

    for i in range(len(en_vectors)):
        yield (ko_vectors[i], en_vectors[i])
    
def get_noun_data_2(ko_vec, en_vec):
    """Yield ``(ko_vector, en_vector)`` pairs from two index-aligned sequences.

    Args:
        ko_vec: indexable sequence of Korean-side vectors.
        en_vec: indexable sequence of English-side vectors. Its length
            determines how many pairs are produced.

    Yields:
        ``(ko_vec[i], en_vec[i])`` for every index ``i`` of ``en_vec``.

    Raises:
        IndexError: if ``ko_vec`` is shorter than ``en_vec``.
    """
    # Use the length of the argument, not the notebook-global ``en_dict``:
    # the previous bound silently depended on hidden kernel state and was
    # wrong whenever the function was called with any other data.
    for i in range(len(en_vec)):
        yield ko_vec[i], en_vec[i]

In [4]:
# Training pipeline: stream (ko_vector, en_vector) pairs out of
# get_noun_data_2, each declared as a float64 tensor of shape [300], then group
# them into batches of 128. drop_remainder=True discards a final short batch,
# so every batch has the same static shape.
dataset = tf.data.Dataset.from_generator(
    get_noun_data_2,
    output_types=(tf.float64, tf.float64),
    output_shapes=(tf.TensorShape([300]), tf.TensorShape([300])),
    args=(list(ko_dict.values()), list(en_dict.values())),
).batch(128, drop_remainder=True)

In [28]:
# Model under training. ``Encoder`` is not defined in this notebook --
# presumably it arrives via the star import from models.transformer (TODO
# confirm).
# NOTE(review): these literals repeat the values of the hyperparameter
# constants (num_layers, d_model, dff, num_head) assigned in a later cell;
# keep the two in sync by hand.
encoder = Encoder(
    num_layers=1,
    d_model=8,
    num_heads=8,
    dff=512,
    input_vocab_size=0,
    maximum_position_encoding=0,
)

In [12]:
# --- Training schedule ---
# Number of passes over the dataset performed by the training loop.
EPOCHS = 2000

# --- Model hyperparameters ---
# NOTE(review): none of the names below is read by any visible cell; the
# Encoder(...) call hard-codes equal literals, and dropout_rate is not passed
# anywhere in this notebook.
num_layers = 1
num_head = 8
d_model = 8
dff = 512
dropout_rate = 0.1


In [13]:
# Huber loss constructed with the library's default arguments.
loss_object = tf.keras.losses.Huber()


def loss_function(real, pred):
    """Return the Huber loss between targets ``real`` and predictions ``pred``.

    The two shape prints are debugging aids. This function is called from the
    ``tf.function``-decorated ``train_step``, so they presumably fire only
    while the graph is being traced rather than on every step (TODO confirm).
    """
    print(real.shape)
    print("pred shape : ",pred.shape)
    return loss_object(real, pred)

In [38]:
# KL-divergence loss, reduced by summation.
loss_object_KLD = tf.keras.losses.KLDivergence(
    reduction=tf.keras.losses.Reduction.SUM,
)


def loss_function_KLD(real, pred):
    """Return the summed KL divergence between ``real`` and ``pred``.

    Alternative to ``loss_function``; no visible cell calls it. It prints the
    shapes of both arguments for debugging before computing the loss.
    """
    print(real.shape)
    print("pred shape : ",pred.shape)
    return loss_object_KLD(real, pred)

In [39]:
# Adam with a fixed learning rate of 1e-4; train_step uses it to update the
# encoder's trainable variables.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)

# Running mean of the per-batch loss; train_step feeds it and the training
# loop resets it at the start of every epoch.
train_loss = tf.keras.metrics.Mean(name='train_loss')

In [43]:
# Directory that receives the checkpoints for this experiment.
checkpoint_path = f'./checkpoints/train(1000times embedding, Huber_loss)'

# Track both the model weights and the optimizer state so training can resume.
ckpt = tf.train.Checkpoint(encoder=encoder, optimizer=optimizer)

# Keep only the five most recent checkpoints on disk.
ckpt_manager = tf.train.CheckpointManager(
    ckpt,
    checkpoint_path,
    max_to_keep=5,
)

# Resume from the newest checkpoint in checkpoint_path when one is present;
# otherwise training starts from the freshly constructed encoder.
if ckpt_manager.latest_checkpoint:
    ckpt.restore(ckpt_manager.latest_checkpoint)
    print('Latest checkpoint restored!!')

In [44]:
@tf.function()
def train_step(inp, real):
    """Run one optimisation step of the encoder on a single batch.

    Both ``inp`` and ``real`` are multiplied by 1000 before use. The scaled
    input goes through ``encoder`` in training mode with no mask, and the
    loss from ``loss_function`` is taken against the scaled target.

    Side effects: applies one optimizer update to the encoder's trainable
    variables and adds the batch loss to the ``train_loss`` metric.
    Returns nothing.
    """
    scale = 1000  # fixed factor applied to both inputs and targets

    with tf.GradientTape() as tape:
        prediction = encoder(inp * scale, training=True, mask=None)
        batch_loss = loss_function(real * scale, prediction)

    weights = encoder.trainable_variables
    optimizer.apply_gradients(zip(tape.gradient(batch_loss, weights), weights))

    train_loss(batch_loss)

In [45]:
# Main training loop: EPOCHS passes over `dataset`, one train_step per batch.
for epoch in range(EPOCHS):
    # Wall-clock start of this epoch, used for the timing line at the bottom.
    tic = time.time()
    
    # Start the running loss average from scratch so each epoch reports its own mean.
    train_loss.reset_states()
    
    for (batch, (inp, real)) in enumerate(dataset):
        train_step(inp, real)
        
        # Progress line every 50 batches (always includes batch 0); the value
        # printed is the mean loss accumulated so far in this epoch.
        if batch % 50 == 0:
            print(f'Epoch {epoch + 1} Batch {batch} Loss {train_loss.result() : .4f}')
                  
    # Save a checkpoint every fifth epoch (epochs are reported 1-based).
    if (epoch + 1) % 5 == 0:
        ckpt_save_path = ckpt_manager.save()
        print(f'Saving checkpoint for epoch {epoch + 1} at {ckpt_save_path}')

    print(f'Time taken for 1 epoch: {time.time() - tic:.2f} secs\n')

Model Input shape (128, 300, 1)
(128, 300, 1)
Scaled_attention Shape :  (128, 8, 300, 1)
Scaled_attention Shape :  (128, 300, 8, 1)
Concat attention Shape : (128, 300, 8)
(128, 300, 1)
out1 shape : (128, 300, 1)
out2 shape :  (128, 300, 1)
(128, 300)
pred shape :  (128, 300, 1)
Epoch 1 Batch 0 Loss  217.7459
Time taken for 1 epoch: 0.69 secs

Epoch 2 Batch 0 Loss  217.7426
Time taken for 1 epoch: 0.12 secs

Epoch 3 Batch 0 Loss  217.7384
Time taken for 1 epoch: 0.12 secs

Epoch 4 Batch 0 Loss  217.7342
Time taken for 1 epoch: 0.11 secs

Epoch 5 Batch 0 Loss  217.7302
Saving checkpoint for epoch 5 at ./checkpoints/train(1000times embedding, Huber_loss)/ckpt-1
Time taken for 1 epoch: 0.15 secs

Epoch 6 Batch 0 Loss  217.7265
Time taken for 1 epoch: 0.11 secs

Epoch 7 Batch 0 Loss  217.7232
Time taken for 1 epoch: 0.12 secs

Epoch 8 Batch 0 Loss  217.7202
Time taken for 1 epoch: 0.12 secs

Epoch 9 Batch 0 Loss  217.7175
Time taken for 1 epoch: 0.12 secs

Epoch 10 Batch 0 Loss  217.7151
Sa

Time taken for 1 epoch: 0.11 secs

Epoch 95 Batch 0 Loss  217.6908
Saving checkpoint for epoch 95 at ./checkpoints/train(1000times embedding, Huber_loss)/ckpt-19
Time taken for 1 epoch: 0.15 secs

Epoch 96 Batch 0 Loss  217.6909
Time taken for 1 epoch: 0.10 secs

Epoch 97 Batch 0 Loss  217.6909
Time taken for 1 epoch: 0.11 secs

Epoch 98 Batch 0 Loss  217.6910
Time taken for 1 epoch: 0.12 secs

Epoch 99 Batch 0 Loss  217.6910
Time taken for 1 epoch: 0.12 secs

Epoch 100 Batch 0 Loss  217.6911
Saving checkpoint for epoch 100 at ./checkpoints/train(1000times embedding, Huber_loss)/ckpt-20
Time taken for 1 epoch: 0.15 secs

Epoch 101 Batch 0 Loss  217.6911
Time taken for 1 epoch: 0.11 secs

Epoch 102 Batch 0 Loss  217.6912
Time taken for 1 epoch: 0.12 secs

Epoch 103 Batch 0 Loss  217.6912
Time taken for 1 epoch: 0.11 secs

Epoch 104 Batch 0 Loss  217.6913
Time taken for 1 epoch: 0.11 secs

Epoch 105 Batch 0 Loss  217.6913
Saving checkpoint for epoch 105 at ./checkpoints/train(1000times e

Time taken for 1 epoch: 0.12 secs

Epoch 189 Batch 0 Loss  217.6942
Time taken for 1 epoch: 0.12 secs

Epoch 190 Batch 0 Loss  217.6942
Saving checkpoint for epoch 190 at ./checkpoints/train(1000times embedding, Huber_loss)/ckpt-38
Time taken for 1 epoch: 0.15 secs

Epoch 191 Batch 0 Loss  217.6942
Time taken for 1 epoch: 0.10 secs

Epoch 192 Batch 0 Loss  217.6942
Time taken for 1 epoch: 0.12 secs

Epoch 193 Batch 0 Loss  217.6942
Time taken for 1 epoch: 0.12 secs

Epoch 194 Batch 0 Loss  217.6943
Time taken for 1 epoch: 0.12 secs

Epoch 195 Batch 0 Loss  217.6943
Saving checkpoint for epoch 195 at ./checkpoints/train(1000times embedding, Huber_loss)/ckpt-39
Time taken for 1 epoch: 0.14 secs

Epoch 196 Batch 0 Loss  217.6943
Time taken for 1 epoch: 0.10 secs

Epoch 197 Batch 0 Loss  217.6943
Time taken for 1 epoch: 0.12 secs

Epoch 198 Batch 0 Loss  217.6943
Time taken for 1 epoch: 0.12 secs

Epoch 199 Batch 0 Loss  217.6943
Time taken for 1 epoch: 0.12 secs

Epoch 200 Batch 0 Loss  2

Time taken for 1 epoch: 0.12 secs

Epoch 283 Batch 0 Loss  217.6949
Time taken for 1 epoch: 0.12 secs

Epoch 284 Batch 0 Loss  217.6949
Time taken for 1 epoch: 0.12 secs

Epoch 285 Batch 0 Loss  217.6949
Saving checkpoint for epoch 285 at ./checkpoints/train(1000times embedding, Huber_loss)/ckpt-57
Time taken for 1 epoch: 0.15 secs

Epoch 286 Batch 0 Loss  217.6949
Time taken for 1 epoch: 0.10 secs

Epoch 287 Batch 0 Loss  217.6949
Time taken for 1 epoch: 0.12 secs

Epoch 288 Batch 0 Loss  217.6949
Time taken for 1 epoch: 0.12 secs

Epoch 289 Batch 0 Loss  217.6949
Time taken for 1 epoch: 0.12 secs

Epoch 290 Batch 0 Loss  217.6949
Saving checkpoint for epoch 290 at ./checkpoints/train(1000times embedding, Huber_loss)/ckpt-58
Time taken for 1 epoch: 0.15 secs

Epoch 291 Batch 0 Loss  217.6949
Time taken for 1 epoch: 0.11 secs

Epoch 292 Batch 0 Loss  217.6950
Time taken for 1 epoch: 0.12 secs

Epoch 293 Batch 0 Loss  217.6950
Time taken for 1 epoch: 0.12 secs

Epoch 294 Batch 0 Loss  2

KeyboardInterrupt: 