In [1]:
import tensorflow as tf

from gensim.models.keyedvectors import KeyedVectors
from konlpy.tag import Mecab

# from googletrans import Translator
from models.transformer import * 

import time
import numpy as np

import os
import sys
import urllib.request
import requests
import datetime
import pickle
import json

In [2]:
# Load the two pre-built dictionaries from disk. Later cells only consume
# their values (one 300-element vector per entry, per the dataset shapes);
# the keys are presumably nouns -- verify against the script that wrote them.
# NOTE(review): pickle.load executes arbitrary code embedded in the file, so
# only load pickles that were produced locally / by a trusted source.
with open('./data/ko_noun_dict.pkl', 'rb') as ko_file:
    ko_dict = pickle.load(ko_file)

with open('./data/en_noun_dict.pkl', 'rb') as en_file:
    en_dict = pickle.load(en_file)

In [3]:
def get_noun_data(ko_dict, en_dict):
    """Yield index-aligned (Korean, English) value pairs from two dictionaries.

    The i-th value of ``ko_dict`` is paired with the i-th value of
    ``en_dict`` (dictionary insertion order); keys are ignored.

    Args:
        ko_dict: Mapping whose values are the Korean-side vectors.
        en_dict: Mapping whose values are the English-side vectors. Its
            length determines how many pairs are produced.

    Yields:
        Tuples ``(ko_vector, en_vector)``.

    Raises:
        IndexError: If ``ko_dict`` has fewer entries than ``en_dict``.
    """
    # Materialise the value lists once; rebuilding them on every iteration
    # made the generator quadratic in the dictionary size.
    ko_vectors = list(ko_dict.values())
    en_vectors = list(en_dict.values())

    for i in range(len(en_vectors)):
        yield (ko_vectors[i], en_vectors[i])
    
def get_noun_data_2(ko_vec, en_vec):
    """Yield index-aligned (Korean, English) pairs from two sequences.

    Args:
        ko_vec: Indexable sequence of Korean-side vectors.
        en_vec: Indexable, sized sequence of English-side vectors. Its
            length determines how many pairs are produced.

    Yields:
        Tuples ``(ko_vector, en_vector)``.

    Raises:
        IndexError: If ``ko_vec`` is shorter than ``en_vec``.
    """
    # Bound the loop by the argument itself, not by the notebook-level
    # ``en_dict`` global, so the generator works for any input it is given.
    for i in range(len(en_vec)):
        yield ko_vec[i], en_vec[i]

In [4]:
# Stream (Korean, English) vector pairs out of get_noun_data_2 as float64
# tensors of shape [300], then group them into batches of 32. A final batch
# with fewer than 32 pairs is dropped (drop_remainder=True).
# NOTE(review): the recorded output contains a Keras dtype-autocast warning
# mentioning float64; it presumably originates from these float64 elements
# -- confirm.
dataset = tf.data.Dataset.from_generator(
    get_noun_data_2,
    output_types=(tf.float64, tf.float64),
    output_shapes=(tf.TensorShape([300]), tf.TensorShape([300])),
    args=(list(ko_dict.values()), list(en_dict.values())),
).batch(32, drop_remainder=True)

In [5]:
# Model under training. `Encoder` is not defined in this notebook; it is
# presumably provided by the star import from models.transformer -- confirm.
# NOTE(review): these literals repeat the values assigned to num_layers /
# d_model / num_head / dff in a later cell rather than referencing them.
encoder = Encoder(
    num_layers=1,
    d_model=8,
    num_heads=8,
    dff=512,
    input_vocab_size=0,
    maximum_position_encoding=0,
)

In [6]:
# Number of passes over the dataset made by the training loop.
EPOCHS = 20_000

# Model hyperparameters.
# NOTE(review): none of the names below is read by the code visible in this
# notebook (the encoder is built from literals) -- confirm they are needed.
num_layers = 1
d_model = 8
num_head = 8
dff = 512
dropout_rate = 0.1


In [7]:
# Loss objects used by the loss_function* wrappers defined in this cell:
# Huber and mean-squared-error with their default settings, and a KL
# divergence whose per-sample losses are summed (Reduction.SUM).
loss_object = tf.keras.losses.Huber()
loss_object_MSE = tf.keras.losses.MeanSquaredError()
loss_object_KLD = tf.keras.losses.KLDivergence(
    reduction=tf.keras.losses.Reduction.SUM,
)

def loss_function(real, pred):
    """Return the Huber loss between ``real`` and ``pred``.

    Args:
        real: Target values.
        pred: Model predictions.

    Returns:
        The value produced by the module-level ``loss_object`` (Huber).
    """
    # Debug output: shapes of both arguments.
    print(real.shape)
    print("pred shape : ",pred.shape)
    return loss_object(real, pred)

def loss_function_KLD(real, pred):
    """Return the summed KL-divergence loss between ``real`` and ``pred``.

    Args:
        real: Target values.
        pred: Model predictions.

    Returns:
        The value produced by the module-level ``loss_object_KLD``.
    """
    # Debug output: shapes of both arguments.
    print(real.shape)
    print("pred shape : ",pred.shape)
    return loss_object_KLD(real, pred)

def loss_function_MSE(real, pred):
    """Return the mean-squared-error loss between ``real`` and ``pred``.

    Args:
        real: Target values.
        pred: Model predictions.

    Returns:
        The value produced by the module-level ``loss_object_MSE``.
    """
    return loss_object_MSE(real, pred)

In [8]:
# Running mean of the per-batch loss; the training loop resets it at the
# start of every epoch and prints its current value.
train_loss = tf.keras.metrics.Mean(name='train_loss')

# Adam optimiser with a fixed learning rate of 1e-4.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)

In [9]:
# Directory in which checkpoints for this run are written and looked up.
checkpoint_path = './checkpoints/train(1times embedding, MSE)'

# Save and restore the encoder together with the optimizer.
ckpt = tf.train.Checkpoint(encoder=encoder, optimizer=optimizer)

# Only the five most recent checkpoints are kept on disk.
ckpt_manager = tf.train.CheckpointManager(
    checkpoint=ckpt,
    directory=checkpoint_path,
    max_to_keep=5,
)

# Resume from the newest checkpoint in the directory when there is one.
if ckpt_manager.latest_checkpoint:
    ckpt.restore(ckpt_manager.latest_checkpoint)
    print('Latest checkpoint restored!!')

In [10]:
@tf.function()
def train_step(inp, real):
    """Run one optimisation step of the encoder on a single batch.

    The encoder is called in training mode without a mask, its output is
    compared to ``real`` with the MSE loss, and the resulting gradients are
    applied to the encoder's trainable variables. The batch loss is also fed
    into the ``train_loss`` metric.

    Args:
        inp: Batch of input vectors passed to the encoder.
        real: Batch of target vectors the encoder output is compared to.

    Returns:
        None.
    """
    with tf.GradientTape() as tape:
        # Both tensors are multiplied by a scale factor that is currently 1.
        scaled_inp = inp * 1
        scaled_real = real * 1

        prediction = encoder(scaled_inp, training=True, mask=None)
        loss = loss_function_MSE(scaled_real, prediction)

    trainable = encoder.trainable_variables
    grads = tape.gradient(loss, trainable)
    optimizer.apply_gradients(zip(grads, trainable))

    # Accumulate this batch's loss into the running mean.
    train_loss(loss)

In [11]:
# Main training loop: EPOCHS passes over the batched dataset.
for epoch in range(EPOCHS):
    tic = time.time()

    # Start every epoch with an empty running-mean loss.
    train_loss.reset_states()

    for batch, (inp, real) in enumerate(dataset):
        train_step(inp, real)

        # Report the running mean loss on every 10th batch (0, 10, 20, ...).
        if not batch % 10:
            print(f'Epoch {epoch + 1} Batch {batch} Loss {train_loss.result() : .4f}')

    # Write a checkpoint after every 500th epoch.
    if not (epoch + 1) % 500:
        ckpt_save_path = ckpt_manager.save()
        print(f'Saving checkpoint for epoch {epoch + 1} at {ckpt_save_path}')

    print(f'Time taken for 1 epoch: {time.time() - tic:.2f} secs\n')



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Model Input shape (32, 300, 1)
(32, 300, 1)
Scaled_attention Shape :  (32, 8, 300, 1)
Scaled_attention Shape :  (32, 300, 8, 1)
Concat attention Shape : (32, 300, 8)
(32, 300, 1)
Model Input shape (32, 300, 1)
(32, 300, 1)
Scaled_attention Shape :  (32, 8, 300, 1)
Scaled_attention Shape :  (32, 300, 8, 1)
Concat attention Shape : (32, 300, 8)
(32, 300, 1)
Epoch 1 Batch 0 Loss  0.0690
Epoch 1 Batch 10 Loss  0.0789
Time taken for 1 epoch: 2.07 secs

Epoch 2 Batch 0 Loss  0.0689
Epoch 2 Batch 10 Loss  0.0789
Time taken for 1 epoch: 0.12 secs

Epoch 3 Batch 0 Loss  0.0689
Epoch 3 Batch 10 Loss  0.0789
Time taken for 1 epoch: 0.12 secs

Epoch 4 Batch 0 Loss  0.0689
Epoch 4 Batch 10 Loss  0.0789


Epoch 76 Batch 10 Loss  0.0789
Time taken for 1 epoch: 0.12 secs

Epoch 77 Batch 0 Loss  0.0689
Epoch 77 Batch 10 Loss  0.0789
Time taken for 1 epoch: 0.12 secs

Epoch 78 Batch 0 Loss  0.0689
Epoch 78 Batch 10 Loss  0.0789
Time taken for 1 epoch: 0.13 secs

Epoch 79 Batch 0 Loss  0.0689
Epoch 79 Batch 10 Loss  0.0789
Time taken for 1 epoch: 0.12 secs

Epoch 80 Batch 0 Loss  0.0689
Epoch 80 Batch 10 Loss  0.0789
Time taken for 1 epoch: 0.12 secs

Epoch 81 Batch 0 Loss  0.0689
Epoch 81 Batch 10 Loss  0.0789
Time taken for 1 epoch: 0.12 secs

Epoch 82 Batch 0 Loss  0.0689
Epoch 82 Batch 10 Loss  0.0789
Time taken for 1 epoch: 0.12 secs

Epoch 83 Batch 0 Loss  0.0689
Epoch 83 Batch 10 Loss  0.0789
Time taken for 1 epoch: 0.12 secs

Epoch 84 Batch 0 Loss  0.0689
Epoch 84 Batch 10 Loss  0.0789
Time taken for 1 epoch: 0.12 secs

Epoch 85 Batch 0 Loss  0.0689
Epoch 85 Batch 10 Loss  0.0789
Time taken for 1 epoch: 0.12 secs

Epoch 86 Batch 0 Loss  0.0689
Epoch 86 Batch 10 Loss  0.0789
Time take

KeyboardInterrupt: 