### Import libraries

In [9]:
from model import classic_model, losses, dot_prod_attention
from data import data_generation, batch_creator, gp_kernels
from keras.callbacks import ModelCheckpoint
from helpers import helpers, masks
from inference import infer
import matplotlib.pyplot as plt
import tensorflow_addons as tfa
import tensorflow as tf
import numpy as np
import matplotlib 
import time
import keras

In [10]:
# Root output directory; the training cell writes TensorBoard summaries under save_dir + '/logs/'.
# NOTE(review): machine-specific absolute path — must be changed to run on another machine.
save_dir = '/Users/omernivron/Downloads/GPT'

In [11]:
# Generate the data set with the RBF kernel. Per the train_step/test_step docstrings, outputs 1/2 are the
# position (x) arrays and outputs 3/4 the target arrays, for train and test respectively.
# The 5th output is discarded; df_te (6th) is indexed row-wise in the inspection cells further down.
# 50000 is presumably the number of generated sequences — TODO confirm against data_generation.
pad_pos_tr, pad_pos_te, pad_y_fren_tr, pad_y_fren_te, _, df_te = data_generation.data_generator_for_gp_mimick_gpt(50000, gp_kernels.rbf_kernel)

In [12]:
# Keras MSE loss object. Not referenced by train_step/test_step in the visible cells,
# which compute their loss through losses.loss_function instead.
loss_object = tf.keras.losses.MeanSquaredError()
# Running means of the loss value passed in by train_step / test_step.
train_loss = tf.keras.metrics.Mean(name='train_loss')
test_loss = tf.keras.metrics.Mean(name='test_loss')
# Running means of the mse returned by losses.loss_function; train_step / test_step
# pass the returned mask as the second argument of update_state (the sample weights).
m_tr = tf.keras.metrics.Mean()
m_te = tf.keras.metrics.Mean()

In [13]:
@tf.function
def train_step(decoder, optimizer_c, train_loss, m_tr, pos, tar, pos_mask):
    '''
    Run one optimisation step of the decoder on a single batch.

    The forward pass and the loss are computed inside a GradientTape() clause so that
    gradients w.r.t. the decoder's trainable variables can be taken afterwards. see
    (1) https://www.tensorflow.org/guide/migrate
    (2) https://www.tensorflow.org/tutorials/quickstart/advanced
    ------------------
    Parameters:
    decoder: the model; called as decoder(pos, tar_inp, True, pos_mask, combined_mask_tar) and expected to return (pred, pred_log_sig)
    optimizer_c: optimizer whose apply_gradients is used to update decoder.trainable_variables
    train_loss: metric object that is called with the batch loss
    m_tr: metric object updated with the mse returned by losses.loss_function, using the returned mask as sample weights
    pos (np array): array of positions (x values) - the 1st/2nd output from data_generator_for_gp_mimick_gpt
    tar (np array): array of targets. Notice that if dealing with sequences, we typically want to have the targets go from 0 to n-1. The 3rd/4th output from data_generator_for_gp_mimick_gpt
    pos_mask (np array): see description in position_mask function
    ------------------
    Returns:
    None. The decoder weights and the two metric objects are updated in place.
    '''
    # Shift by one: the decoder is fed columns 0..n-2 and is scored against columns 1..n-1.
    tar_inp = tar[:, :-1]
    tar_real = tar[:, 1:]
    combined_mask_tar = masks.create_masks(tar_inp)
    # Only one gradient is taken from this tape, so a non-persistent tape is sufficient;
    # it releases its resources as soon as tape.gradient() returns.
    with tf.GradientTape() as tape:
        pred, pred_log_sig = decoder(pos, tar_inp, True, pos_mask, combined_mask_tar)

        loss, mse, mask = losses.loss_function(tar_real, pred, pred_log_sig)

    gradients = tape.gradient(loss, decoder.trainable_variables)
    # Ask the optimizer to apply the processed gradients.
    optimizer_c.apply_gradients(zip(gradients, decoder.trainable_variables))
    train_loss(loss)
    # mask is passed as the sample weights of the running mean.
    m_tr.update_state(mse, mask)

In [14]:
@tf.function
def test_step(decoder, test_loss, m_te, pos_te, tar_te, pos_mask_te):
    '''
    Evaluate the decoder on one batch and accumulate the test metrics.
    No gradients are taken and no weights are updated.
    ---------------
    Parameters:
    decoder: the model; called with its third positional argument set to False and expected to return (pred, pred_log_sig)
    test_loss: metric object that is called with the batch loss
    m_te: metric object updated with the mse returned by losses.loss_function, using the returned mask as sample weights
    pos_te (np array): array of positions (x values) - the 1st/2nd output from data_generator_for_gp_mimick_gpt
    tar_te (np array): array of targets. The 3rd/4th output from data_generator_for_gp_mimick_gpt
    pos_mask_te (np array): see description in position_mask function
    ---------------
    Returns:
    None. The two metric objects are updated in place.
    '''
    # Shift by one: the decoder is fed columns 0..n-2 and is scored against columns 1..n-1.
    decoder_input = tar_te[:, :-1]
    expected = tar_te[:, 1:]
    target_mask = masks.create_masks(decoder_input)

    # The third argument is the training flag. training=False is only needed if there
    # are layers with different behavior during training versus inference (e.g. Dropout).
    outputs = decoder(pos_te, decoder_input, False, pos_mask_te, target_mask)
    mean_pred, log_sig_pred = outputs

    batch_loss, batch_mse, batch_mask = losses.loss_function(expected, mean_pred, log_sig_pred)
    test_loss(batch_loss)
    m_te.update_state(batch_mse, batch_mask)

In [15]:
# Make float64 the default Keras float type. This cell has to run before the Decoder is
# constructed in the training cell for the setting to apply to its layers.
tf.keras.backend.set_floatx('float64')

In [None]:
if __name__ == '__main__':
    # Training script: builds the model, restores the latest checkpoint if one exists, then
    # trains for EPOCHS epochs, evaluating / logging / checkpointing every 50 batches.

    # Seed TensorFlow before the optimizer and the model are built, so that any random
    # state created at construction time is covered by the seed as well.
    tf.random.set_seed(1)
    writer = tf.summary.create_file_writer(save_dir + '/logs/')
    optimizer_c = tf.keras.optimizers.Adam()
    decoder = classic_model.Decoder(16)
    EPOCHS = 10
    batch_s  = 128
    run = 0; step = 0
    # Number of full batches per epoch (floor division).
    # NOTE(review): batch_s is not passed to the batch creator below; this assumes the
    # creator's own batch size is also 128 — confirm against batch_creator.
    num_batches = pad_y_fren_tr.shape[0] // batch_s
    ckpt = tf.train.Checkpoint(step=tf.Variable(1), optimizer = optimizer_c, net = decoder)
    # Checkpoints live under the same root as the logs; one sub-folder per run id.
    main_folder = save_dir + "/ckpt/check_"
    folder = main_folder + str(run); helpers.mkdir(folder)
    manager = tf.train.CheckpointManager(ckpt, folder, max_to_keep=3)
    ckpt.restore(manager.latest_checkpoint)
    if manager.latest_checkpoint:
        print("Restored from {}".format(manager.latest_checkpoint))
    else:
        print("Initializing from scratch.")
    # NOTE(review): the Python counter `step` always restarts at 0, whereas ckpt.step is
    # restored from the checkpoint; after a restore the two counters no longer agree.

    with writer.as_default():
        for epoch in range(EPOCHS):
            start = time.time()

            for batch_n in range(num_batches):
                # Metrics are reset on every batch, so the values reported below
                # reflect the most recent batch only, not a running epoch average.
                m_tr.reset_states(); train_loss.reset_states()
                m_te.reset_states(); test_loss.reset_states()
                batch_pos_tr, batch_tar_tr, _ = batch_creator.create_batch_gp_mim_2(pad_pos_tr, pad_y_fren_tr)
                # batch_tar_tr shape := 128 X 59 = (batch_size, max_seq_len)
                # batch_pos_tr shape := 128 X 59 = (batch_size, max_seq_len)
                batch_pos_mask = masks.position_mask(batch_pos_tr)
                train_step(decoder, optimizer_c, train_loss, m_tr, batch_pos_tr, batch_tar_tr, batch_pos_mask)

                # Every 50 batches: evaluate on one test batch, report, log and checkpoint.
                if batch_n % 50 == 0:
                    batch_pos_te, batch_tar_te, _ = batch_creator.create_batch_gp_mim_2(pad_pos_te, pad_y_fren_te)
                    batch_pos_mask_te = masks.position_mask(batch_pos_te)
                    test_step(decoder, test_loss, m_te, batch_pos_te, batch_tar_te, batch_pos_mask_te)
                    helpers.print_progress(epoch, batch_n, train_loss.result(), test_loss.result(), m_tr.result())
                    helpers.tf_summaries(run, step, train_loss.result(), test_loss.result(), m_tr.result(), m_te.result())
                    manager.save()
                step += 1
                ckpt.step.assign_add(1)

            print ('Time taken for 1 epoch: {} secs\n'.format(time.time() - start))

Already exists
Initializing from scratch.
Tensor("decoder_1/Bsig/Relu:0", shape=(128, 58, 16, 16), dtype=float64)
Tensor("decoder_1/Bsig/Relu:0", shape=(128, 58, 16, 16), dtype=float64)
Tensor("decoder_1/Bsig/Relu:0", shape=(128, 58, 16, 16), dtype=float64)
Epoch 0 batch 0 train Loss 4.1967 test Loss 1.4344 with MSE metric 3.5949
Epoch 0 batch 50 train Loss 0.8677 test Loss 0.4841 with MSE metric 0.8245
Epoch 0 batch 100 train Loss 0.6050 test Loss 0.3238 with MSE metric 0.5258
Epoch 0 batch 150 train Loss 0.5343 test Loss 0.2313 with MSE metric 0.4746
Epoch 0 batch 200 train Loss 0.5345 test Loss 0.1645 with MSE metric 0.4723
Epoch 0 batch 250 train Loss 0.4268 test Loss 0.1206 with MSE metric 0.3982
Epoch 0 batch 300 train Loss 0.2378 test Loss 0.1647 with MSE metric 0.2763
Time taken for 1 epoch: 291.3145179748535 secs

Epoch 1 batch 0 train Loss 0.2607 test Loss 0.0810 with MSE metric 0.2932
Epoch 1 batch 50 train Loss 0.1663 test Loss 0.0260 with MSE metric 0.2451
Epoch 1 batch 10

# Test targets with the first column dropped — the same slice (tar_te[:, 1:]) that
# test_step scores the predictions against.
tar = pad_y_fren_te[:, 1:]



In [None]:
# R^2-style score for target column 5: 1 - MSE / Var(y).
# 0.0165 is a hand-entered error value (presumably an MSE read off a run — TODO confirm).
# The variance (sum of squared deviations / n) must be formed first and then divide the MSE;
# writing `mse / ss / n` would instead divide the MSE by ss * n.
1 - 0.0165 / (sum((tar[:, 5] - np.mean(tar[:, 5]))**2) / len(tar[:, 5]))

In [None]:
# Centre tar column-wise: subtract from every entry the mean of its column (mean over axis 0).
# Note: this displays the whole array.
tar - np.mean(tar, 0)

In [None]:
# Dimensions of tar (rows, columns).
tar.shape

In [None]:
# Mean of target column 0.
np.mean(tar[:, 0])

In [None]:
# Sum of squared deviations of target column 0 from its mean, divided by a hard-coded 10000.
# NOTE(review): 10000 is presumably the number of rows of tar (making this the variance) —
# confirm against tar.shape[0].
sum((tar[:, 0] - np.mean(tar[:, 0]))**2 )/ 10000

In [None]:
# Variance of all entries of tar around the global mean: the inner built-in sum adds the rows
# together (one total per column), the outer sum adds those totals, and the result is divided
# by the number of entries.
sum(sum((tar - np.mean(tar))**2)) / (tar.shape[0] * tar.shape[1])

In [None]:
# Row 560 of df_te as a single-row 2-D array; used below as the x positions.
# Presumably df_te stores positions and targets in consecutive rows (560 / 561) — TODO confirm.
pos = df_te[560, :].reshape(1, -1)

In [None]:
# First 39 values of row 561 of df_te as a single-row 2-D array; passed to inference below.
# Note: this rebinds `tar`, which previously held pad_y_fren_te[:, 1:].
tar = df_te[561, :39].reshape(1, -1)

In [None]:
# Display the full row 561 of df_te (the row the 39 values in tar were taken from).
df_te[561, :]

In [None]:
# Call inference with the positions, the 39 known targets and the literal 20
# (meaning of 20 not visible here — TODO confirm).
# NOTE(review): the imports cell binds `infer` (from inference import infer), not `inference`;
# unless `inference` is defined in a cell not shown here, this raises NameError on a fresh kernel.
a = inference(pos, tar, 20)

In [None]:
# Use a wide, short figure for this plot only; rc_context restores the previous rcParams on exit.
with matplotlib.rc_context({'figure.figsize': [10,2.5]}):
    # Black: the 39 (position, target) pairs that were passed to inference.
    plt.scatter(pos[:, :39], tar[:, :39], c='black')
    # Default colour: the entries of the inference result from index 39 on, drawn at positions 39..57.
    # Assumes a[39:] holds 19 values, matching pos[:, 39:58] — TODO confirm the shape of `a`.
    plt.scatter(pos[:, 39:58], a[39:])
    # Red: the values of row 561 of df_te at the same positions, for comparison.
    plt.scatter(pos[:, 39:58], df_te[561, 39:58], c='red')

In [None]:
# tf.data.Dataset(tf.Tensor(pad_pos_tr, value_index = 0 , dtype = tf.float32))