### Import libraries

In [12]:
from model import model, losses, dot_prod_attention
from data import data_generation, batch_creator, gp_kernels
from keras.callbacks import ModelCheckpoint
from helpers import helpers, masks
import matplotlib.pyplot as plt
import tensorflow_addons as tfa
import tensorflow as tf
import numpy as np
import matplotlib 
import time
import keras

In [2]:
save_dir = '/Users/omernivron/Downloads/GPT'

In [3]:
pad_pos_tr, pad_pos_te, pad_y_fren_tr, pad_y_fren_te, _, df_te = data_generation.data_generator_for_gp_mimick_gpt(50000, gp_kernels.rbf_kernel)

In [4]:
pp = masks.position_mask(pad_pos_tr)
pp_te = masks.position_mask(pad_pos_te)

In [65]:
loss_object = tf.keras.losses.MeanSquaredError()
train_loss = tf.keras.metrics.Mean(name='train_loss')
test_loss = tf.keras.metrics.Mean(name='test_loss')
r_sq_tr = tfa.metrics.RSquare(dtype = tf.float64)
r_sq_te = tfa.metrics.RSquare()

In [74]:
@tf.function
def train_step(pos, tar, pos_mask):
    '''
    A typical train step function for TF2. Elements which we wish to track their gradient
    has to be inside the GradientTape() clause. see (1) https://www.tensorflow.org/guide/migrate 
    (2) https://www.tensorflow.org/tutorials/quickstart/advanced
    ------------------
    Parameters:
    pos (np array): array of positions (x values) - the 1st/2nd output from data_generator_for_gp_mimick_gpt
    tar (np array): array of targets. Notice that if dealing with sequnces, we typically want to have the targets go from 0 to n-1. The 3rd/4th output from data_generator_for_gp_mimick_gpt  
    pos_mask (np array): see description in position_mask function
    ------------------    
    '''
    tar_inp = tar[:, :-1]
    tar_real = tar[:, 1:]
    combined_mask_tar = masks.create_masks(tar_inp)
    with tf.GradientTape(persistent=True) as tape:
        pred = decoder(pos, tar_inp, True, pos_mask, combined_mask_tar)
#         print('pred: ')
#         tf.print(pred)

        loss = losses.loss_function(tar_real, pred)


    gradients = tape.gradient(loss, decoder.trainable_variables)
#     tf.print(gradients)
# Ask the optimizer to apply the processed gradients.
    optimizer_c.apply_gradients(zip(gradients, decoder.trainable_variables))
    train_loss(loss)
#     b = decoder.trainable_weights[0]
#     tf.print(tf.reduce_mean(b))

In [75]:
@tf.function
def test_step(pos_te, tar_te, pos_mask_te):
    '''
    
    ---------------
    Parameters:
    pos (np array): array of positions (x values) - the 1st/2nd output from data_generator_for_gp_mimick_gpt
    tar (np array): array of targets. Notice that if dealing with sequnces, we typically want to have the targets go from 0 to n-1. The 3rd/4th output from data_generator_for_gp_mimick_gpt  
    pos_mask_te (np array): see description in position_mask function
    ---------------
    
    '''
    tar_inp_te = tar_te[:, :-1]
    tar_real_te = tar_te[:, 1:]
    combined_mask_tar_te = masks.create_masks(tar_inp_te)
  # training=False is only needed if there are layers with different
  # behavior during training versus inference (e.g. Dropout).
    pred = decoder(pos_te, tar_inp_te, False, pos_mask_te, combined_mask_tar_te)
    t_loss = losses.loss_function(tar_real_te, pred)
    test_loss(t_loss)
#     r_sq_te.update_state(tar_real_te, pred)

In [76]:
tf.keras.backend.set_floatx('float64')

In [77]:
if __name__ == '__main__':
    writer = tf.summary.create_file_writer(save_dir + '/logs/')
    optimizer_c = tf.keras.optimizers.Adam()
    decoder = model.Decoder(16)
    EPOCHS = 50
    batch_s  = 128
    run = 0; step = 0
    num_batches = int(pad_y_fren_tr.shape[0] / batch_s)
    tf.random.set_seed(1)    
    checkpoint = tf.train.Checkpoint(optimizer = optimizer_c, model = decoder)
    main_folder = "/Users/omernivron/Downloads/GPT/ckpt/check_"
    folder = main_folder + str(run); helpers.mkdir(folder)

    with writer.as_default():
        for epoch in range(EPOCHS):
            start = time.time()

            for batch_n in range(num_batches):
                batch_pos_tr, batch_tar_tr, batch_pos_mask, _ = batch_creator.create_batch_gp_mim_2(pad_pos_tr, pad_y_fren_tr, pp)
                # batch_tar_tr shape := 128 X 59 = (batch_size, max_seq_len)
                # batch_pos_tr shape := 128 X 59 = (batch_size, max_seq_len)
                train_step(batch_pos_tr, batch_tar_tr, batch_pos_mask)

                if batch_n % 50 == 0:
                    batch_pos_te, batch_tar_te, batch_pos_mask_te, _ = batch_creator.create_batch_gp_mim_2(pad_pos_te, pad_y_fren_te, pp_te)
                    test_step(batch_pos_te, batch_tar_te, batch_pos_mask_te)
                    helpers.print_progress(epoch, batch_n, train_loss.result(), test_loss.result(), r_sq_tr.result())
                    helpers.tf_summaries(run, step, train_loss.result(), test_loss.result(), r_sq_tr.result(), r_sq_te.result())
                    checkpoint.save(folder + '/')
                step += 1

            print ('Time taken for 1 epoch: {} secs\n'.format(time.time() - start))

Already exists


ValueError: in user code:

    <ipython-input-74-04fe53406be6>:23 train_step  *
        r_sq_tr.update_state(tf.reshape(tar_real, shape = [1, -1]), tf.reshape(pred, shape = [1, -1]))
    /Users/omernivron/opt/anaconda3/lib/python3.7/site-packages/tensorflow_addons/metrics/r_square.py:121 decorated  *
        update_op = update_state_fn(*args, **kwargs)
    /Users/omernivron/opt/anaconda3/lib/python3.7/site-packages/tensorflow_addons/metrics/r_square.py:115 update_state  *
        self.sum.assign_add(tf.reduce_sum(weighted_y_true, axis=0))
    /Users/omernivron/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/ops/resource_variable_ops.py:813 assign_add  **
        name=name)
    /Users/omernivron/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/ops/gen_resource_variable_ops.py:56 assign_add_variable_op
        "AssignAddVariableOp", resource=resource, value=value, name=name)
    /Users/omernivron/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py:744 _apply_op_helper
        attrs=attr_protos, op_def=op_def)
    /Users/omernivron/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/func_graph.py:595 _create_op_internal
        compute_device)
    /Users/omernivron/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/ops.py:3327 _create_op_internal
        op_def=op_def)
    /Users/omernivron/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/ops.py:1817 __init__
        control_input_ops, op_def)
    /Users/omernivron/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/ops.py:1657 _create_c_op
        raise ValueError(str(e))

    ValueError: Shapes must be equal rank, but are 0 and 1 for '{{node AssignAddVariableOp}} = AssignAddVariableOp[dtype=DT_DOUBLE](AssignAddVariableOp/resource, Sum_2)' with input shapes: [], [7424].


In [16]:
tar = pad_y_fren_te[:, 1:]



In [34]:
1 - (0.0165 / sum((tar[:, 5] - np.mean(tar[:, 5]))**2) / len(tar[:, 5]))

0.999999999833459

In [23]:
tar - np.mean(tar, 0)

array([[-0.23126513, -0.22783515, -0.15818167, ..., -0.12317099,
        -0.32767493, -0.21421905],
       [-0.70544454, -0.73005885, -0.6830323 , ..., -0.74098858,
        -0.59824057, -0.81173125],
       [ 0.10899795,  0.18115021,  0.22626579, ...,  0.20117891,
         0.25371165,  0.13876939],
       ...,
       [-0.38531545, -0.30379112, -0.27217713, ..., -0.26784763,
        -0.299351  , -0.39355877],
       [ 1.2178523 ,  1.30296639,  1.24647646, ...,  1.19968792,
         1.19953355,  1.18947239],
       [-1.21407981, -1.19715032, -1.27335662, ..., -1.23137991,
        -1.22176898, -1.21790715]])

In [21]:
sum(sum((tar - np.mean(tar))**2)) / (tar.shape[0] * tar.shape[1])

0.9916270624993678

In [None]:
pos = df_te[560, :].reshape(1, -1)

In [None]:
tar = df_te[561, :39].reshape(1, -1)

In [None]:
df_te[561, :]

In [None]:
def evaluate(pos, tar, pos_mask):
    '''
    
    '''
    combined_mask_tar = create_masks(tar)
    out = decoder(pos, tar, False, pos_mask, combined_mask_tar)
    return out

In [None]:
def inference(pos, tar, num_steps = 1):
    '''
    
    ------------------
    Parameters:
    pos (2D np array): (n + num_steps) positions 
    tar (2D np array): n targets 
    num_steps (int): how many inference steps are required
    ------------------
    Returns:
    out (tf.tensor float64): the predictions for all timestamps up to n + num_steps  
    
    '''
    n = tar.shape[1]
    temp_pos = pos[:, :(n + 1)]
    pos_mask = position_mask(temp_pos)
    
    out = evaluate(temp_pos, tar, pos_mask)
#     print(out[n - 1])
    tar = tf.concat((tar, tf.reshape(out[n - 1], [1, 1])), axis = 1)
    if num_steps > 1:
        out = inference(pos, tar, num_steps - 1)
    
    return out
    

In [None]:
a = inference(pos, tar, 20)

In [None]:
with matplotlib.rc_context({'figure.figsize': [10,2.5]}):
    plt.scatter(pos[:, :39], tar[:, :39], c='black')
    plt.scatter(pos[:, 39:58], a[39:])
    plt.scatter(pos[:, 39:58], df_te[561, 39:58], c='red')

In [None]:
# tf.data.Dataset(tf.Tensor(pad_pos_tr, value_index = 0 , dtype = tf.float32))