### Import libraries

In [1]:
from model import classic_model_mh, classic_model, losses, dot_prod_attention
from data import data_generation, batch_creator, gp_kernels, gp_priors
from keras.callbacks import ModelCheckpoint
from helpers import helpers, masks, metrics
import matplotlib.pyplot as plt
import tensorflow_addons as tfa
from inference import infer
import tensorflow as tf
import numpy as np
import matplotlib 
import time
import keras
# Use matplotlib's 'ggplot' style sheet for the plots in this notebook.
plt.style.use('ggplot')

Using TensorFlow backend.


In [2]:
# Root output directory for this experiment; the TensorBoard writer created in
# the training cell logs to save_dir + '/logs/'.
save_dir = '/Users/omernivron/Downloads/GPT'

In [3]:
# Generate the train/test positions (pad_pos_*) and targets (pad_y_fren_*).
# The fifth return value is discarded and the sixth is kept as df_te.
# NOTE(review): 150000 is presumably the number of generated samples - confirm
# against data_generator_for_gp_mimick_gpt.
pad_pos_tr, pad_pos_te, pad_y_fren_tr, pad_y_fren_te, _, df_te = data_generation.data_generator_for_gp_mimick_gpt(150000, ordered = True)

In [4]:
# Mean and standard deviation over all elements of the training positions
# (m, std) and of the training targets (m_y, std_y).
m, std = np.mean(pad_pos_tr), np.std(pad_pos_tr)
m_y, std_y = np.mean(pad_y_fren_tr), np.std(pad_y_fren_tr)

In [5]:
# Standardise positions and targets; the test split reuses the training-set
# statistics. The names are rebound to their own transformed values, so running
# this cell a second time standardises the already-standardised arrays again.
pad_pos_tr, pad_pos_te = [(arr - m) / std for arr in (pad_pos_tr, pad_pos_te)]
pad_y_fren_tr, pad_y_fren_te = [(arr - m_y) / std_y for arr in (pad_y_fren_tr, pad_y_fren_te)]

In [6]:
# Keras MSE loss object. train_step / test_step in the cells shown here call
# losses.loss_function instead and do not reference this object.
loss_object = tf.keras.losses.MeanSquaredError()
# Running means of the loss values handed over by train_step / test_step.
train_loss = tf.keras.metrics.Mean(name='train_loss')
test_loss = tf.keras.metrics.Mean(name='test_loss')
# Running means updated via update_state(mse, mask); the mask is passed as the
# second positional argument, which is sample_weight in tf.keras.metrics.Mean.
m_tr = tf.keras.metrics.Mean()
m_te = tf.keras.metrics.Mean()

In [7]:
@tf.function
def train_step(decoder, optimizer_c, train_loss, m_tr, pos, tar, pos_mask):
    '''
    Run one optimisation step of the decoder on a single batch (TF2 style).
    Operations whose gradients are needed have to run inside the GradientTape() clause.
    See (1) https://www.tensorflow.org/guide/migrate
    (2) https://www.tensorflow.org/tutorials/quickstart/advanced
    ------------------
    Parameters:
    decoder: model called as decoder(pos, tar_inp, True, pos_mask, combined_mask_tar); its trainable_variables are updated in place
    optimizer_c: optimizer whose apply_gradients() performs the update
    train_loss: metric object that is called with the batch loss
    m_tr: metric object updated with update_state(mse, mask)
    pos (np array): array of positions (x values) - the 1st/2nd output from data_generator_for_gp_mimick_gpt
    tar (np array): array of targets. Notice that if dealing with sequences, we typically want to have the targets go from 0 to n-1. The 3rd/4th output from data_generator_for_gp_mimick_gpt
    pos_mask (np array): see description in position_mask function
    ------------------
    Returns:
    None. The results are observable through decoder's weights, train_loss and m_tr.
    '''
    # The decoder is fed targets 0..n-2 and scored against targets 1..n-1.
    tar_inp = tar[:, :-1]
    tar_real = tar[:, 1:]
    combined_mask_tar = masks.create_masks(tar_inp)

    # A non-persistent tape is sufficient: gradient() is called exactly once,
    # and a non-persistent tape releases its resources right after that call.
    with tf.GradientTape() as tape:
        # Third positional argument is True here and False in test_step
        # (presumably the training flag - confirm against the Decoder class).
        pred = decoder(pos, tar_inp, True, pos_mask, combined_mask_tar)
        loss, mse, mask = losses.loss_function(tar_real, pred)

    gradients = tape.gradient(loss, decoder.trainable_variables)
    # Ask the optimizer to apply the gradients to the decoder's variables.
    optimizer_c.apply_gradients(zip(gradients, decoder.trainable_variables))
    train_loss(loss)
    m_tr.update_state(mse, mask)

In [8]:
@tf.function
def test_step(decoder, test_loss, m_te, pos_te, tar_te, pos_mask_te):
    '''
    Evaluate the decoder on one held-out batch and record the results in the
    supplied metric objects. No gradients are computed and no weights change.
    ---------------
    Parameters:
    decoder: model called as decoder(pos_te, <shifted targets>, False, pos_mask_te, <target mask>)
    test_loss: metric object that is called with the batch loss
    m_te: metric object updated with update_state(mse, mask)
    pos_te (np array): array of positions (x values) - the 1st/2nd output from data_generator_for_gp_mimick_gpt
    tar_te (np array): array of targets. Notice that if dealing with sequences, we typically want to have the targets go from 0 to n-1. The 3rd/4th output from data_generator_for_gp_mimick_gpt
    pos_mask_te (np array): see description in position_mask function
    ---------------

    '''
    # Feed targets 0..n-2, compare the output against targets 1..n-1.
    decoder_in = tar_te[:, :-1]
    expected = tar_te[:, 1:]
    target_mask = masks.create_masks(decoder_in)

    # training=False is only needed if there are layers with different
    # behavior during training versus inference (e.g. Dropout).
    prediction = decoder(pos_te, decoder_in, False, pos_mask_te, target_mask)

    batch_loss, batch_mse, batch_mask = losses.loss_function(expected, prediction)

    test_loss(batch_loss)
    m_te.update_state(batch_mse, batch_mask)

In [9]:
# Switch Keras' default float type to float64.
# NOTE(review): this cell runs after the metric objects in cell 6 were
# constructed - confirm they do not need to be created after this call.
tf.keras.backend.set_floatx('float64')

In [11]:
if __name__ == '__main__':
    # Train the decoder, logging summaries to TensorBoard and checkpointing
    # every 50 batches. An existing checkpoint for this `run` is resumed.
    writer = tf.summary.create_file_writer(save_dir + '/logs/')
    optimizer_c = tf.keras.optimizers.Adam(0.003)
    decoder = classic_model.Decoder(64)
    EPOCHS = 750
    batch_s = 64
    run = 25
    num_batches = pad_y_fren_tr.shape[0] // batch_s
#   tf.random.set_seed(1)
    ckpt = tf.train.Checkpoint(step=tf.Variable(1), optimizer = optimizer_c, net = decoder)
    # Checkpoints are stored under the same root directory as the logs.
    main_folder = save_dir + '/ckpt/check_'
    folder = main_folder + str(run); helpers.mkdir(folder)
    manager = tf.train.CheckpointManager(ckpt, folder, max_to_keep=3)
    ckpt.restore(manager.latest_checkpoint)
    if manager.latest_checkpoint:
        print("Restored from {}".format(manager.latest_checkpoint))
    else:
        print("Initializing from scratch.")

    # ckpt.step starts at 1 and is advanced together with `step` below, so
    # step == ckpt.step - 1. Deriving it from the (possibly restored) counter
    # gives 0 on a fresh run and continues the summary step numbering on a
    # resumed run instead of restarting it at 0.
    step = int(ckpt.step.numpy()) - 1

    with writer.as_default():
        for epoch in range(EPOCHS):
            start = time.time()

            for batch_n in range(num_batches):
                # Metrics are cleared on every batch, so the values reported
                # below describe only the most recent train / test batch.
                m_tr.reset_states(); train_loss.reset_states()
                m_te.reset_states(); test_loss.reset_states()
                batch_pos_tr, batch_tar_tr, _ = batch_creator.create_batch_gp_mim_2(pad_pos_tr, pad_y_fren_tr, batch_s=batch_s)
                # batch_tar_tr / batch_pos_tr shape := (batch_size, max_seq_len)
                # (author's note gave max_seq_len = 59 - TODO confirm)
                batch_pos_mask = masks.position_mask(batch_pos_tr)
                train_step(decoder, optimizer_c, train_loss, m_tr, batch_pos_tr, batch_tar_tr, batch_pos_mask)

                # Every 50 batches: evaluate one test batch, report, log and checkpoint.
                if batch_n % 50 == 0:
                    batch_pos_te, batch_tar_te, _ = batch_creator.create_batch_gp_mim_2(pad_pos_te, pad_y_fren_te, batch_s=batch_s)
                    batch_pos_mask_te = masks.position_mask(batch_pos_te)
                    test_step(decoder, test_loss, m_te, batch_pos_te, batch_tar_te, batch_pos_mask_te)
                    helpers.print_progress(epoch, batch_n, train_loss.result(), test_loss.result(), m_te.result())
                    helpers.tf_summaries(run, step, train_loss.result(), test_loss.result(), m_tr.result(), m_te.result())
                    manager.save()
                step += 1
                ckpt.step.assign_add(1)

            print ('Time taken for 1 epoch: {} secs\n'.format(time.time() - start))

Already exists
Restored from /Users/omernivron/Downloads/GPT/ckpt/check_25/ckpt-436
Epoch 0 batch 0 train Loss 0.3070 test Loss 0.3288 with MSE metric 0.3288
Epoch 0 batch 50 train Loss 0.2632 test Loss 0.2954 with MSE metric 0.2954
Epoch 0 batch 100 train Loss 0.2409 test Loss 0.3051 with MSE metric 0.3051
Epoch 0 batch 150 train Loss 0.2059 test Loss 0.2864 with MSE metric 0.2864
Epoch 0 batch 200 train Loss 0.2653 test Loss 0.2926 with MSE metric 0.2926
Epoch 0 batch 250 train Loss 0.2815 test Loss 0.2780 with MSE metric 0.2780
Epoch 0 batch 300 train Loss 0.2676 test Loss 0.3237 with MSE metric 0.3236
Epoch 0 batch 350 train Loss 0.2289 test Loss 0.3234 with MSE metric 0.3234
Epoch 0 batch 400 train Loss 0.2114 test Loss 0.3176 with MSE metric 0.3176
Epoch 0 batch 450 train Loss 0.2612 test Loss 0.3166 with MSE metric 0.3166
Epoch 0 batch 500 train Loss 0.2315 test Loss 0.3008 with MSE metric 0.3008
Epoch 0 batch 550 train Loss 0.2362 test Loss 0.2903 with MSE metric 0.2903
Epoch 0

Epoch 2 batch 1450 train Loss 0.2347 test Loss 0.2913 with MSE metric 0.2913
Epoch 2 batch 1500 train Loss 0.2413 test Loss 0.2806 with MSE metric 0.2806
Epoch 2 batch 1550 train Loss 0.2543 test Loss 0.2912 with MSE metric 0.2912
Epoch 2 batch 1600 train Loss 0.2648 test Loss 0.3080 with MSE metric 0.3080
Epoch 2 batch 1650 train Loss 0.2538 test Loss 0.3020 with MSE metric 0.3020
Epoch 2 batch 1700 train Loss 0.2288 test Loss 0.3104 with MSE metric 0.3104
Epoch 2 batch 1750 train Loss 0.2530 test Loss 0.3118 with MSE metric 0.3118
Epoch 2 batch 1800 train Loss 0.2505 test Loss 0.3055 with MSE metric 0.3055
Epoch 2 batch 1850 train Loss 0.2572 test Loss 0.3108 with MSE metric 0.3108
Time taken for 1 epoch: 187.7289822101593 secs

Epoch 3 batch 0 train Loss 0.2549 test Loss 0.3203 with MSE metric 0.3203
Epoch 3 batch 50 train Loss 0.2404 test Loss 0.2667 with MSE metric 0.2667
Epoch 3 batch 100 train Loss 0.2449 test Loss 0.3273 with MSE metric 0.3273
Epoch 3 batch 150 train Loss 0.206

Epoch 5 batch 1050 train Loss 0.2346 test Loss 0.3577 with MSE metric 0.3577
Epoch 5 batch 1100 train Loss 0.2178 test Loss 0.2950 with MSE metric 0.2950
Epoch 5 batch 1150 train Loss 0.2665 test Loss 0.3018 with MSE metric 0.3018
Epoch 5 batch 1200 train Loss 0.2536 test Loss 0.3231 with MSE metric 0.3231
Epoch 5 batch 1250 train Loss 0.2176 test Loss 0.2897 with MSE metric 0.2897
Epoch 5 batch 1300 train Loss 0.2139 test Loss 0.3340 with MSE metric 0.3340
Epoch 5 batch 1350 train Loss 0.2562 test Loss 0.3115 with MSE metric 0.3115
Epoch 5 batch 1400 train Loss 0.2377 test Loss 0.3161 with MSE metric 0.3161
Epoch 5 batch 1450 train Loss 0.2233 test Loss 0.3072 with MSE metric 0.3072
Epoch 5 batch 1500 train Loss 0.2511 test Loss 0.2972 with MSE metric 0.2972
Epoch 5 batch 1550 train Loss 0.2533 test Loss 0.3166 with MSE metric 0.3166
Epoch 5 batch 1600 train Loss 0.2882 test Loss 0.3068 with MSE metric 0.3068
Epoch 5 batch 1650 train Loss 0.2233 test Loss 0.2607 with MSE metric 0.2607

Epoch 8 batch 650 train Loss 0.2449 test Loss 0.2917 with MSE metric 0.2917
Epoch 8 batch 700 train Loss 0.2304 test Loss 0.3112 with MSE metric 0.3112
Epoch 8 batch 750 train Loss 0.2541 test Loss 0.3145 with MSE metric 0.3145
Epoch 8 batch 800 train Loss 0.2440 test Loss 0.2943 with MSE metric 0.2943
Epoch 8 batch 850 train Loss 0.2225 test Loss 0.2724 with MSE metric 0.2724
Epoch 8 batch 900 train Loss 0.2211 test Loss 0.2979 with MSE metric 0.2979
Epoch 8 batch 950 train Loss 0.2388 test Loss 0.2717 with MSE metric 0.2717
Epoch 8 batch 1000 train Loss 0.2498 test Loss 0.2763 with MSE metric 0.2763
Epoch 8 batch 1050 train Loss 0.2317 test Loss 0.2809 with MSE metric 0.2809
Epoch 8 batch 1100 train Loss 0.2382 test Loss 0.2806 with MSE metric 0.2806
Epoch 8 batch 1150 train Loss 0.2486 test Loss 0.2879 with MSE metric 0.2879
Epoch 8 batch 1200 train Loss 0.2504 test Loss 0.2959 with MSE metric 0.2959
Epoch 8 batch 1250 train Loss 0.2584 test Loss 0.3014 with MSE metric 0.3014
Epoch 

Epoch 11 batch 200 train Loss 0.2528 test Loss 0.2686 with MSE metric 0.2686
Epoch 11 batch 250 train Loss 0.2272 test Loss 0.2888 with MSE metric 0.2888
Epoch 11 batch 300 train Loss 0.2457 test Loss 0.2905 with MSE metric 0.2905
Epoch 11 batch 350 train Loss 0.2315 test Loss 0.3087 with MSE metric 0.3087
Epoch 11 batch 400 train Loss 0.2309 test Loss 0.2887 with MSE metric 0.2887
Epoch 11 batch 450 train Loss 0.2149 test Loss 0.2770 with MSE metric 0.2770
Epoch 11 batch 500 train Loss 0.2296 test Loss 0.3172 with MSE metric 0.3172
Epoch 11 batch 550 train Loss 0.2323 test Loss 0.3052 with MSE metric 0.3052
Epoch 11 batch 600 train Loss 0.2604 test Loss 0.3271 with MSE metric 0.3271
Epoch 11 batch 650 train Loss 0.2655 test Loss 0.3449 with MSE metric 0.3449
Epoch 11 batch 700 train Loss 0.1960 test Loss 0.2982 with MSE metric 0.2982
Epoch 11 batch 750 train Loss 0.2487 test Loss 0.3037 with MSE metric 0.3037
Epoch 11 batch 800 train Loss 0.2322 test Loss 0.3132 with MSE metric 0.3132

Epoch 13 batch 1650 train Loss 0.2626 test Loss 0.3177 with MSE metric 0.3177
Epoch 13 batch 1700 train Loss 0.2425 test Loss 0.2662 with MSE metric 0.2662
Epoch 13 batch 1750 train Loss 0.2151 test Loss 0.3050 with MSE metric 0.3050


KeyboardInterrupt: 

In [26]:
# Selects which gold-standard files the next cell loads:
# True -> x_extra.npy / y_extra.npy, False -> x_interpol.npy / y_interpol.npy.
extrapo = True

In [27]:
# Pick the (x, y) file pair matching the `extrapo` switch, then load both arrays.
x_path, y_path = (
    ('/Users/omernivron/Downloads/GPT_data_goldstandard/x_extra.npy',
     '/Users/omernivron/Downloads/GPT_data_goldstandard/y_extra.npy')
    if extrapo else
    ('/Users/omernivron/Downloads/GPT_data_goldstandard/x_interpol.npy',
     '/Users/omernivron/Downloads/GPT_data_goldstandard/y_interpol.npy')
)
x = np.load(x_path)
y = np.load(y_path)

In [28]:
# Evaluate the decoder on the first tenth of the loaded gold-standard rows.
# For each row the decoder receives the first 41 positions and the first 40
# targets; the last element of its first output row is stored as the
# prediction and later compared with the row's target at index 40.
# NOTE(review): x and y are passed to the decoder exactly as loaded, whereas the
# training data was standardised with (m, std, m_y, std_y) - confirm that the
# .npy files or infer.inference already account for that.
kuee_metric = 0  # placeholder; the KUEE metric is not computed in this variant
μ = []; σ = []   # σ stays empty: this decoder returns no log-sigma output

# Named n_eval rather than `m` so that the training-set mean `m`, which the
# standardisation cell depends on, is not overwritten by a sample count.
n_eval = int(x.shape[0] / 10)
y_mean = np.mean(y[:n_eval, :40])
y_te = y[:n_eval, 40]
for j in range(n_eval):
    x_tr = x[j, :41].reshape(1, -1)
    y_tr = y[j, :40].reshape(1, -1)
    μ_te = infer.inference(decoder, x_tr, y_tr)
    μ.append(μ_te[0][-1].numpy())

mse_metric = metrics.mse(y_te, μ)
r_sq_metric = metrics.r_squared(y_te, μ, y_mean)
# NOTE(review): the extra 1/n_eval scaling is only right if metrics.mse returns
# a sum of squared errors rather than a mean - confirm against helpers.metrics.
mse_metric *= (1 / n_eval)

In [29]:
# Display the value returned by metrics.r_squared in the evaluation cell.
r_sq_metric

0.5012839537371766

In [30]:
    # Display the MSE from the evaluation cell (after its 1/count scaling).
    mse_metric

0.4851938633914596