In [1]:
import time

import keras
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_addons as tfa
from keras.callbacks import ModelCheckpoint

from data import data_generation, batch_creator, gp_kernels
from helpers import helpers, masks, metrics
from inference import infer
from model import river_model, losses, dot_prod_attention
# Select matplotlib's 'ggplot' style sheet for figures created from here on.
plt.style.use('ggplot')

Using TensorFlow backend.


In [37]:
# Numeric attribute table read from CSV; it is handed to
# batch_creator.create_batch_river when batches are built in the training loop.
attributes_numeric = pd.read_csv('/Users/omernivron/Downloads/att_numeric')

In [5]:
# Pre-processed array used for training (it is split into the *_tr series further down).
arr_pp = np.load('/Users/omernivron/Downloads/river_processed.npy')

In [6]:
# Pre-processed array used for evaluation (it is split into the *_te series further down).
arr_pe = np.load('/Users/omernivron/Downloads/river_processed_te.npy')

In [24]:
# Root output directory for this experiment; the summary writer logs to save_dir + '/logs/'.
save_dir = '/Users/omernivron/Downloads/GPT_river'

In [39]:
# NOTE(review): loss_object is not referenced by any other visible cell (the
# step functions call losses.loss_function instead) — confirm it is still needed.
loss_object = tf.keras.losses.MeanSquaredError()
# Running means of the loss values fed in by train_step / test_step.
train_loss = tf.keras.metrics.Mean(name='train_loss')
test_loss = tf.keras.metrics.Mean(name='test_loss')
# Running means of the `mse` value returned by losses.loss_function; the step
# functions pass the returned `mask` as the second argument of update_state.
m_tr = tf.keras.metrics.Mean()
m_te = tf.keras.metrics.Mean()

In [40]:
@tf.function
def train_step(decoder, optimizer_c, train_loss, m_tr, token_pos, time_pos, time_pos2, pos, tar, pos_mask):
    '''
    Run one optimisation step of `decoder` on a single batch (TF2 custom training loop).
    Everything whose gradient is needed must be computed inside the GradientTape() clause.
    See (1) https://www.tensorflow.org/guide/migrate
    (2) https://www.tensorflow.org/tutorials/quickstart/advanced
    ------------------
    Parameters:
    decoder: the model; called as decoder(token_pos, time_pos, time_pos2, pos, tar_inp, True, pos_mask, mask)
             and expected to return a (pred, pred_log_sig) pair.
    optimizer_c: optimizer whose apply_gradients is used to update decoder.trainable_variables.
    train_loss: metric object that is called with the batch loss.
    m_tr: metric object updated with the batch `mse`, with `mask` as second argument.
    token_pos, time_pos, time_pos2: forwarded to the decoder unchanged
             (presumably token / time position inputs — confirm against river_model.Decoder).
    pos (np array): array of positions (x values) - the 1st/2nd output from data_generator_for_gp_mimick_gpt
    tar (np array): array of targets. Notice that if dealing with sequences, we typically want to have
             the targets go from 0 to n-1. The 3rd/4th output from data_generator_for_gp_mimick_gpt
    pos_mask (np array): see description in position_mask function
    ------------------
    Returns:
    (tar_inp, tar_real, pred, pred_log_sig, mask) where tar_inp = tar[:, :-1], tar_real = tar[:, 1:],
    and mask is the third value returned by losses.loss_function.
    ------------------
    '''
    # The model is fed every target but the last and scored against every
    # target but the first, i.e. the same sequence shifted by one position.
    tar_inp = tar[:, :-1]
    tar_real = tar[:, 1:]
    combined_mask_tar = masks.create_masks(tar_inp)

    # gradient() is called exactly once below, so a regular (non-persistent)
    # tape suffices; it releases its resources right after that call, whereas a
    # persistent tape keeps them until it is explicitly deleted.
    with tf.GradientTape() as tape:
        # The sixth positional argument (True) is presumably the training flag — confirm against Decoder.call.
        pred, pred_log_sig = decoder(token_pos, time_pos, time_pos2, pos, tar_inp, True, pos_mask, combined_mask_tar)
        loss, mse, mask = losses.loss_function(tar_real, pred, pred_log_sig)

    gradients = tape.gradient(loss, decoder.trainable_variables)
    # Ask the optimizer to apply the gradients.
    optimizer_c.apply_gradients(zip(gradients, decoder.trainable_variables))
    train_loss(loss)
    m_tr.update_state(mse, mask)
    return tar_inp, tar_real, pred, pred_log_sig, mask

In [41]:
@tf.function
def test_step(decoder, test_loss, m_te, token_pos_te, time_pos_te, time_pos2_te, pos_te, tar_te, pos_mask_te):
    '''
    Score `decoder` on one evaluation batch; no gradients are taken and no weights are updated.
    ---------------
    Parameters:
    decoder: the model; must return a (prediction, predicted log-sigma) pair.
    test_loss: metric object that is called with the batch loss.
    m_te: metric object updated with the batch mse, with the loss mask as second argument.
    token_pos_te, time_pos_te, time_pos2_te: forwarded to the decoder unchanged.
    pos_te (np array): array of positions (x values) - the 1st/2nd output from data_generator_for_gp_mimick_gpt
    tar_te (np array): array of targets; all but the last column are fed to the model and
                       all but the first column are what it is scored against.
    pos_mask_te (np array): see description in position_mask function
    ---------------
    Returns:
    (tar_te[:, 1:], prediction, predicted log-sigma, mask returned by losses.loss_function)
    ---------------
    '''
    shifted_in, shifted_out = tar_te[:, :-1], tar_te[:, 1:]
    target_mask = masks.create_masks(shifted_in)

    # The literal False sits where train_step passes True; it only matters for
    # layers that behave differently in training and inference (e.g. Dropout).
    outputs = decoder(
        token_pos_te, time_pos_te, time_pos2_te, pos_te,
        shifted_in, False, pos_mask_te, target_mask,
    )
    mean_out, log_sig_out = outputs

    batch_loss, batch_mse, loss_mask = losses.loss_function(shifted_out, mean_out, log_sig_out)
    test_loss(batch_loss)
    m_te.update_state(batch_mse, loss_mask)
    return shifted_out, mean_out, log_sig_out, loss_mask

In [42]:
# Switch the Keras default float type to float64; this runs before the
# decoder is constructed in the training cell below.
tf.keras.backend.set_floatx('float64')

In [43]:
# Rows of arr_pp are read as five interleaved series: a stride-5 slice starting
# at offset k picks out series k (0: target, 1: t1, 2: t2, 3: token, 4: basin_l).
tar_tr, t1_tr, t2_tr, token_tr, basin_l_tr = (arr_pp[offset::5] for offset in range(5))

In [44]:
# Rows of arr_pe are read as five interleaved series: a stride-5 slice starting
# at offset k picks out series k (0: target, 1: t1, 2: t2, 3: token, 4: basin_l).
tar_te, t1_te, t2_te, token_te, basin_l_te = (arr_pe[offset::5] for offset in range(5))

In [None]:
if __name__ == '__main__':
    # Training driver: builds the model and optimizer, restores the latest
    # checkpoint if one exists, then alternates training batches with a
    # periodic evaluation batch, summary write and checkpoint save.
    EPOCHS = 500
    batch_s  = 32
    # NOTE(review): `step` restarts at 0 even when a checkpoint is restored, so
    # summaries written by a resumed run reuse step indices of the earlier run.
    # Consider deriving it from ckpt.step after restore.
    run = 0; step = 0

    # Seed TensorFlow before the optimizer and model are constructed so that any
    # random ops executed while building them are covered by the seed as well.
    tf.random.set_seed(1)

    writer = tf.summary.create_file_writer(save_dir + '/logs/')
    optimizer_c = tf.keras.optimizers.Adam()
    decoder = river_model.Decoder(16)
    # Only whole batches are counted; a trailing partial batch is dropped.
    num_batches = tar_tr.shape[0] // batch_s

    ckpt = tf.train.Checkpoint(step=tf.Variable(1), optimizer = optimizer_c, net = decoder)
    # Checkpoints are kept under the same root directory as the logs.
    main_folder = save_dir + "/ckpt/check_"
    folder = main_folder + str(run); helpers.mkdir(folder)
    #https://www.tensorflow.org/guide/checkpoint
    manager = tf.train.CheckpointManager(ckpt, folder, max_to_keep=3)
    # restore(None) is a no-op, so this is safe when no checkpoint exists yet.
    ckpt.restore(manager.latest_checkpoint)
    if manager.latest_checkpoint:
        print("Restored from {}".format(manager.latest_checkpoint))
    else:
        print("Initializing from scratch.")

    with writer.as_default():
        for epoch in range(EPOCHS):
            start = time.time()

            for batch_n in range(num_batches):
                # Metrics are reset on every batch, so the values reported
                # below describe the current batch only, not a running average.
                m_tr.reset_states(); train_loss.reset_states()
                m_te.reset_states(); test_loss.reset_states()

                (batch_tok_pos_tr, batch_tim_pos_tr, batch_tim_pos_tr2,
                 batch_pos_tr, batch_tar_tr, _) = batch_creator.create_batch_river(
                    token_tr, t1_tr, t2_tr, basin_l_tr, attributes_numeric, tar_tr,
                    batch_s=batch_s)
                batch_pos_mask = masks.position_mask(batch_tok_pos_tr)
                tar_inp, tar_real, pred, pred_log_sig, mask = train_step(
                    decoder, optimizer_c, train_loss, m_tr,
                    batch_tok_pos_tr, batch_tim_pos_tr, batch_tim_pos_tr2,
                    batch_pos_tr, batch_tar_tr, batch_pos_mask)

                # Every 100th batch: evaluate on one test batch, report, log and checkpoint.
                if batch_n % 100 == 0:
                    (batch_tok_pos_te, batch_tim_pos_te, batch_tim_pos_te2,
                     batch_pos_te, batch_tar_te, _) = batch_creator.create_batch_river(
                        token_te, t1_te, t2_te, basin_l_te, attributes_numeric, tar_te,
                        batch_s=batch_s)
                    batch_pos_mask_te = masks.position_mask(batch_tok_pos_te)
                    tar_real_te, pred_te, pred_log_sig_te, t_mask = test_step(
                        decoder, test_loss, m_te,
                        batch_tok_pos_te, batch_tim_pos_te, batch_tim_pos_te2,
                        batch_pos_te, batch_tar_te, batch_pos_mask_te)
                    helpers.print_progress(epoch, batch_n, train_loss.result(), test_loss.result(), m_tr.result())
                    helpers.tf_summaries(run, step, train_loss.result(), test_loss.result(), m_tr.result(), m_te.result())
                    manager.save()
                step += 1
                ckpt.step.assign_add(1)

            print ('Time taken for 1 epoch: {} secs\n'.format(time.time() - start))

Already exists
Restored from /Users/omernivron/Downloads/GPT_river/ckpt/check_0/ckpt-41
Epoch 0 batch 0 train Loss 83.1630 test Loss 8.6789 with MSE metric 160927.1250
Epoch 0 batch 100 train Loss 58.8439 test Loss 7.9786 with MSE metric 250305.1875
Epoch 0 batch 200 train Loss 41.8898 test Loss 8.3989 with MSE metric 240539.0312
Epoch 0 batch 300 train Loss 236.0662 test Loss 8.1602 with MSE metric 105619.0469
Time taken for 1 epoch: 63.16648006439209 secs

Epoch 1 batch 0 train Loss 76.7884 test Loss 8.1410 with MSE metric 100045.8750
Epoch 1 batch 100 train Loss 150.8243 test Loss 8.1329 with MSE metric 159931.4531
Epoch 1 batch 200 train Loss 40.7146 test Loss 8.5260 with MSE metric 109778.3125
Epoch 1 batch 300 train Loss 10.7738 test Loss 8.5501 with MSE metric 47570.4883
Time taken for 1 epoch: 61.53780198097229 secs

Epoch 2 batch 0 train Loss 78.3459 test Loss 7.9623 with MSE metric 125895.0156
Epoch 2 batch 100 train Loss 14.4509 test Loss 8.6691 with MSE metric 107578.5000
E

Time taken for 1 epoch: 64.52017211914062 secs

Epoch 22 batch 0 train Loss 8.5261 test Loss 9.8804 with MSE metric 199715.1562
Epoch 22 batch 100 train Loss 7.8528 test Loss 10.1375 with MSE metric 29103.1953
Epoch 22 batch 200 train Loss 7.7125 test Loss 10.1581 with MSE metric 30892.5918
Epoch 22 batch 300 train Loss 8.0753 test Loss 9.7710 with MSE metric 182269.4531
Time taken for 1 epoch: 64.57459998130798 secs

Epoch 23 batch 0 train Loss 9.0407 test Loss 9.9144 with MSE metric 164771.1250
Epoch 23 batch 100 train Loss 9.0602 test Loss 9.6778 with MSE metric 249782.6250
Epoch 23 batch 200 train Loss 15.4989 test Loss 9.9605 with MSE metric 232768.0625
Epoch 23 batch 300 train Loss 8.3160 test Loss 9.8987 with MSE metric 163710.0938
Time taken for 1 epoch: 62.251121282577515 secs

Epoch 24 batch 0 train Loss 8.2905 test Loss 9.6634 with MSE metric 272372.7500
Epoch 24 batch 100 train Loss 9.0512 test Loss 9.7734 with MSE metric 88452.9062
Epoch 24 batch 200 train Loss 8.1791 test

Time taken for 1 epoch: 62.88431692123413 secs

Epoch 44 batch 0 train Loss 6.8936 test Loss 8.0245 with MSE metric 64302.7812
Epoch 44 batch 100 train Loss 7.4135 test Loss 8.0743 with MSE metric 232925.3750
Epoch 44 batch 200 train Loss 7.3593 test Loss 7.9226 with MSE metric 202448.1562
Epoch 44 batch 300 train Loss 7.0075 test Loss 8.2144 with MSE metric 114110.9375
Time taken for 1 epoch: 64.08842325210571 secs

Epoch 45 batch 0 train Loss 7.2085 test Loss 8.1187 with MSE metric 178431.4375
Epoch 45 batch 100 train Loss 7.0109 test Loss 7.9522 with MSE metric 102456.6719
Epoch 45 batch 200 train Loss 6.9413 test Loss 7.8502 with MSE metric 94884.8906
Epoch 45 batch 300 train Loss 7.5711 test Loss 7.7912 with MSE metric 278274.5312
Time taken for 1 epoch: 62.89453387260437 secs

Epoch 46 batch 0 train Loss 7.0751 test Loss 7.8994 with MSE metric 143181.0781
Epoch 46 batch 100 train Loss 7.2929 test Loss 7.9254 with MSE metric 211754.5312
Epoch 46 batch 200 train Loss 6.9889 test Lo

Time taken for 1 epoch: 63.904216051101685 secs

Epoch 66 batch 0 train Loss 7.0131 test Loss 7.1855 with MSE metric 193302.4531
Epoch 66 batch 100 train Loss 6.8216 test Loss 7.3470 with MSE metric 146461.2188
Epoch 66 batch 200 train Loss 6.5990 test Loss 7.0628 with MSE metric 91737.5469
Epoch 66 batch 300 train Loss 6.7137 test Loss 7.2263 with MSE metric 112858.1484
Time taken for 1 epoch: 62.20226192474365 secs

Epoch 67 batch 0 train Loss 6.3593 test Loss 7.1180 with MSE metric 41285.7344
Epoch 67 batch 100 train Loss 6.6005 test Loss 6.9896 with MSE metric 95269.5469
Epoch 67 batch 200 train Loss 7.3718 test Loss 7.0102 with MSE metric 246492.2031
Epoch 67 batch 300 train Loss 6.9538 test Loss 7.3461 with MSE metric 190247.0312
Time taken for 1 epoch: 61.64045429229736 secs

Epoch 68 batch 0 train Loss 7.0259 test Loss 7.1513 with MSE metric 204596.1406
Epoch 68 batch 100 train Loss 6.6426 test Loss 7.1499 with MSE metric 100391.3750
Epoch 68 batch 200 train Loss 6.8268 test Lo

Time taken for 1 epoch: 63.57934308052063 secs

Epoch 88 batch 0 train Loss 6.6474 test Loss 6.5572 with MSE metric 103754.4297
Epoch 88 batch 100 train Loss 6.4543 test Loss 6.6516 with MSE metric 72944.3125
Epoch 88 batch 200 train Loss 6.2891 test Loss 6.2621 with MSE metric 49623.0234
Epoch 88 batch 300 train Loss 6.2073 test Loss 6.1694 with MSE metric 43677.0469
Time taken for 1 epoch: 64.34267067909241 secs

Epoch 89 batch 0 train Loss 6.3063 test Loss 6.4855 with MSE metric 54348.6250
Epoch 89 batch 100 train Loss 6.6600 test Loss 6.3215 with MSE metric 89443.8281
Epoch 89 batch 200 train Loss 6.3111 test Loss 6.2264 with MSE metric 54401.4141
Epoch 89 batch 300 train Loss 6.3388 test Loss 6.4903 with MSE metric 57435.0703
Time taken for 1 epoch: 63.98026514053345 secs

Epoch 90 batch 0 train Loss 6.2201 test Loss 6.3858 with MSE metric 43236.0352
Epoch 90 batch 100 train Loss 6.5018 test Loss 6.4711 with MSE metric 76504.5000
Epoch 90 batch 200 train Loss 6.6334 test Loss 6.13

Epoch 110 batch 0 train Loss 6.1951 test Loss 6.2308 with MSE metric 43689.7031
Epoch 110 batch 100 train Loss 5.9848 test Loss 6.1912 with MSE metric 23197.3242
Epoch 110 batch 200 train Loss 6.3971 test Loss 5.9651 with MSE metric 65086.2656
Epoch 110 batch 300 train Loss 6.1842 test Loss 6.2280 with MSE metric 42771.2930
Time taken for 1 epoch: 61.127249002456665 secs

Epoch 111 batch 0 train Loss 6.1194 test Loss 6.1247 with MSE metric 35733.3125
Epoch 111 batch 100 train Loss 6.1296 test Loss 6.0337 with MSE metric 37839.0312
Epoch 111 batch 200 train Loss 6.1737 test Loss 6.2345 with MSE metric 40810.5469
Epoch 111 batch 300 train Loss 5.9905 test Loss 6.0412 with MSE metric 21286.5234
Time taken for 1 epoch: 60.36514091491699 secs

Epoch 112 batch 0 train Loss 5.9594 test Loss 6.2906 with MSE metric 26095.0039
Epoch 112 batch 100 train Loss 6.2949 test Loss 6.1780 with MSE metric 53433.8047
Epoch 112 batch 200 train Loss 6.3612 test Loss 6.3141 with MSE metric 61156.6367
Epoch 1

Epoch 132 batch 0 train Loss 6.3195 test Loss 6.4942 with MSE metric 53576.4219
Epoch 132 batch 100 train Loss 6.4951 test Loss 6.3670 with MSE metric 69256.4531
Epoch 132 batch 200 train Loss 6.0382 test Loss 6.0554 with MSE metric 27548.4688
Epoch 132 batch 300 train Loss 6.7778 test Loss 6.5213 with MSE metric 95780.5781
Time taken for 1 epoch: 64.7581832408905 secs

Epoch 133 batch 0 train Loss 5.8110 test Loss 6.2293 with MSE metric 8998.9072
Epoch 133 batch 100 train Loss 6.2307 test Loss 6.2792 with MSE metric 47416.1250
Epoch 133 batch 200 train Loss 6.7006 test Loss 6.7044 with MSE metric 96699.8125
Epoch 133 batch 300 train Loss 6.5096 test Loss 5.9848 with MSE metric 75449.9844
Time taken for 1 epoch: 64.17077779769897 secs

Epoch 134 batch 0 train Loss 6.0270 test Loss 6.2357 with MSE metric 27324.8555
Epoch 134 batch 100 train Loss 5.9631 test Loss 6.0108 with MSE metric 24973.0156
Epoch 134 batch 200 train Loss 6.2372 test Loss 6.2365 with MSE metric 46685.2734
Epoch 134 