In [1]:
import os
import numpy as np
import pickle as pc
import tensorflow as tf
from utils import *

#from seq2seq import *
from Encoder import *
from Decoder import *

In [2]:
# ---------------------------------------------------------------------------
# Run configuration. Every name below is read as a module-level global by
# train(); the values are unchanged, only grouped by purpose.
# ---------------------------------------------------------------------------

# Data
LBNL_DATA_DIR = 'data/LBNL Building 74/lbnlb74electricity.xlsx'  # spreadsheet path handed to load_data()
horizon_size = 96  # forwarded to build_dataset() and to the Encoder/Decoder constructors

# Model
enc_unit = 16
dec_unit = 16
keep_rate = 0.5  # forwarded to Encoder/Decoder; presumably a dropout keep probability -- TODO confirm
attn = 'bah' # 'bah' , 'luong'

# Optimisation
lr = 1e-4  # Adam learning rate; also embedded in the checkpoint folder name
batch_size = 16
max_epochs = 100

# Early stopping and checkpoints
best_loss = 1e8  # initial "best" validation loss given to EarlyStopping
max_patience = 7
checkpoint_dir = 'seq2seq_attn_checkpoint'

# Logging
LOG_INTERVAL = 200  # a training progress line is printed every LOG_INTERVAL batches
debug = False  # when True, the validation loop prints per-batch / per-step diagnostics

# Expose only GPU 0 to CUDA.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

In [3]:
def _load_splits():
    """Load the series, clean and normalise it, and build the per-split datasets.

    Prints the dataset sizes for the train/dev/test splits.

    Returns:
        ((train_enc_data, train_dec_data), (val_enc_data, val_dec_data)) as
        produced by build_dataset(). The test split is built only so that its
        size can be reported; it is not returned.
    """
    # load_data() also returns a time axis, which training does not use.
    _, elec = load_data(LBNL_DATA_DIR)

    elec = missing_value(elec)
    # NOTE(review): hard-coded spot check of one sample after missing-value
    # handling; this will fail on a series shorter than 40227 entries.
    print(elec[40226])

    elec = std_normalize(elec)

    train_split, valid_split, test_split = split_dataset(elec)

    train_enc_data, train_dec_data = build_dataset(train_split, horizon_size)
    val_enc_data, val_dec_data = build_dataset(valid_split, horizon_size)
    test_enc_data, test_dec_data = build_dataset(test_split, horizon_size)
    print("Build Dataset Finished")
    print("----------------------")
    print("[Train] enc {}\tdec {}".format(len(train_enc_data), len(train_dec_data)))
    print("[Dev] enc {}\tdec {}".format(len(val_enc_data), len(val_dec_data)))
    print("[Test] enc {}\tdec {}".format(len(test_enc_data), len(test_dec_data)))

    return (train_enc_data, train_dec_data), (val_enc_data, val_dec_data)


def _make_batch_arrays(batch_x, batch_y):
    """Convert one raw batch into (encoder input, decoder input, decoder target) arrays.

    The decoder input is each target sequence with the value 2 prepended (the
    original code labels it `<sos>`); the decoder target is the same sequence
    with the value 3 appended (presumably an end marker -- TODO confirm).
    All three arrays are float64, for training and validation alike.
    """
    enc_input = np.array([list(seq) for seq in batch_x], dtype=np.float64)
    dec_input = np.array([[2] + list(seq) for seq in batch_y], dtype=np.float64)
    dec_target = np.array([list(seq) + [3] for seq in batch_y], dtype=np.float64)
    return enc_input, dec_input, dec_target


def _decoder_step(decoder, dec_input_t, dec_hidden, enc_output, training):
    """Run the decoder for one time step and return (predictions, new hidden state).

    With attn == 'no' the decoder is expected to return two values; otherwise
    it returns a third value, which is discarded here.
    """
    outputs = decoder(dec_input_t, dec_hidden, enc_output, training=training)
    if attn == 'no':
        predictions, dec_hidden = outputs
    else:
        predictions, dec_hidden, _ = outputs
    return predictions, dec_hidden


def _train_one_epoch(epoch, encoder, decoder, optimizer, loss_obj,
                     train_enc_data, train_dec_data, num_batches_per_epoch):
    """Run one optimisation pass over the training data, logging every LOG_INTERVAL batches."""
    train_loss = 0.
    train_rmse = 0.

    train_batches = batch_iter(train_enc_data, train_dec_data, batch_size)

    for batch_idx, (batch_x, batch_y) in enumerate(train_batches):
        batch_enc_input, batch_dec_input, batch_dec_target = _make_batch_arrays(batch_x, batch_y)
        num_steps = int(batch_dec_input.shape[1])

        batch_loss = 0.

        with tf.GradientTape() as tape:
            enc_output, dec_hidden = encoder(batch_enc_input, training=True)

            # The ground-truth decoder input is fed at every step; the loss is
            # summed over time steps.
            for t in range(num_steps):
                predictions, dec_hidden = _decoder_step(
                    decoder, batch_dec_input[:, t], dec_hidden, enc_output, True)
                y_true = tf.reshape(batch_dec_target[:, t], (batch_dec_target.shape[0], 1))
                batch_loss += tf.reduce_mean(loss_obj(y_true, predictions))

        # Collected after the forward pass, as in the original code, so that
        # variables created on the models' first call are included.
        trainable_variables = encoder.trainable_variables + decoder.trainable_variables
        gradients = tape.gradient(batch_loss, trainable_variables)
        optimizer.apply_gradients(zip(gradients, trainable_variables))

        # Reported loss is the per-time-step mean; RMSE is its square root.
        train_batch_loss = batch_loss / num_steps
        train_batch_rmse = tf.math.sqrt(train_batch_loss)

        train_loss += train_batch_loss
        train_rmse += train_batch_rmse

        if (batch_idx+1) % LOG_INTERVAL == 0:
            print("[epoch {} | step {}/{}] loss: {:.4f} (Avg. {:.4f}) RMSE: {:.4f} (Avg. {:.4f})".format(
                epoch + 1,
                batch_idx+1, num_batches_per_epoch,
                train_batch_loss, train_loss/(batch_idx+1),
                train_batch_rmse, train_rmse/(batch_idx+1)))


def _validate(encoder, decoder, val_loss_obj, val_enc_data, val_dec_data):
    """Evaluate on the validation data and return (mean batch loss, mean batch RMSE).

    No gradients are taken and the models are called with training=False.
    When the module-level `debug` flag is set, per-batch and per-step
    diagnostics are printed.
    """
    val_loss = 0.
    val_rmse = 0.

    # Stays 0 when there are no validation batches, so the averages below
    # divide by 1, exactly as the original code did.
    val_batch_idx = 0

    val_batches = batch_iter(val_enc_data, val_dec_data, batch_size)

    for val_batch_idx, (val_batch_x, val_batch_y) in enumerate(val_batches):

        if debug:
            print("val batch: {}\n{}".format(val_batch_x, val_batch_y))

        val_batch_enc_input, val_batch_dec_input, val_batch_dec_target = _make_batch_arrays(
            val_batch_x, val_batch_y)
        num_steps = int(val_batch_dec_input.shape[1])

        if debug:
            print("val enc: {}\n".format(val_batch_enc_input))
            print("val dec: {}\n{}".format(val_batch_dec_input, val_batch_dec_target))

        val_enc_output, val_dec_hidden = encoder(val_batch_enc_input, training=False)

        if debug:
            print("time step: {}".format(val_batch_dec_input.shape[1]))

        val_batch_loss = 0.
        for t in range(num_steps):
            val_predictions, val_dec_hidden = _decoder_step(
                decoder, val_batch_dec_input[:, t], val_dec_hidden, val_enc_output, False)

            if debug:
                print("pred: {}\n".format(val_predictions))
                print("true: {}".format(val_batch_dec_target[:, t]))
                print("val batch_target: {}\n".format(val_batch_dec_target[:, t].shape))

            val_y_true = tf.reshape(val_batch_dec_target[:, t], (val_batch_dec_target.shape[0], 1))
            if debug:
                print("val batch_target: {}\n".format(val_batch_dec_target[:, t]))
                print("val_true: {}\n".format(val_y_true))

            loss = val_loss_obj(val_y_true, val_predictions)

            if debug:
                print("loss: {}\t{}\n".format(loss, tf.reduce_mean(loss)))

            val_batch_loss += tf.reduce_mean(loss)

        if debug:
            print("batch loss: {}".format(val_batch_loss/num_steps))

        val_batch_loss = val_batch_loss / num_steps
        val_batch_rmse = tf.math.sqrt(val_batch_loss)

        val_loss += val_batch_loss
        val_rmse += val_batch_rmse

        if debug:
            print("loss: {}\trmse: {}\tidx: {}".format(val_loss/(val_batch_idx+1), val_rmse/(val_batch_idx+1), val_batch_idx+1))

    num_val_batches = val_batch_idx + 1
    return val_loss / num_val_batches, val_rmse / num_val_batches


def train():
    """Train the encoder/decoder model with early stopping and checkpointing.

    All settings come from module-level globals (data path, horizon, unit
    sizes, learning rate, batch size, patience, ...). Each epoch runs one
    training pass and one validation pass, then asks the EarlyStopping helper
    what to do with the mean validation loss:

    * 'update'   -- new best: save a checkpoint under the "best" folder;
    * 'patience' -- no improvement yet: save a regular checkpoint;
    * otherwise  -- stop training.

    Side effects: creates the checkpoint folders, writes checkpoints and
    prints progress. Returns None.
    """
    (train_enc_data, train_dec_data), (val_enc_data, val_dec_data) = _load_splits()

    # model
    encoder = Encoder(enc_unit, batch_size, horizon_size, keep_rate)
    decoder = Decoder(dec_unit, batch_size, horizon_size, keep_rate, attn)

    # optimizer
    optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

    # separate loss objects for training and validation
    loss_obj = tf.keras.losses.MeanSquaredError()
    val_loss_obj = tf.keras.losses.MeanSquaredError()

    earlystopping = EarlyStopping(best_loss, max_patience)

    # Checkpoint folders are keyed by learning rate, encoder size and horizon.
    run_tag = 'lr-{}_hidden-{}_hr-{}'.format(lr, enc_unit, horizon_size)
    ckpt_dir = os.path.join(checkpoint_dir, run_tag)
    best_ckpt_dir = os.path.join(checkpoint_dir, 'best_' + run_tag)
    os.makedirs(ckpt_dir, exist_ok=True)
    os.makedirs(best_ckpt_dir, exist_ok=True)

    checkpoint = tf.train.Checkpoint(optimizer=optimizer,
                                     encoder=encoder,
                                     decoder=decoder)

    # Ceiling division: number of batches needed to cover the training set.
    num_batches_per_epoch = (len(train_enc_data)-1) // batch_size + 1
    print("num_batches_per_epoch: {}".format(num_batches_per_epoch))

    for epoch in range(max_epochs):
        _train_one_epoch(epoch, encoder, decoder, optimizer, loss_obj,
                         train_enc_data, train_dec_data, num_batches_per_epoch)

        mean_val_loss, mean_val_rmse = _validate(encoder, decoder, val_loss_obj,
                                                 val_enc_data, val_dec_data)

        print("[epoch {}] loss: {:.4f} RMSE: {:.4f}".format(epoch + 1, mean_val_loss, mean_val_rmse))

        # applying earlystopping
        early = earlystopping.update(mean_val_loss, epoch)
        if early == 'update':
            ckpt_prefix = os.path.join(best_ckpt_dir, 'best_ckpt_{}'.format(epoch+1))
            checkpoint.save(file_prefix=ckpt_prefix)
            print("[epoch {} patience {} max_patience {} best_loss {}]\tModel best performance!".format(epoch+1, earlystopping.patience, earlystopping.max_patience, earlystopping.best_loss))
        elif early == 'patience':
            ckpt_prefix = os.path.join(ckpt_dir, 'ckpt_{}'.format(epoch+1))
            checkpoint.save(file_prefix=ckpt_prefix)
            print("[epoch {} patience {} max_patience {} best_loss {}]\tModel is saved".format(epoch+1, earlystopping.patience, earlystopping.max_patience, earlystopping.best_loss))
        else:
            print("[epoch {} best_epoch {} patience {} max_patience {} best_loss {}]\tTraining process is finished".format(epoch+1, earlystopping.best_epoch+1, earlystopping.patience, earlystopping.max_patience, earlystopping.best_loss))
            break
        

In [None]:
# Entry point: start training only when this module is the one being executed.
if __name__ == "__main__":
    train()

  warn(msg)


Size of time, elec: 46111	46111
43.899
Mean: 31.535117450933615	Std: 8.103161225783838
Size of train, valid, test: 27973	6994	11144
Build Dataset Finished
----------------------
[Train] enc 27781	dec 27781
[Dev] enc 6802	dec 6802
[Test] enc 10952	dec 10952
num_batches_per_epoch: 1737


W1208 11:56:56.028552 140249027946304 base_layer.py:1772] Layer encoder is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2.  The layer has dtype float32 because it's dtype defaults to floatx.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

W1208 11:56:57.168847 140249027946304 base_layer.py:1772] Layer decoder is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2.  The layer has dtype float32 because it's dtype defaults to floatx.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If yo

[epoch 1 | step 200/1737] loss: 0.3855 (Avg. 0.4711) RMSE: 0.6209 (Avg. 0.6809)
[epoch 1 | step 400/1737] loss: 0.4993 (Avg. 0.4666) RMSE: 0.7066 (Avg. 0.6780)
[epoch 1 | step 600/1737] loss: 0.4821 (Avg. 0.4960) RMSE: 0.6943 (Avg. 0.6957)
[epoch 1 | step 800/1737] loss: 1.8262 (Avg. 0.5543) RMSE: 1.3514 (Avg. 0.7211)
[epoch 1 | step 1000/1737] loss: 1.3295 (Avg. 0.5563) RMSE: 1.1530 (Avg. 0.7235)
[epoch 1 | step 1200/1737] loss: 0.4585 (Avg. 0.5667) RMSE: 0.6771 (Avg. 0.7310)
[epoch 1 | step 1400/1737] loss: 0.4922 (Avg. 0.5876) RMSE: 0.7016 (Avg. 0.7432)
[epoch 1 | step 1600/1737] loss: 0.3018 (Avg. 0.5663) RMSE: 0.5494 (Avg. 0.7297)
[epoch 1] loss: 0.3224 RMSE: 0.5564
[epoch 1 patience 0 max_patience 7 best_loss 0.3224213719367981]	Model best performance!
[epoch 2 | step 200/1737] loss: 0.3021 (Avg. 0.4169) RMSE: 0.5497 (Avg. 0.6385)
[epoch 2 | step 400/1737] loss: 0.4333 (Avg. 0.4230) RMSE: 0.6582 (Avg. 0.6436)
[epoch 2 | step 600/1737] loss: 0.4843 (Avg. 0.4267) RMSE: 0.6959 (Avg.