In [1]:
import os
from datetime import datetime
import data_preparation as sf
import rnn_enc_dec as red


# Root directory of the project. Defaults to the original hard-coded location;
# set the TROUBLED_LIFE_ROOT environment variable to point somewhere else
# (e.g. "/Users/gopora/MyStuff/Dev/Workspaces/Sandbox/TroubledLife") instead of
# editing this cell when switching machines.
PROJECT_ROOT_DIR = os.environ.get("TROUBLED_LIFE_ROOT", "/home/ubuntu/TroubledLife")

# Directories and data file names derived from the project root.
DATASETS_DIR = os.path.join(PROJECT_ROOT_DIR, "data")
TF_LOG_DIR = os.path.join(PROJECT_ROOT_DIR, "tf_logs")
MODEL_CHECKPOINTS_DIR = os.path.join(PROJECT_ROOT_DIR, "model_checkpoints")
TRAINING_SET_DATA_FILE = "pure_life_policy_train_data.csv"
TEST_SET_DATA_FILE = "pure_life_policy_test_data.csv"

# Every run writes its TensorBoard events into its own sub-directory of
# TF_LOG_DIR, named after the UTC timestamp at which this cell was executed.
now = datetime.utcnow().strftime("%Y%m%d-%H%M%S")
log_dir = "{}/run-{}/".format(TF_LOG_DIR, now)

In [2]:
# Generation of the train / test datasets is disabled here; the CSV files are
# read from DATASETS_DIR below. Uncomment to regenerate them.
# sf.generate_life_policy_data(no_of_policies=10000, runtime=5, trouble=False,
#                              file_path=os.path.join(DATASETS_DIR, TRAINING_SET_DATA_FILE))
# sf.generate_life_policy_data(no_of_policies=2000, runtime=5, trouble=False,
#                              file_path=os.path.join(DATASETS_DIR, TEST_SET_DATA_FILE))

# Load the policy histories of the training and the test set.
policy_histories_train = sf.load_life_policy_data(
    file_path=os.path.join(DATASETS_DIR, TRAINING_SET_DATA_FILE)
)
policy_histories_test = sf.load_life_policy_data(
    file_path=os.path.join(DATASETS_DIR, TEST_SET_DATA_FILE)
)

# Per-set history lengths together with the maximum length found in each set.
(policy_histories_length_train,
 max_policy_history_length_train) = sf.get_policy_history_lengths(
    policy_histories=policy_histories_train
)
(policy_histories_length_test,
 max_policy_history_length_test) = sf.get_policy_history_lengths(
    policy_histories=policy_histories_test
)

# The longest history across both sets.
max_policy_history_length = max(
    max_policy_history_length_train,
    max_policy_history_length_test,
)

In [3]:
# Padding of the histories up to the maximum length of both train and test set.
# Disabled: the padded result was written back to the CSV files (see the
# to_csv calls), because always generating and padding takes too long.
#
# policy_histories_train = sf.pad_life_policy_histories(
#     policy_histories=policy_histories_train,
#     policy_histories_lengths=policy_histories_length_train,
#     max_policy_history_length=max_policy_history_length)
#
# policy_histories_test = sf.pad_life_policy_histories(
#     policy_histories=policy_histories_test,
#     policy_histories_lengths=policy_histories_length_test,
#     max_policy_history_length=max_policy_history_length)
#
# policy_histories_train.to_csv(path_or_buf=os.path.join(DATASETS_DIR, TRAINING_SET_DATA_FILE))
# policy_histories_test.to_csv(path_or_buf=os.path.join(DATASETS_DIR, TEST_SET_DATA_FILE))

# Split each dataset into labels, features and sequence lengths
# (numpy.ndarray(s) according to the original author's note).
train_labels, train_features, train_seq_lengths = sf.prepare_labels_features_lengths(
    policy_histories=policy_histories_train,
    policy_histories_lengths=policy_histories_length_train,
    max_policy_history_length=max_policy_history_length,
)
test_labels, test_features, test_seq_lengths = sf.prepare_labels_features_lengths(
    policy_histories=policy_histories_test,
    policy_histories_lengths=policy_histories_length_test,
    max_policy_history_length=max_policy_history_length,
)

# Wrap the training arrays in the project's data-set helper; the training loop
# uses its `num_examples` attribute and `next_batch` method.
train_data = sf.TrainDataSet(
    train_labels=train_labels,
    train_features=train_features,
    train_seq_lengths=train_seq_lengths,
)

In [6]:
import tensorflow as tf


# Start from an empty default graph (so re-running this cell does not stack
# ops on top of the previous graph) and fix the graph-level random seed.
tf.reset_default_graph()
tf.set_random_seed(42)

# Our hyperparameters
n_steps = max_policy_history_length  # time dimension of the input placeholder
n_inputs = 2                         # last dimension of X, also the decoder's n_outputs
n_layers = 3
n_neurons = 200
n_latent = 100
learning_rate = 0.0001
# NOTE(review): `bidirectional` is not passed to red.encoder / red.decoder in
# this cell -- confirm whether anything else reads it.
bidirectional = True

# Placeholders for our input sequences
seq_length = tf.placeholder(tf.int32, [None], name="seq_length")
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs], name="X")

# Encoder: maps the input sequences to the latent vector.
latent_vector = red.encoder(
    X=X,
    seq_length=seq_length,
    n_layers=n_layers,
    n_neurons=n_neurons,
    n_latent=n_latent,
)

# Arguments shared by both decoder instances.
decoder_shared_args = dict(
    n_latent=n_latent,
    n_layers=n_layers,
    n_neurons=n_neurons,
    n_outputs=n_inputs,
    latent_vector=latent_vector,
    X=X,
    seq_length=seq_length,
)

# First decoder instance (reuse=False).
decoder_sequence_train = red.decoder(training=True, reuse=False, **decoder_shared_args)

# Second decoder instance, built with reuse=True.
# NOTE(review): this "test" decoder is also built with training=True, i.e. with
# exactly the same arguments as the train decoder apart from `reuse` -- confirm
# whether training=False was intended here.
decoder_sequence_test = red.decoder(training=True, reuse=True, **decoder_shared_args)

# List every trainable variable of the graph.
tvars = tf.trainable_variables()
for trainable_var in tvars:
    print(trainable_var)


def _mse(reconstruction):
    """Return the mean squared error between `reconstruction` and the input X.

    The mean is taken over all elements of the tensor.
    NOTE(review): if X contains padded time steps they contribute to this
    average as well -- confirm that this is intended.
    """
    return tf.reduce_mean(tf.square(reconstruction - X))


loss_train = _mse(decoder_sequence_train)
loss_test = _mse(decoder_sequence_test)

# Only the train loss is minimised.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss_train)

# Scalar summaries for TensorBoard.
train_loss_summary = tf.summary.scalar('Train_loss', loss_train)
test_loss_summary = tf.summary.scalar('Test_loss', loss_test)

# Global and local initializers; per the original note, both are needed when
# using precision and recall metrics.
init_g = tf.global_variables_initializer()
init_l = tf.local_variables_initializer()

merged = tf.summary.merge_all()
writer = tf.summary.FileWriter(log_dir, tf.get_default_graph())

# The saver is restricted to the trainable variables collected above.
saver = tf.train.Saver(var_list=tvars)

encoder states shape: (?, 600)
latent_vector shape: (?, 100)
state_input shape: (?, 200)
decoder initial state shape:  (?, 600)
decoder outputs rnn_output shape: (?, ?, 2)
state_input shape: (?, 200)
decoder initial state shape:  (?, 600)
decoder outputs rnn_output shape: (?, ?, 2)
<tf.Variable 'P_Encoder/rnn/multi_rnn_cell/cell_0/basic_rnn_cell/kernel:0' shape=(202, 200) dtype=float32_ref>
<tf.Variable 'P_Encoder/rnn/multi_rnn_cell/cell_0/basic_rnn_cell/bias:0' shape=(200,) dtype=float32_ref>
<tf.Variable 'P_Encoder/rnn/multi_rnn_cell/cell_1/basic_rnn_cell/kernel:0' shape=(400, 200) dtype=float32_ref>
<tf.Variable 'P_Encoder/rnn/multi_rnn_cell/cell_1/basic_rnn_cell/bias:0' shape=(200,) dtype=float32_ref>
<tf.Variable 'P_Encoder/rnn/multi_rnn_cell/cell_2/basic_rnn_cell/kernel:0' shape=(400, 200) dtype=float32_ref>
<tf.Variable 'P_Encoder/rnn/multi_rnn_cell/cell_2/basic_rnn_cell/bias:0' shape=(200,) dtype=float32_ref>
<tf.Variable 'P_Encoder_2_Latent/W:0' shape=(600, 100) dtype=float32_

In [7]:
from time import time

# Wall-clock timer for the whole training run.
start = time()

n_epochs = 100
batch_size = 200

# Number of mini-batch steps per epoch (floor division). Fail early with a
# clear message instead of hitting an undefined `lTrain` / `X_batch` below.
n_batches = train_data.num_examples // batch_size
if n_batches == 0:
    raise ValueError(
        "batch_size (%d) is larger than the number of training examples (%d)"
        % (batch_size, train_data.num_examples)
    )

# Make sure the checkpoint directory exists before saver.save() is called.
os.makedirs(MODEL_CHECKPOINTS_DIR, exist_ok=True)

# The test feed is the same for every epoch.
test_feed = {X: test_features, seq_length: test_seq_lengths}

with tf.Session() as sess:
    init_g.run()
    init_l.run()

    for epoch in range(n_epochs):
        for i in range(n_batches):
            _, X_batch, seq_length_batch = train_data.next_batch(batch_size)
            train_feed = {X: X_batch, seq_length: seq_length_batch}

            _, lTrain = sess.run([training_op, loss_train], feed_dict=train_feed)

        # Fetch the test loss and its summary in a single run so the test set
        # is only pushed through the network once per epoch.
        lTest, test_loss_summary_str = sess.run(
            [loss_test, test_loss_summary], feed_dict=test_feed
        )

        # The reported train loss is the loss of the last mini-batch of the epoch.
        print("Epoch:", epoch, "Train loss:", lTrain, "Test loss:", lTest)

        # Train summary: evaluated on the last mini-batch, after its update.
        train_loss_summary_str = train_loss_summary.eval(feed_dict=train_feed)

        writer.add_summary(train_loss_summary_str, epoch)
        writer.add_summary(test_loss_summary_str, epoch)

    # Persist the variables once, after the final epoch.
    saver.save(sess, os.path.join(MODEL_CHECKPOINTS_DIR, "rnn_enc_dec.ckpt"))

writer.close()

elapsed = time() - start
# Apply the format with `%`; passing `elapsed` as a second print argument
# printed the literal "%d" followed by the raw float.
print("Time to finish: %d seconds" % elapsed)



Epoch: 0 Train loss: 2.0611e+06 Test loss: 1.96828e+06
Epoch: 1 Train loss: 242873.0 Test loss: 262886.0
Epoch: 2 Train loss: 52124.2 Test loss: 55244.0
Epoch: 3 Train loss: 25863.2 Test loss: 33879.6
Epoch: 4 Train loss: 23601.4 Test loss: 31092.7
Epoch: 5 Train loss: 21890.5 Test loss: 29759.1
Epoch: 6 Train loss: 18126.6 Test loss: 28953.4
Epoch: 7 Train loss: 18382.3 Test loss: 28485.2
Epoch: 8 Train loss: 21469.7 Test loss: 27973.4
Epoch: 9 Train loss: 16224.7 Test loss: 27612.6
Epoch: 10 Train loss: 18740.7 Test loss: 26820.0
Epoch: 11 Train loss: 19723.7 Test loss: 26278.6
Epoch: 12 Train loss: 19236.3 Test loss: 26012.8
Epoch: 13 Train loss: 18209.6 Test loss: 25584.4
Epoch: 14 Train loss: 19779.9 Test loss: 24417.4
Epoch: 15 Train loss: 16148.7 Test loss: 23483.4
Epoch: 16 Train loss: 13085.9 Test loss: 22471.6
Epoch: 17 Train loss: 13942.1 Test loss: 20043.5
Epoch: 18 Train loss: 10837.4 Test loss: 18322.2
Epoch: 19 Train loss: 9299.94 Test loss: 16013.1
Epoch: 20 Train loss: