In [6]:
import os
from datetime import datetime
import data_preparation as sf


# Root directory of the project. The original machine-specific location stays
# the default, but it can now be overridden without editing the notebook by
# setting the TROUBLED_LIFE_ROOT environment variable before starting the kernel.
PROJECT_ROOT_DIR = (
    os.environ.get("TROUBLED_LIFE_ROOT")
    or "/Users/gopora/MyStuff/Dev/Workspaces/Sandbox/TroubledLife"
)

# Sub-directories for the CSV data sets and the TensorBoard event files.
DATASETS_DIR = os.path.join(PROJECT_ROOT_DIR, "data")
TF_LOG_DIR = os.path.join(PROJECT_ROOT_DIR, "tf_logs")

# File names (relative to DATASETS_DIR) of the training and test sets.
TRAINING_SET_DATA_FILE = "troubled_life_policy_train_data.csv"
TEST_SET_DATA_FILE = "troubled_life_policy_test_data.csv"

# Every execution of this cell yields a fresh, UTC-timestamped log directory
# (e.g. ".../tf_logs/run-20180101-120000/"), so separate runs do not share
# the same event directory.
now = datetime.utcnow().strftime("%Y%m%d-%H%M%S")
log_dir = "{}/run-{}/".format(TF_LOG_DIR, now)

In [7]:
# One-off data generation, left disabled here. Uncomment to (re)create the CSV
# files that the loading calls below read from DATASETS_DIR.
# sf.generate_troubled_life_policy_data(no_of_policies=10000, runtime=5, file_path=os.path.join(DATASETS_DIR, TRAINING_SET_DATA_FILE))
# sf.generate_troubled_life_policy_data(no_of_policies=2000, runtime=5, file_path=os.path.join(DATASETS_DIR, TEST_SET_DATA_FILE))

# Load the policy histories of the training and the test set.
policy_histories_train = sf.load_troubled_life_policy_data(
    file_path=os.path.join(DATASETS_DIR, TRAINING_SET_DATA_FILE)
)
policy_histories_test = sf.load_troubled_life_policy_data(
    file_path=os.path.join(DATASETS_DIR, TEST_SET_DATA_FILE)
)

# For each set: the per-policy history lengths plus the longest history found.
(policy_histories_length_train,
 max_policy_history_length_train) = sf.get_policy_history_lengths(
    policy_histories=policy_histories_train
)
(policy_histories_length_test,
 max_policy_history_length_test) = sf.get_policy_history_lengths(
    policy_histories=policy_histories_test
)

# Use one common maximum for both sets; later cells size the network with it.
max_policy_history_length = max(
    max_policy_history_length_train,
    max_policy_history_length_test,
)

In [8]:
# --- Disabled: padding step ---------------------------------------------------
# Pads the histories up to the maximum length of both the train and test set,
# then writes the padded frames back to the CSV files (generating and padding
# on every run takes too long). Kept for reference only.
#
# policy_histories_train = \
#     sf.pad_troubled_life_policy_histories(policy_histories=policy_histories_train,
#                                           policy_histories_lengths=policy_histories_length_train,
#                                           max_policy_history_length=max_policy_history_length)
#
# policy_histories_test = \
#     sf.pad_troubled_life_policy_histories(policy_histories=policy_histories_test,
#                                           policy_histories_lengths=policy_histories_length_test,
#                                           max_policy_history_length=max_policy_history_length)
#
# policy_histories_train.to_csv(path_or_buf=os.path.join(DATASETS_DIR, TRAINING_SET_DATA_FILE))
# policy_histories_test.to_csv(path_or_buf=os.path.join(DATASETS_DIR, TEST_SET_DATA_FILE))
# ------------------------------------------------------------------------------

# Turn each data set into (labels, features, sequence lengths); per the original
# note these come back as numpy.ndarray(s).
(train_labels,
 train_features,
 train_seq_lengths) = sf.prepare_labels_features_lengths(
    policy_histories=policy_histories_train,
    policy_histories_lengths=policy_histories_length_train,
    max_policy_history_length=max_policy_history_length,
)
(test_labels,
 test_features,
 test_seq_lengths) = sf.prepare_labels_features_lengths(
    policy_histories=policy_histories_test,
    policy_histories_lengths=policy_histories_length_test,
    max_policy_history_length=max_policy_history_length,
)

# Wrap the training arrays; the training loop pulls mini-batches from this object.
train_data = sf.TrainDataSet(
    train_labels=train_labels,
    train_features=train_features,
    train_seq_lengths=train_seq_lengths,
)

In [9]:
import tensorflow as tf
import numpy as np

# Build the TensorFlow graph from scratch: drop whatever a previous execution of
# this cell left in the default graph and fix the graph-level random seed.
tf.reset_default_graph()
tf.set_random_seed(42)

# Our hyperparameters
# n_steps and n_outputs are both tied to the longest policy history found in
# the train/test data (max_policy_history_length).
n_steps = max_policy_history_length
n_inputs = 2
n_layers = 3
n_neurons = 200
n_outputs = max_policy_history_length
learning_rate = 0.0001

# Placeholders for our input sequences
# seq_length: one int per example, passed to dynamic_rnn as sequence_length
# X:          [batch, n_steps, n_inputs] float input sequences
# y:          one int class label per example
seq_length = tf.placeholder(tf.int32, [None], name="seq_length")
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs], name="X")
y = tf.placeholder(tf.int32, [None], name="y")

# Stack of n_layers basic RNN cells with ReLU activation, unrolled dynamically.
# state_is_tuple=False makes `states` a single tensor instead of a per-layer tuple.
layers = [tf.contrib.rnn.BasicRNNCell(num_units=n_neurons,
                                      activation=tf.nn.relu)
          for layer in range(n_layers)]
multi_layer_cell = tf.contrib.rnn.MultiRNNCell(layers, state_is_tuple=False)
outputs, states = tf.nn.dynamic_rnn(cell=multi_layer_cell, inputs=X, sequence_length=seq_length, dtype=tf.float32)

# We feed all layers' states (after the last timestep) into a fully connected layer of
# n_outputs neurons, i.e. one logit per class with max_policy_history_length classes
# (not 2, as an earlier version of this comment stated). Softmax layer is next.
logits = tf.layers.dense(states, n_outputs)
y_pred = tf.argmax(tf.nn.softmax(logits), axis=1)
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)

# Loss function and Adam Optimizer
loss = tf.reduce_mean(tf.cast(xentropy, tf.float32))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)

# Accuracy: fraction of examples whose label is the top-1 logit.
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
# Confusion matrix over all classes, plus a black/white rendering of it:
# cells with a count of 0 stay 0, every non-zero cell becomes 255. The result is
# reshaped to [1, classes, classes, 1] for tf.summary.image.
conf_matrix = tf.confusion_matrix(labels=y, predictions=y_pred, num_classes=max_policy_history_length)
conf_matrix_image = tf.where(tf.equal(conf_matrix, 0), tf.zeros_like(conf_matrix), tf.zeros_like(conf_matrix) + 255)
conf_matrix_image = tf.reshape(tf.cast(conf_matrix_image, tf.float32),
                                  [1, max_policy_history_length, max_policy_history_length, 1])

# TensorBoard summaries evaluated on training data.
train_loss_summary = tf.summary.scalar('Train_loss', loss)
train_accuracy_summary = tf.summary.scalar('Train_accuracy', accuracy)
train_confusion_matrix_summary_image = tf.summary.image('Train_conf_matrix_image', conf_matrix_image)

# The test confusion matrix is logged as text: the caller converts the matrix to
# strings and feeds it through this placeholder.
conf_matrix_str = tf.placeholder(tf.string, [max_policy_history_length, max_policy_history_length], name="conf_matrix_str")
test_confusion_matrix_summary_text = tf.summary.text('Test_conf_matrix_text', conf_matrix_str)

# Same `accuracy` tensor as Train_accuracy above; which data set it reports
# depends only on the feed_dict used when the summary is evaluated.
test_accuracy_summary = tf.summary.scalar('Test_accuracy', accuracy)

# Both are needed when using precision and recall metrics
# NOTE(review): no precision/recall metric ops are created in this cell, so the
# local initializer looks like a leftover - confirm before removing.
init_g = tf.global_variables_initializer()
init_l = tf.local_variables_initializer()

# Event writer for this run; the graph is attached so TensorBoard can display it.
file_writer = tf.summary.FileWriter(log_dir, tf.get_default_graph())


In [10]:
from time import time

# Train the network, and after every epoch log accuracy / loss / confusion-matrix
# summaries for the last training batch and for the complete test set.
start = time()

n_epochs = 100
batch_size = 200

# Class indices 0..max_policy_history_length-1. Not used in this cell; kept so
# that any other cell referring to it keeps working.
conf_labels = list(range(max_policy_history_length))

# Number of full mini-batches per epoch. With zero batches the per-epoch
# evaluation below would have no batch to evaluate, so fail early and clearly.
n_batches = train_data.num_examples // batch_size
if n_batches == 0:
    raise ValueError("Training set has fewer examples than batch_size=%d" % batch_size)

# The test feed is identical for every epoch, so build it once.
test_feed = {X: test_features, y: test_labels, seq_length: test_seq_lengths}

with tf.Session() as sess:
    init_g.run()
    init_l.run()

    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            y_batch, X_batch, seq_length_batch = train_data.next_batch(batch_size)

            sess.run(training_op, feed_dict={X: X_batch, y: y_batch, seq_length: seq_length_batch})

        # "Train" metrics are computed on the LAST mini-batch of the epoch only,
        # which is why the printed train accuracy is fairly noisy.
        last_batch_feed = {X: X_batch, y: y_batch, seq_length: seq_length_batch}

        # One forward pass per feed instead of one per fetched tensor: all
        # training-side values share last_batch_feed, all test-side values
        # share test_feed.
        (y_train_pred,
         acc_train,
         train_loss_summary_str,
         train_accuracy_summary_str,
         train_confusion_matrix_summary_image_str) = sess.run(
            [y_pred,
             accuracy,
             train_loss_summary,
             train_accuracy_summary,
             train_confusion_matrix_summary_image],
            feed_dict=last_batch_feed)

        acc_test, test_accuracy_summary_str, conf_matrix_counts = sess.run(
            [accuracy, test_accuracy_summary, conf_matrix],
            feed_dict=test_feed)

        print("Epoch:", epoch, "Train accuracy:", acc_train, "Test accuracy:", acc_test)

        # The text summary expects a string matrix, so format the integer counts
        # before feeding them into the placeholder.
        conf_matrix_test = np.char.mod('%d', conf_matrix_counts)
        test_confusion_matrix_summary_text_str = sess.run(
            test_confusion_matrix_summary_text,
            feed_dict={conf_matrix_str: conf_matrix_test})

        file_writer.add_summary(train_loss_summary_str, epoch)
        file_writer.add_summary(train_accuracy_summary_str, epoch)
        file_writer.add_summary(train_confusion_matrix_summary_image_str, epoch)
        file_writer.add_summary(test_confusion_matrix_summary_text_str, epoch)

        file_writer.add_summary(test_accuracy_summary_str, epoch)

file_writer.close()

elapsed = time() - start
# The original passed `elapsed` as a second print() argument, so the "%d"
# placeholder was printed literally; format the value into the message instead.
print("Time to finish: %.1f seconds" % elapsed)


Epoch: 0 Train accuracy: 0.865 Test accuracy: 0.844


Epoch: 1 Train accuracy: 0.85 Test accuracy: 0.823


Epoch: 2 Train accuracy: 0.81 Test accuracy: 0.8535


Epoch: 3 Train accuracy: 0.92 Test accuracy: 0.9015


Epoch: 4 Train accuracy: 0.87 Test accuracy: 0.876


Epoch: 5 Train accuracy: 0.94 Test accuracy: 0.903


Epoch: 6 Train accuracy: 0.94 Test accuracy: 0.8965


Epoch: 7 Train accuracy: 0.905 Test accuracy: 0.8615


Epoch: 8 Train accuracy: 0.905 Test accuracy: 0.8775


Epoch: 9 Train accuracy: 0.92 Test accuracy: 0.91


Epoch: 10 Train accuracy: 0.915 Test accuracy: 0.9055


Epoch: 11 Train accuracy: 0.925 Test accuracy: 0.907


Epoch: 12 Train accuracy: 0.92 Test accuracy: 0.9065


Epoch: 13 Train accuracy: 0.93 Test accuracy: 0.9155


Epoch: 14 Train accuracy: 0.935 Test accuracy: 0.911


Epoch: 15 Train accuracy: 0.94 Test accuracy: 0.9215


Epoch: 16 Train accuracy: 0.94 Test accuracy: 0.9535


Epoch: 17 Train accuracy: 0.925 Test accuracy: 0.9245


Epoch: 18 Train accuracy: 0.925 Test accuracy: 0.909


Epoch: 19 Train accuracy: 0.94 Test accuracy: 0.9325


Epoch: 20 Train accuracy: 0.945 Test accuracy: 0.927


Epoch: 21 Train accuracy: 0.915 Test accuracy: 0.914


Epoch: 22 Train accuracy: 0.875 Test accuracy: 0.8775


Epoch: 23 Train accuracy: 0.97 Test accuracy: 0.944


Epoch: 24 Train accuracy: 0.95 Test accuracy: 0.942


Epoch: 25 Train accuracy: 0.96 Test accuracy: 0.9475


Epoch: 26 Train accuracy: 0.94 Test accuracy: 0.942


Epoch: 27 Train accuracy: 0.98 Test accuracy: 0.96


Epoch: 28 Train accuracy: 0.97 Test accuracy: 0.9655


Epoch: 29 Train accuracy: 0.96 Test accuracy: 0.962


Epoch: 30 Train accuracy: 0.965 Test accuracy: 0.9615


Epoch: 31 Train accuracy: 0.95 Test accuracy: 0.958


Epoch: 32 Train accuracy: 0.995 Test accuracy: 0.9785


Epoch: 33 Train accuracy: 0.985 Test accuracy: 0.969


Epoch: 34 Train accuracy: 0.975 Test accuracy: 0.9725


Epoch: 35 Train accuracy: 0.99 Test accuracy: 0.969


Epoch: 36 Train accuracy: 0.965 Test accuracy: 0.96


Epoch: 37 Train accuracy: 0.985 Test accuracy: 0.983


Epoch: 38 Train accuracy: 0.96 Test accuracy: 0.9615


Epoch: 39 Train accuracy: 0.975 Test accuracy: 0.9775


Epoch: 40 Train accuracy: 0.99 Test accuracy: 0.987


Epoch: 41 Train accuracy: 0.985 Test accuracy: 0.967


Epoch: 42 Train accuracy: 0.99 Test accuracy: 0.987


Epoch: 43 Train accuracy: 0.995 Test accuracy: 0.98


Epoch: 44 Train accuracy: 0.97 Test accuracy: 0.978


Epoch: 45 Train accuracy: 0.985 Test accuracy: 0.9825


Epoch: 46 Train accuracy: 0.985 Test accuracy: 0.9875


Epoch: 47 Train accuracy: 0.995 Test accuracy: 0.988


Epoch: 48 Train accuracy: 0.995 Test accuracy: 0.975


Epoch: 49 Train accuracy: 0.98 Test accuracy: 0.9765


Epoch: 50 Train accuracy: 0.99 Test accuracy: 0.9845


Epoch: 51 Train accuracy: 0.98 Test accuracy: 0.981


Epoch: 52 Train accuracy: 0.99 Test accuracy: 0.9855


Epoch: 53 Train accuracy: 0.995 Test accuracy: 0.9915


Epoch: 54 Train accuracy: 1.0 Test accuracy: 0.99


Epoch: 55 Train accuracy: 0.985 Test accuracy: 0.991


Epoch: 56 Train accuracy: 0.995 Test accuracy: 0.9925


Epoch: 57 Train accuracy: 0.985 Test accuracy: 0.9895


Epoch: 58 Train accuracy: 1.0 Test accuracy: 0.989


Epoch: 59 Train accuracy: 0.995 Test accuracy: 0.995


Epoch: 60 Train accuracy: 0.995 Test accuracy: 0.9865


Epoch: 61 Train accuracy: 0.995 Test accuracy: 0.995


Epoch: 62 Train accuracy: 1.0 Test accuracy: 0.9925


Epoch: 63 Train accuracy: 0.985 Test accuracy: 0.98


Epoch: 64 Train accuracy: 0.995 Test accuracy: 0.9925


Epoch: 65 Train accuracy: 1.0 Test accuracy: 0.9825


Epoch: 66 Train accuracy: 0.995 Test accuracy: 0.9945


Epoch: 67 Train accuracy: 1.0 Test accuracy: 0.9915


Epoch: 68 Train accuracy: 1.0 Test accuracy: 0.994


Epoch: 69 Train accuracy: 0.99 Test accuracy: 0.99


Epoch: 70 Train accuracy: 0.92 Test accuracy: 0.9125


Epoch: 71 Train accuracy: 0.94 Test accuracy: 0.961


Epoch: 72 Train accuracy: 0.99 Test accuracy: 0.986


Epoch: 73 Train accuracy: 0.995 Test accuracy: 0.993


Epoch: 74 Train accuracy: 0.99 Test accuracy: 0.9895


Epoch: 75 Train accuracy: 1.0 Test accuracy: 0.9945


Epoch: 76 Train accuracy: 1.0 Test accuracy: 0.996


Epoch: 77 Train accuracy: 1.0 Test accuracy: 0.996


Epoch: 78 Train accuracy: 1.0 Test accuracy: 0.996


Epoch: 79 Train accuracy: 1.0 Test accuracy: 0.992


Epoch: 80 Train accuracy: 1.0 Test accuracy: 0.995


Epoch: 81 Train accuracy: 1.0 Test accuracy: 0.9965


Epoch: 82 Train accuracy: 1.0 Test accuracy: 0.9895


Epoch: 83 Train accuracy: 1.0 Test accuracy: 0.994


Epoch: 84 Train accuracy: 1.0 Test accuracy: 0.994


Epoch: 85 Train accuracy: 0.99 Test accuracy: 0.974


Epoch: 86 Train accuracy: 1.0 Test accuracy: 0.9955


Epoch: 87 Train accuracy: 1.0 Test accuracy: 0.9965


Epoch: 88 Train accuracy: 1.0 Test accuracy: 0.997


Epoch: 89 Train accuracy: 1.0 Test accuracy: 0.9945


Epoch: 90 Train accuracy: 1.0 Test accuracy: 0.9945


Epoch: 91 Train accuracy: 1.0 Test accuracy: 0.996


Epoch: 92 Train accuracy: 1.0 Test accuracy: 0.9925


Epoch: 93 Train accuracy: 1.0 Test accuracy: 0.9945


Epoch: 94 Train accuracy: 0.995 Test accuracy: 0.9955


Epoch: 95 Train accuracy: 1.0 Test accuracy: 0.994


Epoch: 96 Train accuracy: 0.99 Test accuracy: 0.9955


Epoch: 97 Train accuracy: 0.995 Test accuracy: 0.996


Epoch: 98 Train accuracy: 1.0 Test accuracy: 0.9915


Epoch: 99 Train accuracy: 1.0 Test accuracy: 0.992


Time to finish %d 531.8297789096832


In [11]:
# Convert the training time printed by the training cell (in seconds) to minutes.
# NOTE(review): the literal is pasted from that cell's output and goes stale on
# a re-run; consider computing this from `elapsed` instead.
531.8297789096832 / 60

8.86382964849472

In [12]:
# Fractional part of the minutes value shown by the previous cell, converted
# back to seconds (i.e. the seconds left over after the whole minutes).
# NOTE(review): pasted literal - goes stale when the training cell is re-run.
.86382964849472 * 60

51.8297789096832