In [13]:
#!/disk/scratch/mlp/miniconda2/bin/python
import os
import tensorflow as tf
import numpy as np

# Utilities
import datetime
import time
import pickle
import matplotlib.pyplot as plt

In [14]:
# #check necessary environment variables are defined
# assert 'MLP_DATA_DIR' in os.environ, 'An environment variable MLP_DATA_DIR must be set to the path containing MLP data before running script.'
# # assert 'OUTPUT_DIR' in os.environ, 'An environment variable OUTPUT_DIR must be set to the path to write output to before running script.'
# save_point = '/home/s1687487/0304/0/exp/' #'/home/s1687487/0228/0/exp/'

In [15]:
# Data: MSD 10-genre training and validation providers.
from mlp.data_providers import MSD10GenreDataProvider, MSD25GenreDataProvider

batch_size = 128

# NOTE(review): max_num_batches looks like a cap used for quick smoke runs
# (5 training batches, 1 validation batch) -- confirm against the provider
# and raise/remove it for full experiments.
train_data = MSD10GenreDataProvider(
    'train', batch_size=batch_size, max_num_batches=5)
valid_data = MSD10GenreDataProvider(
    'valid', batch_size=batch_size, max_num_batches=1)

In [16]:
# Network dimensions (these fix the placeholder shapes used by the model:
# inputs are [batch, n_steps, n_input], targets are [batch, n_classes]).
n_steps = 120    # timesteps per sequence
n_input = 25     # features fed to the RNN at each timestep
n_classes = 10   # number of target classes (width of the one-hot targets)

# Optimisation settings
learning_rate = 0.001   # step size handed to the Adam optimizer
training_epochs = 5     # full passes over the training provider
display_step = 10       # report/validate on epoch 1 and every display_step-th epoch

In [17]:
def _run_epoch(sess, provider, x, y, cost, accuracy, train_op=None):
    """Run one full pass over `provider` and return (mean error, mean accuracy).

    Args:
        sess: open tf.Session holding the initialised graph.
        provider: iterable yielding (input_batch, target_batch) pairs.
        x, y: input / target placeholders to feed.
        cost, accuracy: scalar tensors evaluated on every batch.
        train_op: optional op run alongside the metrics; when given, the pass
            also updates the model (training), otherwise it only evaluates.

    Raises:
        ValueError: if the provider yields no batches.
    """
    fetches = [cost, accuracy]
    if train_op is not None:
        fetches.append(train_op)

    total_error = 0.
    total_accuracy = 0.
    num_batches = 0
    for input_batch, target_batch in provider:
        # -1 lets the batch dimension follow the data instead of assuming
        # every batch holds exactly `batch_size` examples.
        input_batch = input_batch.reshape((-1, n_steps, n_input))
        results = sess.run(fetches, feed_dict={x: input_batch, y: target_batch})
        total_error += results[0]
        total_accuracy += results[1]
        num_batches += 1

    if num_batches == 0:
        raise ValueError("data provider yielded no batches")
    return total_error / num_batches, total_accuracy / num_batches


def experiments():
    """Train and evaluate one stacked-GRU classifier per (n_layers, n_hidden) pair.

    Reads the notebook-level globals `n_layers_list`, `n_hidden_list`,
    `train_data`, `valid_data` and the hyper-parameters (`n_steps`, `n_input`,
    `n_classes`, `learning_rate`, `training_epochs`, `display_step`).

    For every configuration it builds a fresh graph, trains for
    `training_epochs` epochs, and writes to the working directory:
      * a checkpoint  'params_lstm_h<n_hidden>_ly<n_layers>'
      * a pickle      'lstm_performances_h<n_hidden>_ly<n_layers>.p' holding the
        per-epoch arrays err_tr / acc_tr / err_val / acc_val.

    Validation only runs on the first epoch and every `display_step`-th epoch;
    the err_val / acc_val entries of all other epochs stay at 0.
    """
    for n_layers in n_layers_list:
        for n_hidden in n_hidden_list:
            # tf Graph input
            x = tf.placeholder("float", [None, n_steps, n_input])
            y = tf.placeholder("float", [None, n_classes])

            # Output projection from the last hidden state to class logits
            weights = {
                'out': tf.Variable(tf.random_normal([n_hidden, n_classes]))
            }
            biases = {
                'out': tf.Variable(tf.random_normal([n_classes]))
            }

            tf.add_to_collection('vars', weights['out'])
            tf.add_to_collection('vars', biases['out'])

            def Network(x, weights, biases):  # Fully BPTT
                # One GRU cell object per layer: repeating a single instance
                # ([cell] * n_layers) makes every layer refer to the same cell.
                cells = [tf.nn.rnn_cell.GRUCell(n_hidden) for _ in range(n_layers)]
                stacked_cell = tf.nn.rnn_cell.MultiRNNCell(cells=cells)

                outputs, _ = tf.nn.dynamic_rnn(stacked_cell, x, dtype=tf.float32)
                # outputs has size (?, n_steps, n_hidden); keep only the final
                # timestep, so last_output has size (?, n_hidden).
                last_output = outputs[:, outputs.get_shape()[1] - 1, :]
                # Linear activation, using rnn inner loop last output
                return tf.matmul(last_output, weights['out']) + biases['out']

            pred = Network(x, weights, biases)

            # Define loss and optimizer
            cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

            # Evaluate model
            correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
            accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

            # Initializing the variables
            init = tf.global_variables_initializer()

            # The Saver snapshots the variables that exist when it is built, so
            # it must be created only after the RNN and optimizer are in the
            # graph; otherwise the checkpoint would hold just the output layer.
            saver = tf.train.Saver()

            # Per-epoch performance records (pickled below)
            err_tr = np.zeros(training_epochs)
            acc_tr = np.zeros(training_epochs)
            err_val = np.zeros(training_epochs)
            acc_val = np.zeros(training_epochs)

            with tf.Session() as sess:
                print("Optimization Starts! Dim(hidden)={0:d}, n_layers={1:d}"
                      .format(n_hidden, n_layers))
                sess.run(init)  # initial run

                start = time.time()
                for e in range(training_epochs):
                    running_error, running_accuracy = _run_epoch(
                        sess, train_data, x, y, cost, accuracy, train_op=optimizer)
                    err_tr[e] = running_error
                    acc_tr[e] = running_accuracy

                    if ((e+1) % display_step == 0) or (e == 0):
                        # Elapsed time covers every epoch since the last report.
                        end = time.time()
                        print('End of epoch {0:02d}: err(train)={1:.2f} acc(train)={2:.2f} ,,, taking {3:.1f}(sec)'
                              .format(e + 1, running_error, running_accuracy, end-start))

                        start = time.time()

                        valid_error, valid_accuracy = _run_epoch(
                            sess, valid_data, x, y, cost, accuracy)
                        err_val[e] = valid_error
                        acc_val[e] = valid_accuracy

                        end = time.time()
                        print('                 err(valid)={0:.2f} acc(valid)={1:.2f} ,,, taking {2:.1f}(sec) '
                               .format(valid_error, valid_accuracy, end-start))

                        start = time.time()

                # Save model - save session
                sname = 'params_lstm_h' + str(n_hidden) +'_ly' + str(n_layers)
                saver.save(sess, sname)

                # ...performance as well - pickle (file is closed on exit)
                lstm_prf = {"err_tr": err_tr, "err_val": err_val, "acc_tr": acc_tr, "acc_val": acc_val}
                fname = "lstm_performances_h" + str(n_hidden) +'_ly' + str(n_layers) + ".p"
                with open(fname, "wb") as perf_file:
                    pickle.dump(lstm_prf, perf_file)

                # Reset the data providers before the next configuration
                train_data.reset()
                valid_data.reset()
                print('')

            # Reset the graph per each iteration
            tf.reset_default_graph()

    print("Optimization Finished!")

In [18]:
# Hyper-parameter grid swept by experiments(): every hidden-state width is
# trained at every stack depth.
n_layers_list = [1, 2, 3]             # number of stacked recurrent layers
n_hidden_list = [5, 10, 20, 30, 50]   # dim(hidden layer)

experiments()

Optimization Starts! Dim(hidden)=5, n_layers=1
End of epoch 01: err(train)=2.98 acc(train)=0.07 ,,, taking 0.3(sec)
                 err(valid)=3.09 acc(valid)=0.10 ,,, taking 0.0(sec) 

Optimization Starts! Dim(hidden)=10, n_layers=1
End of epoch 01: err(train)=3.24 acc(train)=0.10 ,,, taking 0.3(sec)
                 err(valid)=3.18 acc(valid)=0.11 ,,, taking 0.0(sec) 

Optimization Starts! Dim(hidden)=20, n_layers=1
End of epoch 01: err(train)=3.76 acc(train)=0.09 ,,, taking 0.4(sec)
                 err(valid)=3.68 acc(valid)=0.12 ,,, taking 0.0(sec) 

Optimization Starts! Dim(hidden)=30, n_layers=1
End of epoch 01: err(train)=4.11 acc(train)=0.10 ,,, taking 0.5(sec)
                 err(valid)=3.72 acc(valid)=0.16 ,,, taking 0.1(sec) 

Optimization Starts! Dim(hidden)=50, n_layers=1
End of epoch 01: err(train)=3.84 acc(train)=0.13 ,,, taking 0.8(sec)
                 err(valid)=3.54 acc(valid)=0.12 ,,, taking 0.1(sec) 

Optimization Starts! Dim(hidden)=5, n_layers=2
End of epoch 0