In [1]:
import tensorflow as tf
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1, #fraction of the memory of the GPU 
                            allow_growth = True,#If TRUE, use a fraction of between 0 and per process gpu memory fraction. If FALSE, pre-allocate entire GPU memory.
                            visible_device_list = "2,3") #GPUs 2nd and 3rd used out of 0,1,2,3.
config=tf.ConfigProto(gpu_options=gpu_options)

print(config.gpu_options.per_process_gpu_memory_fraction)
print(config.gpu_options.visible_device_list)

  from ._conv import register_converters as _register_converters


1
2,3


In [2]:
import numpy as np
import pandas as pd

import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline 

import time
import math

In [3]:
X_train=pd.read_csv('X_train.txt', header=None).as_matrix()
y_train = pd.read_csv('y_train.txt', header=None).as_matrix().ravel()
X_test = pd.read_csv('X_test.txt', header=None).as_matrix()
y_test = pd.read_csv('y_test.txt', header=None).as_matrix().ravel()

In [4]:
#y is a array. labelNum is number of distinct values in y, an integer. 
def convertDummy(y, labelNum):#This is a function one-hot encoding the classes. 
    labelNum = tf.constant(labelNum) #Construct pre-allocation labelNum.
    dummy = tf.one_hot(y, labelNum, axis=1) #Construct operations to create labelNum times columns
    sess = tf.Session(config=config) #Defining the session 
    dummy = sess.run(dummy) #Execute the session
    sess.close() #closing the session
    return dummy

In [5]:
#Batching datarows
#It will return a list of tuples.
def miniBatch(x, y, batchSize):
    numObs  = x.shape[0]
    batches = [] 
    batchNum = math.floor(numObs / batchSize)

    for i in range(batchNum - 1):
        xBatch = x[i * batchSize:(i + 1) * batchSize, :]
        yBatch = y[i * batchSize:(i + 1) * batchSize, :]
        batches.append((xBatch, yBatch))
    return batches

In [6]:
#Computing the length of sequences. It is needed for variable length sequences
def length(sequence):
    used = tf.sign(tf.reduce_max(tf.abs(sequence), 2))
    length = tf.reduce_sum(used, 1)
    length = tf.cast(length, tf.int32)
    sess = tf.Session(config=config)
    length = sess.run(length)
    sess.close()
    return length

In [7]:
max_timestep = 60
num_classes = 6
split_size = max_timestep

In [8]:
def split_time(m, split_size):
        r = m.shape[0]
        extend_row_size = np.math.ceil(r / split_size) * split_size - r
        m_p = np.expand_dims(np.pad(m, [(0, extend_row_size), (0, 0)], mode='constant'), axis=0)
        result = m_p.reshape((np.math.ceil(r / split_size), split_size, m.shape[1]))
        return result

In [9]:
Xtrain = split_time(X_train, split_size)
ytrain = split_time(convertDummy(y_train-1, num_classes), split_size)
Xtest = split_time(X_test, split_size)
ytest = split_time(convertDummy(y_test-1, num_classes), split_size)

In [10]:
print(Xtrain.shape)
print(ytrain.shape)
print(Xtest.shape)
print(ytest.shape)

(123, 60, 561)
(123, 60, 6)
(50, 60, 561)
(50, 60, 6)


In [11]:
def get_state_variables(batch_size, cell):
    # For each layer, get the initial state and make a variable out of it
    # to enable updating its value.
    state_variables = []
    for state_c, state_h in cell.zero_state(batch_size, tf.float32):
        state_variables.append(tf.contrib.rnn.LSTMStateTuple(
            tf.Variable(state_c, trainable=False),
            tf.Variable(state_h, trainable=False)))
    # Return as a tuple, so that it can be fed to dynamic_rnn as an initial state
    return tuple(state_variables)

In [12]:
def get_state_update_op(state_variables, new_states):
    # Add an operation to update the train states with the last state tensors
    update_ops = []
    for state_variable, new_state in zip(state_variables, new_states):
        # Assign the new state to the state variables on this layer
        update_ops.extend([state_variable[0].assign(new_state[0]),
                           state_variable[1].assign(new_state[1])])
    # Return a tuple in order to combine all update_ops into a single operation.
    # The tuple's actual value should not be used.
    return tf.tuple(update_ops)

In [13]:
num_layers=3
num_neurons = 128
num_inputs = Xtrain.shape[2] #561
num_classes = ytrain.shape[2] #6
num_steps=Xtrain.shape[1] #60

#Configuration
learning_rate = 0.01
batch_size =16
num_iterations = 20
#During training, you can feed any value you want to the keep_prob placeholder (typically 0.5)
train_keep_prob =0.5

X = tf.placeholder(tf.float32, [None, num_steps, num_inputs], name='input_placeholder')
y = tf.placeholder(tf.float32, [None, None, num_classes], name='labels_placeholder')
seq_length = tf.placeholder(tf.int32, [None])
keep_prob = tf.placeholder_with_default(1.0, shape=())

initializer = tf.random_uniform_initializer(-0.1, 0.1, seed=2)
#LSTM layers
#It can take a while for a recurrent network to learn to remember information from the last time step. 
#Initialize biases for LSTM’s forget gate to 1 to remember more by default
#This is default non-peephole implementation
lstm_cells = [tf.contrib.rnn.LSTMCell(num_units=num_neurons, forget_bias=1.0, initializer=initializer, state_is_tuple=True) for layer in range(num_layers)]
#Dropout layer before and after each LSTM cells
lstm_cells_drop = [tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob=keep_prob) for cell in lstm_cells]
multi_layer_cell = tf.contrib.rnn.MultiRNNCell(lstm_cells_drop, state_is_tuple=True)

#init_states = tf.placeholder(tf.float32, [num_layers, 2, batch_size, num_neurons])
init_states = multi_layer_cell.zero_state(tf.shape(X)[0], tf.float32)
init_states = tf.identity(init_states, "init_states")
state_per_layer_list = tf.unstack(init_states, axis=0)
rnn_tuple_state = tuple([tf.contrib.rnn.LSTMStateTuple(state_per_layer_list[idx][0], state_per_layer_list[idx][1]) for idx in range(num_layers)])


# time_major = False: (batch, time step, input); time_major = True: (time step, batch, input)
#The default approach to initializing the state of an RNN is to use a zero state
outputs, final_state = tf.nn.dynamic_rnn(multi_layer_cell, X, sequence_length= seq_length, time_major = False, initial_state=init_states, dtype=tf.float32) #[Batch_size, time_steps, num_neurons]

output_shape = tf.shape(outputs)#[Batch_size, time_steps, num_neurons]

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    #_current_state = np.zeros((num_layers, 2, batch_size, num_neurons))
    miniBatches = miniBatch(Xtrain, ytrain, batch_size)
    batchNum = len(miniBatches)
    for batch in miniBatches:
        xBatch = batch[0]
        yBatch = batch[1]
        seq_length_batch = length(xBatch)
        #if you use zero_state you do not need _current_state and assigning a placeholder for it. 
        outputs_val, final_state_val = sess.run([outputs, final_state], feed_dict={X: xBatch, y: yBatch, seq_length: seq_length_batch, keep_prob:train_keep_prob})
        
        

In [15]:
outputs_val

array([[[ 0.01136662,  0.00041855, -0.01009578, ...,  0.00943681,
         -0.00986547, -0.00067572],
        [ 0.02154926, -0.01915141, -0.02157292, ...,  0.00983158,
         -0.0113409 , -0.0146435 ],
        [ 0.03555227, -0.02163243, -0.02289967, ...,  0.03586564,
         -0.02328831,  0.00632061],
        ...,
        [-0.25427768, -0.19819836,  0.00894592, ...,  0.1123413 ,
         -0.03879779,  0.0866273 ],
        [-0.23441458, -0.23270148,  0.03763032, ...,  0.18720868,
          0.02642735,  0.16538405],
        [-0.18290052, -0.18131953,  0.05075862, ...,  0.14180906,
          0.03941987,  0.02738907]],

       [[ 0.00106072, -0.00515767,  0.00208643, ...,  0.01168074,
         -0.00242058, -0.01811988],
        [-0.01250202,  0.00193232, -0.02017763, ...,  0.01489723,
         -0.0203504 , -0.02318813],
        [-0.01863049, -0.01854887, -0.03328935, ...,  0.019247  ,
         -0.04525487, -0.04892085],
        ...,
        [-0.1576717 , -0.12688969,  0.00923921, ..., -

# ANOTHER EXAMPLE

In [None]:
import tensorflow as tf
import numpy as np

def get_state_variables(batch_size, cell):
    # For each layer, get the initial state and make a variable out of it
    # to enable updating its value.
    state_variables = []
    for state_c, state_h in cell.zero_state(batch_size, tf.float32):
        state_variables.append(tf.contrib.rnn.LSTMStateTuple(
            tf.Variable(state_c, trainable=False),
            tf.Variable(state_h, trainable=False)))
    # Return as a tuple, so that it can be fed to dynamic_rnn as an initial state
    return tuple(state_variables)


def get_state_update_op(state_variables, new_states):
    # Add an operation to update the train states with the last state tensors
    update_ops = []
    for state_variable, new_state in zip(state_variables, new_states):
        # Assign the new state to the state variables on this layer
        update_ops.extend([state_variable[0].assign(new_state[0]),
                           state_variable[1].assign(new_state[1])])
    # Return a tuple in order to combine all update_ops into a single operation.
    # The tuple's actual value should not be used.
    return tf.tuple(update_ops)

X_batch = np.array([ 
    [[0,1,2],[9,8,7]],#instance 0
    [[3,4,5],[0,0,0]],#instance 1
    [[6,7,8],[6,5,4]],#instance 2
    [[9,0,1],[3,2,1]]])#instance 3

n_steps = 2
n_inputs = 3
n_neurons = 5
n_layers = 2
batch_size = 4

X=tf.placeholder(tf.float32, [None, n_steps, n_inputs])

lstm_cells = [tf.contrib.rnn.LSTMCell(num_units=n_neurons, forget_bias=1.0, state_is_tuple=True) for layer in range(n_layers)]
multi_layer_cell = tf.contrib.rnn.MultiRNNCell(lstm_cells, state_is_tuple=True)

# For each layer, get the initial state. states will be a tuple of LSTMStateTuples.
states = get_state_variables(batch_size, multi_layer_cell)

# Unroll the LSTM
outputs, new_states = tf.nn.dynamic_rnn(multi_layer_cell, X, initial_state=states)

# Add an operation to update the train states with the last state tensors.
update_op = get_state_update_op(states, new_states)

sess = tf.Session()
sess.run(tf.global_variables_initializer())
outputs_val, new_states_val, update_op_val = sess.run([outputs, new_states, update_op], {X:X_batch})
    
tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)