<a href="https://colab.research.google.com/github/sonudoo/DSA/blob/master/Machine%20Learning/VariableLengthLSTMUsingTensorflow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import tensorflow as tf
import numpy as np

In [0]:
# Vocabulary lookup tables.
# d:          index -> character (0 is the '\n' marker, 1..26 are 'a'..'z')
# d_reversed: character -> index (exact inverse of d)

d = {0: '\n'}
d.update({position: chr(ord('a') + position - 1) for position in range(1, 27)})
d_reversed = {char: index for index, char in d.items()}

In [0]:
# Mount Google Drive at /content/gdrive so that later cells can read files from it.
# This step is interactive: it asks for an authorization code before mounting.
# NOTE(review): this import lives here rather than in the first (imports) cell.
from google.colab import drive
drive.mount('/content/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/gdrive


In [0]:
# Load the dinosaur names: one whitespace-separated token per name, lower-cased,
# with a trailing '\n' appended to every name as its end marker.
# The file is opened in a `with` block so the handle is closed once it has been read.
dinos_path = "/content/gdrive/My Drive/Colab Notebooks/datasets/DINOS/dinos.txt"
with open(dinos_path, "r") as dinos_file:
    data = [name.lower() + '\n' for name in dinos_file.read().split()]

# Shuffle in place so the examples are not visited in file order.
np.random.shuffle(data)

# The reported length counts the appended '\n' as well.
print("Longest name is of length:", max([len(x) for x in data]))

Longest name is of length: 27


In [0]:
# Model and data dimensions.
# Names have different lengths, so the network is unrolled for a fixed maximum
# number of states and shorter names are padded up to it.

max_states = 27          # unroll length; the longest entry in `data` was reported as 27
n_input = n_output = 27  # input / output layer width: 26 letters + the '\n' marker
n_hidden = 100           # hidden layer width
m = len(data)            # number of training examples

# Because lengths vary, training feeds a single example at a time.

In [0]:
# Build the one-hot training tensors.
#   X_train: (m, max_states, n_input)  - m = number of training examples
#   Y_train: (m, max_states, n_output)
# Every name is post-padded with '\n' up to max_states characters.
# The label for each character is the next character; the label for the last
# position (and therefore for trailing '\n' padding) is '\n'.


def one_hot_encode(text, depth):
    """Return a (len(text), depth) float array holding one one-hot row per character.

    Row t has its single 1.0 in column d_reversed[text[t]].

    Raises:
        KeyError: if a character of `text` is not a key of d_reversed.
        IndexError: if a character's index is >= depth.
    """
    return np.eye(depth)[[d_reversed[ch] for ch in text]]


X_train = []
Y_train = []

for i in range(m):

    # Pad in place. On a re-run the entry is already max_states long, so this adds nothing.
    data[i] = data[i] + '\n' * (max_states - len(data[i]))

    input_string = data[i]
    output_string = data[i][1:] + '\n'  # shifted left by one, '\n' appended at the end

    X_train.append(one_hot_encode(input_string, n_input))
    # Labels are compared against n_output-wide logits, so they are encoded at
    # n_output (the original used n_input, which only worked because both are 27).
    Y_train.append(one_hot_encode(output_string, n_output))

X_train = np.array(X_train)
Y_train = np.array(Y_train)

In [0]:
# Inputs and labels must line up element for element.
assert X_train.shape == Y_train.shape

In [0]:
# Graph inputs. Training uses one example at a time, so the leading dimension is fixed at 1.

X = tf.placeholder(tf.float32, shape=(1, max_states, n_input))
Y = tf.placeholder(tf.float32, shape=(1, max_states, n_output))

# Per-state loss mask for variable-length names: 1 for states that belong to the
# example, 0 for the remaining (padded) states. The per-state losses are multiplied
# by this mask and summed to give the total loss of the example.

loss_mask = tf.placeholder(tf.float32, shape=(max_states, 1))

# Hidden layer: four weight matrices and biases (suffixes c, u, f, o), each applied
# to the previous activation concatenated with the current input.

gate_weight_shape = [n_hidden + n_input, n_hidden]
gate_bias_shape = [1, n_hidden]

W_c = tf.Variable(tf.random_normal(gate_weight_shape))
W_u = tf.Variable(tf.random_normal(gate_weight_shape))
W_f = tf.Variable(tf.random_normal(gate_weight_shape))
W_o = tf.Variable(tf.random_normal(gate_weight_shape))

b_c = tf.Variable(tf.random_normal(gate_bias_shape))
b_u = tf.Variable(tf.random_normal(gate_bias_shape))
b_f = tf.Variable(tf.random_normal(gate_bias_shape))
b_o = tf.Variable(tf.random_normal(gate_bias_shape))

# Output layer: projects the hidden activation onto the n_output character scores.

W_y = tf.Variable(tf.random_normal([n_hidden, n_output]))
b_y = tf.Variable(tf.random_normal([1, n_output]))

# Zero initial activation and cell state (again sized for a single example).

a = tf.zeros([1, n_hidden], dtype=tf.float32)
c = tf.zeros([1, n_hidden], dtype=tf.float32)

In [0]:
# Build the unrolled training graph for one example.
# X[0] / Y[0] are (max_states, n_input) / (max_states, n_output); unstacking along
# axis 0 yields one tensor per state.
all_states_input = tf.unstack(X[0], axis=0)
all_states_label = tf.unstack(Y[0], axis=0)
losses = []

# Use cell-local zero states instead of rebinding the module-level `a` / `c`.
# Rebinding them meant that re-running this cell started the unroll from the
# previous graph's final state rather than from zeros.
hidden_state = tf.zeros([1, n_hidden], dtype=tf.float32)
cell_state = tf.zeros([1, n_hidden], dtype=tf.float32)

for ith_state in range(max_states):

    # Input and label of the ith state as row vectors
    X_state_input = tf.reshape(all_states_input[ith_state], shape=[1, n_input])
    Y_state_label = tf.reshape(all_states_label[ith_state], shape=[1, n_output])

    # Previous activation concatenated with the current input
    a_prev_x_stacked = tf.concat([hidden_state, X_state_input], 1)

    # Forward prop: candidate value, then the three sigmoid gates
    _c = tf.nn.tanh(tf.matmul(a_prev_x_stacked, W_c) + b_c)
    g_u = tf.nn.sigmoid(tf.matmul(a_prev_x_stacked, W_u) + b_u)
    g_f = tf.nn.sigmoid(tf.matmul(a_prev_x_stacked, W_f) + b_f)
    g_o = tf.nn.sigmoid(tf.matmul(a_prev_x_stacked, W_o) + b_o)
    cell_state = g_u * _c + g_f * cell_state
    hidden_state = g_o * tf.nn.tanh(cell_state)
    Y_state_output = tf.matmul(hidden_state, W_y) + b_y  # unnormalized scores (logits)

    # Loss for this state. The non-v2 op is deprecated; stop_gradient on the labels
    # keeps the old behaviour of not backpropagating into them.
    loss = tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=Y_state_output,
        labels=tf.stop_gradient(Y_state_label),
    )[0]

    losses.append(loss)

# Sum the losses of the non-padded states only (the mask zeroes out the rest)

total_loss = tf.reduce_sum(tf.multiply(tf.reshape(tf.stack(losses), shape=[max_states, 1]), loss_mask))

optimizer = tf.train.AdamOptimizer(0.01).minimize(total_loss)


Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.



In [0]:
# Open a session and run the variable initializer once before training.
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

In [0]:
# Train one example at a time for a fixed number of epochs.

n_epochs = 50

# The loss mask depends only on the example, not on the epoch, so build every mask
# once up front. Entry j is 1.0 while j is smaller than the length of the text
# before the first '\n' and 0.0 afterwards.
loss_masks = [
    (np.arange(max_states) < len(data[i].split('\n')[0])).astype(np.float64).reshape(max_states, 1)
    for i in range(X_train.shape[0])
]

for epoch in range(n_epochs):
    loss_across_batches = []
    for i in range(X_train.shape[0]):
        # One optimizer step on example i; keep only the loss value.
        _, loss = sess.run(
            [optimizer, total_loss],
            feed_dict={X: [X_train[i]], Y: [Y_train[i]], loss_mask: loss_masks[i]},
        )
        loss_across_batches.append(loss)
        # '\r' rewrites the same output line for every example
        print('\rEpoch:', epoch + 1, 'Ex:', i + 1, 'L:', round(loss), end='')
    print('\nOverall loss in Epoch:', epoch + 1, 'is', np.mean(np.array(loss_across_batches)))

Epoch: 1 Ex: 1536 L: 13.0
Overall loss in Epoch: 1 is 37.878246
Epoch: 2 Ex: 1536 L: 14.0
Overall loss in Epoch: 2 is 24.399221
Epoch: 3 Ex: 1536 L: 14.0
Overall loss in Epoch: 3 is 22.072067
Epoch: 4 Ex: 1536 L: 14.0
Overall loss in Epoch: 4 is 20.721678
Epoch: 5 Ex: 1536 L: 13.0
Overall loss in Epoch: 5 is 19.878742
Epoch: 6 Ex: 1536 L: 14.0
Overall loss in Epoch: 6 is 19.295713
Epoch: 7 Ex: 1536 L: 10.0
Overall loss in Epoch: 7 is 18.815449
Epoch: 8 Ex: 1536 L: 10.0
Overall loss in Epoch: 8 is 18.294321
Epoch: 9 Ex: 1536 L: 10.0
Overall loss in Epoch: 9 is 17.782557
Epoch: 10 Ex: 1536 L: 7.0
Overall loss in Epoch: 10 is 17.251635
Epoch: 11 Ex: 1536 L: 8.0
Overall loss in Epoch: 11 is 16.708853
Epoch: 12 Ex: 1536 L: 9.0
Overall loss in Epoch: 12 is 16.179726
Epoch: 13 Ex: 1536 L: 9.0
Overall loss in Epoch: 13 is 15.636093
Epoch: 14 Ex: 1536 L: 7.0
Overall loss in Epoch: 14 is 15.332886
Epoch: 15 Ex: 1536 L: 8.0
Overall loss in Epoch: 15 is 14.833701
Epoch: 16 Ex: 1536 L: 6.0
Overall 

In [0]:
# Name generation.
#
# Build a second unrolled network that reuses the trained weights. It starts from
# a random-normal input vector and, at every state, feeds the softmax output of
# the previous state back in as the next input, for max_states states.
# NOTE(review): training fed one-hot inputs, whereas here the inputs are random
# noise and then softmax distributions - confirm this mismatch is intended.

a = tf.zeros([1, n_hidden], dtype=tf.float32)
c = tf.zeros([1, n_hidden], dtype=tf.float32)

current_state_input = tf.random_normal([1, n_input], dtype=tf.float32)

state_outputs = []  # one (1, n_output) softmax row per state

for ith_state in range(max_states):

    # Previous activation concatenated with the current input
    a_prev_x_stacked = tf.concat([a, current_state_input], 1)

    # Forward prop (same equations as the training graph)
    candidate = tf.nn.tanh(tf.math.add(tf.matmul(a_prev_x_stacked, W_c), b_c))
    update_gate = tf.nn.sigmoid(tf.math.add(tf.matmul(a_prev_x_stacked, W_u), b_u))
    forget_gate = tf.nn.sigmoid(tf.math.add(tf.matmul(a_prev_x_stacked, W_f), b_f))
    output_gate = tf.nn.sigmoid(tf.math.add(tf.matmul(a_prev_x_stacked, W_o), b_o))
    c = tf.math.add(tf.math.multiply(update_gate, candidate), tf.math.multiply(forget_gate, c))
    a = tf.math.multiply(output_gate, tf.nn.tanh(c))
    Y_state_output = tf.nn.softmax(tf.math.add(tf.matmul(a, W_y), b_y))

    state_outputs.append(Y_state_output)

    # The distribution just produced becomes the input of the next state
    current_state_input = Y_state_output

# Stack the per-state rows into one (max_states, n_output) tensor
Y_output = tf.concat(state_outputs, 0) if state_outputs else None

In [0]:
# Decode one generated name: take the most likely character at every state,
# map the indices back to characters and cut at the first '\n'.

predicted_indices = np.argmax(sess.run(Y_output), axis=1)
generated_text = "".join(d[index] for index in predicted_indices)
generated_text.split('\n')[0]

'madlosaurus'