In [1]:
from helper import create_batches
import tensorflow as tf
import numpy as np
import shelve
import joblib
import random
import time

# This notebook lives inside the 'code' directory, so the shelved data is one level up.
mount_point = "../shelved_data/"

# Read the character set, the image list and the labels from the shelf in one go.
with shelve.open(mount_point + 'IAM_Data') as shelf:
    vocabulary, list_of_images, image_labels = (
        shelf[key] for key in ('chars', 'list_of_images', 'image_labels')
    )

# The image arrays are stored separately as a joblib dump.
image_arrays = joblib.load(mount_point + 'image_arrays')

# Sort the list of images (in place).
list_of_images.sort()

# Turn the vocabulary into a sorted list so the index of every character is
# the same on every run, then add the "<Blank>" entry as the last index.
# NOTE(review): presumably the CTC blank label -- confirm against the loss setup.
vocabulary = sorted(vocabulary) + ["<Blank>"]

In [2]:
# ---- Model parameters ----
# Size of every input image and number of output classes (one per vocabulary entry).
img_height = 104
img_width = 688
vocab_size = len(vocabulary)

# ---- Common hyper-parameters ----
alpha = 0.005      # learning rate handed to the optimizer
epochs = 200
batch_size = 32

# ---- Convolutional network ----
# Side length of the square convolution kernels.
filter_size = 5
# Number of filters in each of the three convolution layers.
num_conv1 = 16
num_conv2 = 32
num_conv3 = 64

# ---- LSTM ----
rnn_hidden_units = 256

# ---- Fully connected layers ----
# The input is twice the LSTM size because the forward and backward outputs
# are concatenated; the output has one unit per vocabulary entry.
fc_input_units = 2 * rnn_hidden_units
fc_hidden_units = 128
fc_output_units = vocab_size

In [3]:
# with tf.device('/gpu:0'):

# Trainable parameters of the network: three convolution kernels, two fully
# connected weight matrices, and their biases.
# NOTE(review): the variables are unnamed, so TF1 presumably names them by
# creation order -- keep the order of the tf.Variable calls stable if existing
# checkpoints must stay loadable.

# Xavier (Glorot) initializers: one for dense weights, one for conv kernels.
fc_initializer = tf.contrib.layers.xavier_initializer()
conv_initializer = tf.contrib.layers.xavier_initializer_conv2d()

# Convolution kernel shapes: [kernel_height, kernel_width, in_channels, out_channels]
#  -> filter_size = 5, so every kernel is 5 x 5
#  -> the first layer sees the single image channel (in_channels = 1)
#  -> each layer's out_channels (number of filters) is the next layer's in_channels

wconv1_shape = [filter_size,filter_size,1,num_conv1]
wconv2_shape = [filter_size,filter_size,num_conv1,num_conv2]
wconv3_shape = [filter_size,filter_size,num_conv2,num_conv3]

# Fully connected weight shapes: [units_in, units_out]
wfc1_shape = [fc_input_units, fc_hidden_units]
wfc2_shape = [fc_hidden_units, fc_output_units]


# Conv biases are a single scalar per layer (shape []), broadcast over every
# feature map when added to the convolution output.
# NOTE(review): one bias per output channel is the more common choice -- confirm
# the scalar is intentional.
bconv_shape = []

# Fully connected biases: one value per output unit.
bfc1_shape = [fc_hidden_units]
bfc2_shape = [fc_output_units]

# Create the weight variables with Xavier-initialized values.
wconv1 = tf.Variable(conv_initializer(wconv1_shape))
wconv2 = tf.Variable(conv_initializer(wconv2_shape))
wconv3 = tf.Variable(conv_initializer(wconv3_shape))

# The fully connected weights are created here but not referenced by the
# active (non-commented) graph code in the visible cells.
wfc1 = tf.Variable(fc_initializer(wfc1_shape))
wfc2 = tf.Variable(fc_initializer(wfc2_shape))


# Create the bias variables, all starting at zero.
bconv1 = tf.Variable(tf.zeros(bconv_shape))
bconv2 = tf.Variable(tf.zeros(bconv_shape))
bconv3 = tf.Variable(tf.zeros(bconv_shape))

bfc1 = tf.Variable(tf.zeros(bfc1_shape))
bfc2 = tf.Variable(tf.zeros(bfc2_shape))


#Model
#----------------------------------------------------------------------------#

# Input images, fed as (batch, img_height, img_width).
inputs = tf.placeholder(tf.float32,shape=[None,img_height,img_width])

# Append a channel axis of size 1 so the batch matches the 4-D input conv2d expects.
X = tf.reshape(inputs,(-1,img_height,img_width,1))

#-------------------Convolution-----------------------#
# Three identical stages: 5x5 convolution (stride 1, 'SAME' padding, so the
# spatial size is kept) + scalar bias + ReLU, followed by a 2x2 max-pool with
# stride 2 that halves the height and the width.

#1st Convolutional Layer
conv1 = tf.nn.relu(tf.nn.conv2d(input=X,filter=wconv1,padding='SAME',strides=[1,1,1,1]) + bconv1)

#1st Pooling layer
pool1 = tf.nn.max_pool(conv1,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')

#2nd Convolutional Layer
conv2 = tf.nn.relu(tf.nn.conv2d(input=pool1,filter=wconv2,padding='SAME',strides=[1,1,1,1]) + bconv2)

#2nd Pooling Layer
pool2 = tf.nn.max_pool(conv2,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')

#3rd Convolutional Layer
conv3 = tf.nn.relu(tf.nn.conv2d(input=pool2,filter=wconv3,padding='SAME',strides=[1,1,1,1]) + bconv3)

#3rd Pooling Layer
pool3 = tf.nn.max_pool(conv3,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')

# Spatial size of pool3. With 'SAME' padding every stride-2 pool produces
# ceil(n / 2) outputs, so three pools in a row produce ceil(n / 8) -- not
# floor(n / 8). Ceil-division (-(-n // 8)) keeps these values in sync with
# pool3 for image sizes that are not multiples of 8; for the current
# 104 x 688 images the result is unchanged (13, 86).
conv_out_height, conv_out_width = (-(-img_height // 2**3), -(-img_width // 2**3))

#----------------LSTM--------------------------#
# Every spatial position of the last pooling output is one time step, and the
# num_conv3 feature-map values at that position are its feature vector.
# The sequence length is therefore (pooled height * pooled width).
seq_len = conv_out_height * conv_out_width

# Flatten the two spatial axes into a single time axis: (batch, seq_len, num_conv3).
lstm_input = tf.reshape(pool3, (-1, seq_len, num_conv3))

# Sparse int32 placeholder for the label sequences (used by the CTC loss in
# the commented-out code below).
targets = tf.sparse_placeholder(tf.int32)

# One sequence length per example of a full batch; every example uses all seq_len steps.
time_steps = np.array([seq_len] * batch_size)

# Forward-direction LSTM cell
cell_fw = tf.contrib.rnn.BasicLSTMCell(rnn_hidden_units)

# Backward-direction LSTM cell
cell_bw = tf.contrib.rnn.BasicLSTMCell(rnn_hidden_units)

# Run both directions over the sequence; the final states are not needed.
(outputs_fw, outputs_bw), _ = tf.nn.bidirectional_dynamic_rnn(
    cell_fw, cell_bw, lstm_input, dtype=tf.float32
)

# Join the forward and backward outputs along the feature axis,
# giving 2 * rnn_hidden_units features per time step.
outputs = tf.concat([outputs_fw, outputs_bw], axis=2)

print(outputs)

# shape = tf.shape(X)
# batch_s , max_time_steps = shape[0], shape[1]

# # Reshaping to apply the same weights over the timesteps
# outputs = tf.reshape(outputs, [-1, hidden_units])

# # Truncated normal with mean 0 and stdev=0.1
# # Tip: Try another initialization
# # see https://www.tensorflow.org/versions/r0.9/api_docs/python/contrib.layers.html#initializers
# W = tf.Variable(tf.truncated_normal([hidden_units,
#                                      vocab_size],
#                                     stddev=0.1))
# # Zero initialization
# # Tip: Is tf.zeros_initializer the same?
# b = tf.Variable(tf.constant(0., shape=[vocab_size]))

# # Doing the affine projection
# logits = tf.matmul(outputs, W) + b

# # Reshaping back to the original shape
# logits = tf.reshape(logits, [batch_s, -1, vocab_size])

# # Time major
# logits = tf.transpose(outputs, (1, 0, 2))

# #Calculate loss
# loss = tf.nn.ctc_loss(targets, logits, time_steps)
# cost = tf.reduce_mean(loss)

# #Optimize
# optimizer = tf.train.AdamOptimizer(learning_rate=alpha)
# train = optimizer.minimize(loss)

# # CTC decoder.

# #decoded, log_prob = tf.nn.ctc_greedy_decoder(logits, seq_len)
# decoded, log_prob = tf.nn.ctc_greedy_decoder(logits, time_steps)

# label_error_rate = tf.reduce_mean(tf.edit_distance(tf.cast(decoded[0], tf.int32),
#                                                    targets))


Tensor("concat:0", shape=(?, 1118, 512), dtype=float32)


## Save my model

In [4]:
# Loss history (the commented-out training loop appends to it).
losses = list()

# Saver for the variables defined in the graph so far.
saver = tf.train.Saver()

# Fixed seed so the shuffle below gives the same order on every run.
random.seed(100)

# Train on the first 256 entries of the image list, in shuffled order.
training_list = list_of_images[:256]
random.shuffle(training_list)

In [5]:
# Group the training images and their labels into batches of batch_size.
batches_x, batches_y = create_batches(
    batch_size,
    training_list,
    image_arrays,
    image_labels,
    vocabulary,
)

# Both lists should contain the same number of batches.
print(len(batches_x), len(batches_y))

8 8


In [6]:
# Smoke test: push the first batch through the freshly initialized network.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Move the last axis of the batch to the front so it matches the
    # (batch, img_height, img_width) placeholder.
    # NOTE(review): assumes create_batches yields (height, width, batch) arrays -- confirm.
    feed = {
        inputs: np.transpose(batches_x[0], (2, 0, 1)),
        targets: batches_y[0],
    }
    true_outputs = sess.run(outputs, feed_dict=feed)

In [7]:
# Expect (batch, time steps, 2 * rnn_hidden_units) for the bidirectional output.
np.shape(true_outputs)

(32, 1118, 512)

In [8]:
# with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
#     sess.run(tf.global_variables_initializer())
    
#     for e in range(epochs): 
#         start_time = time.time()
#         total_cost,total_ler = 0.0,0.0
        
#         #Iterate through all images in a single epoch...
#         for b in range(len(batches_x)):
            
#             #Before feeding x, transpose it to (batch_size,height,width)
#             feed = {inputs:batches_x[b].transpose([2,0,1]),targets:batches_y[b]}

#             sess.run(train,feed_dict=feed)
            
#         if e % 1 == 0:
#             cost_val,ler_val,d = sess.run([cost,label_error_rate,decoded], feed_dict=feed)
#             total_cost+=cost_val
#             total_ler+=ler_val
            
#             losses.append(total_cost)
# #             if e % 1 == 0:
# #                 outputs.append(d)
                
#         if e%10==0:
#             saver.save(sess,'../model/Lines_RNN_'+str(e))

#         end_time = time.time()       
#         time_taken = end_time - start_time

#         print("Epoch {}: cost = {} ler = {:.2f} - Time taken:{:.2f} sec".format(e,total_cost,total_ler,time_taken))

# # plt.plot(list(range(len(losses))),losses)
# # plt.xlabel('Epochs')
# # plt.ylabel('Loss')

## Predict using Model

In [9]:
# with tf.Session() as sess:
#     saver = tf.train.import_meta_graph('Basic_RNN_TF_API.ckpt.meta')
#     saver.restore(sess, '../Lines_RNN.ckpt')

#     X = sess.graph.get_tensor_by_name('Placeholder:0')
    
#     for _ in range(30): 
#         y_pred = sess.run('rnn/transpose:0', feed_dict={X: X_new})
#         #print(y_pred)
#         #print(y_pred.shape)
#         content += interpret(y_pred,vocabulary)
#         X_new = y_pred
        
# print(content)

In [10]:
# content = []
# for k in range(0,5):
#     val = outputs[k][0]
#     content.append(''.join([vocabulary[x] for x in np.asarray(val[1])]))
# print("\n".join(content))