<a href="https://colab.research.google.com/github/rahulsing/TF_RNN/blob/master/RNN_OCR.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from platform import python_version
print(python_version())

2.7.15rc1


In [0]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

In [0]:
import os
import gzip
import csv
import numpy as np
import tensorflow as tf


In [0]:
%matplotlib inline

import matplotlib
import matplotlib.pyplot as plt


In [0]:
from six.moves import urllib

In [7]:
print(np.__version__)
print(tf.__version__)

1.14.6
1.12.0


In [0]:
# http://ai.stanford.edu/~btaskar/ocr
URL_PATH="http://ai.stanford.edu/~btaskar/ocr/letter.data.gz"
DOWNLOADED_FILENAME="letter.data.gz"

In [0]:
def download_data():
  if not os.path.exists(DOWNLOADED_FILENAME):
    filename,_=urllib.request.urlretrieve(URL_PATH,DOWNLOADED_FILENAME)
    
  print('Found and verified file from this path: ', URL_PATH)
  print('Downloaded file: ',DOWNLOADED_FILENAME)


In [10]:
download_data()

Found and verified file from this path:  http://ai.stanford.edu/~btaskar/ocr/letter.data.gz
Downloaded file:  letter.data.gz


In [0]:
def read_lines():
  with gzip.open(DOWNLOADED_FILENAME,'rt') as f:
    reader=csv.reader(f,delimiter='\t')
    lines=list(reader)
    return lines

In [12]:
lines=read_lines()
len(lines)

52152

In [13]:
lines[1][:15]

['2', 'm', '3', '1', '2', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0']

In [0]:
def get_features_labels(lines):
  lines=sorted(lines,key=lambda x:int(x[0]))
  data,target=[],[]
  next_id=-1

  word=[]
  word_pixels=[]
  for line in lines:
    next_id=int(line[2])
    pixels=np.array([int(x) for x in line[6:134]])
    pixels=pixels.reshape((16,8))
    
    word_pixels.append(pixels)
    word.append(line[1])
    
    if next_id==-1:
      data.append(word_pixels)
      target.append(word)
      
      word=[]
      word_pixels=[]
      
  return data,target

In [0]:
data,target=get_features_labels(lines)

In [16]:
len(data),len(target)

(6877, 6877)

In [0]:
def pad_features_labels(data,target):
  max_length=max(len(x) for x in target)
  padding=np.zeros((16,8))
  
  data=[x+([padding]*(max_length-len(x))) for x in data]
  
  target=[x+(['']*(max_length-len(x))) for x in target]
              
  return np.array(data),np.array(target)

In [0]:
padded_data,padded_target=pad_features_labels(data,target)

In [20]:
len(padded_data),len(padded_target)

(6877, 6877)

In [21]:
padded_target[:10]

array([['o', 'm', 'm', 'a', 'n', 'd', 'i', 'n', 'g', '', '', '', '', ''],
       ['o', 'm', 'm', 'a', 'n', 'd', 'i', 'n', 'g', '', '', '', '', ''],
       ['o', 'm', 'm', 'a', 'n', 'd', 'i', 'n', 'g', '', '', '', '', ''],
       ['o', 'm', 'm', 'a', 'n', 'd', 'i', 'n', 'g', '', '', '', '', ''],
       ['o', 'm', 'm', 'a', 'n', 'd', 'i', 'n', 'g', '', '', '', '', ''],
       ['o', 'm', 'm', 'a', 'n', 'd', 'i', 'n', 'g', '', '', '', '', ''],
       ['o', 'm', 'm', 'a', 'n', 'd', 'i', 'n', 'g', '', '', '', '', ''],
       ['o', 'm', 'm', 'a', 'n', 'd', 'i', 'n', 'g', '', '', '', '', ''],
       ['o', 'm', 'm', 'a', 'n', 'd', 'i', 'n', 'g', '', '', '', '', ''],
       ['o', 'm', 'm', 'a', 'n', 'd', 'i', 'n', 'g', '', '', '', '', '']],
      dtype='|S1')

In [22]:
padded_target[200:210]

array([['m', 'b', 'r', 'a', 'c', 'e', 's', '', '', '', '', '', '', ''],
       ['m', 'b', 'r', 'a', 'c', 'e', 's', '', '', '', '', '', '', ''],
       ['m', 'b', 'r', 'a', 'c', 'e', 's', '', '', '', '', '', '', ''],
       ['m', 'b', 'r', 'a', 'c', 'e', 's', '', '', '', '', '', '', ''],
       ['m', 'b', 'r', 'a', 'c', 'e', 's', '', '', '', '', '', '', ''],
       ['m', 'b', 'r', 'a', 'c', 'e', 's', '', '', '', '', '', '', ''],
       ['m', 'b', 'r', 'a', 'c', 'e', 's', '', '', '', '', '', '', ''],
       ['m', 'b', 'r', 'a', 'c', 'e', 's', '', '', '', '', '', '', ''],
       ['m', 'b', 'r', 'a', 'c', 'e', 's', '', '', '', '', '', '', ''],
       ['m', 'b', 'r', 'a', 'c', 'e', 's', '', '', '', '', '', '', '']],
      dtype='|S1')

In [0]:
word_length=len(padded_target[0])

In [26]:
word_length

14

In [27]:
padded_data.shape

(6877, 14, 16, 8)

In [28]:
padded_data.shape[:2]

(6877, 14)

In [29]:
padded_data.shape[:2]+(-1,)

(6877, 14, -1)

In [0]:
reshaped_data=padded_data.reshape(padded_data.shape[:2]+(-1,))

In [31]:
reshaped_data.shape

(6877, 14, 128)

In [32]:
padded_target.shape+(26,)

(6877, 14, 26)

In [0]:
one_hot_target=np.zeros(padded_target.shape+(26,))

In [0]:
for index,letter in np.ndenumerate(padded_target):
  if letter:
    one_hot_target[index][ord(letter)-ord('a')]=1

In [36]:
one_hot_target[0][0]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [0]:
shuffled_indices=np.random.permutation(len(reshaped_data))

shuffled_data=reshaped_data[shuffled_indices]

shuffled_target=one_hot_target[shuffled_indices]

In [0]:
split=int(0.66*len(shuffled_data))

train_data=shuffled_data[:split]
train_target=shuffled_target[:split]

test_data=shuffled_data[split:]
test_target=shuffled_target[split:]


In [39]:
train_data.shape

(4538, 14, 128)

In [40]:
_,num_steps,num_inputs=train_data.shape

train_target.shape

(4538, 14, 26)

In [0]:
num_classes=train_target.shape[2]


In [42]:
num_steps,num_inputs,num_classes

(14, 128, 26)

In [0]:
tf.reset_default_graph()

In [0]:
X=tf.placeholder(tf.float64,[None,num_steps,num_inputs])

In [0]:
y=tf.placeholder(tf.float64,[None,num_steps,num_classes])

In [0]:
used=tf.sign(tf.reduce_max(tf.abs(X),reduction_indices=2))


length=tf.reduce_sum(used,reduction_indices=1)

sequence_length=tf.cast(length,tf.int32)



In [56]:
sequence_length

<tf.Tensor 'Cast_1:0' shape=(?,) dtype=int32>

In [0]:
# Every Layer will have 300 neurons, 
num_neurons=300 #Hyper parameter, tunable

In [0]:
# GRUCell, using long memory cell, for better performance 
cell=tf.nn.rnn_cell.GRUCell(num_neurons)

In [0]:
# Create RNN in TF, simply using tf library: tf.nn.dynamic_rnn
# input argument:
# cell : memory cell that is has to unroll through time, based on no. of steps 
# X is placeholder for training data
# sequence_length: optional paratmer, used for correctness of our RNN , not performance

# result (tuple):  predicted output and last internal state of the RNN(not required now)
output,_=tf.nn.dynamic_rnn(cell,X,dtype=tf.float64,sequence_length=sequence_length)

In [58]:
output.shape

TensorShape([Dimension(None), Dimension(14), Dimension(300)])

In [0]:
#TensorShape([Dimension(None), Dimension(14), Dimension(300)])
#1st: batch size
# 2nd : 14 , no of time period in out data
# 3rd: 300,  no of neuron in each layer, or in a memory cell

In [0]:
weight=tf.Variable(tf.truncated_normal([num_neurons,num_classes],stddev=0.01,dtype=tf.float64))

In [0]:
bias=tf.Variable(tf.constant(0.1,shape=[num_classes],dtype=tf.float64))

In [0]:
flattened_output=tf.reshape(output,[-1,num_neurons])

In [63]:
flattened_output

<tf.Tensor 'Reshape:0' shape=(?, 300) dtype=float64>

In [0]:
logits=tf.matmul(flattened_output,weight)+bias

In [0]:
logits_reshaped=tf.reshape(logits,[-1,num_steps,num_classes])

In [68]:
cross_entropy=tf.nn.softmax_cross_entropy_with_logits(logits=logits,labels=y)

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.



In [0]:
loss=tf.reduce_mean(cross_entropy)

In [0]:
mistakes=tf.not_equal(tf.argmax(y,2),tf.argmax(logits_reshaped,2))
mistakes=tf.cast(mistakes,tf.float64)

In [0]:
mask=tf.sign(tf.reduce_max(tf.abs(y),reduction_indices=2))

In [0]:
mistakes*=mask

In [0]:
mistakes=tf.reduce_sum(mistakes,reduction_indices=1)
mistakes/=tf.cast(sequence_length,tf.float64)

In [0]:
error=tf.reduce_mean(mistakes)

In [0]:
optimizer=tf.train.RMSPropOptimizer(0.002)
gradient=optimizer.compute_gradients(loss)

optimize=optimizer.apply_gradients(gradient)

In [0]:
def batches(data,target,batch_size):
  epoch=0
  offset=0
  
  while True:
    old_offset=offset
    offset=(offset+batch_size)%(target.shape[0]-batch_size)
    
    if offset<old_offset:
      shuffled_indices=np.random.permutation(len(data))
      
      data=data[shuffled_indices]
      target=target[shuffled_indices]
      
   
    batch_data=data[offset:(offset+batch_size),:]
    batch_target=target[offset:(offset+batch_size),:]
    
    yield batch_data,batch_target
    

In [0]:
batch_size=20
batches=batched(train_data,train_target,batch_size)

In [94]:
batches

<generator object batched at 0x7f98b90377d0>

In [0]:
epochs=5

In [100]:
with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  
  for index, batch in enumerate(batches):
    len(batch)
    batch_data=batch[0]
    batch_target=batch[1]
    #print(batch[2])
    epoch=0
    
    if epoch>=epochs:
      break
    
    feed={X:batch_data,y:batch_target}
    
    train_error,_=sess.run([error,optimizer],feed)
    
    print('{}: {:3.6f}%'.format(index+1,100*train_error))
    
  test_feed={X:test_data,y:test_target}
  test_error,_=sess.run([error,optimize],test_feed)
  
  print('Test error: {:3.6f}%'.format(100*test_error))

TypeError: ignored