<a href="https://colab.research.google.com/github/rahulsing/TF_RNN/blob/master/RNN_OCR.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
from platform import python_version
print(python_version())

2.7.15rc1


In [0]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

In [0]:
import os
import gzip
import csv
import numpy as np
import tensorflow as tf


In [0]:
%matplotlib inline

import matplotlib
import matplotlib.pyplot as plt


In [0]:
from six.moves import urllib

In [9]:
print(np.__version__)
print(tf.__version__)

1.14.6
1.12.0


In [0]:
# Source of the OCR letter dataset: http://ai.stanford.edu/~btaskar/ocr
# URL_PATH is the remote gzip archive; DOWNLOADED_FILENAME is the local path
# the archive is saved to and later read from.
URL_PATH="http://ai.stanford.edu/~btaskar/ocr/letter.data.gz"
DOWNLOADED_FILENAME="letter.data.gz"

In [0]:
def download_data():
  """Download the dataset archive unless a local copy already exists.

  The archive at URL_PATH is fetched into a temporary ``.part`` file and only
  renamed to DOWNLOADED_FILENAME once the transfer has finished. Without this,
  an interrupted download would leave a truncated file behind that satisfies
  the existence check on every later run.

  Prints the source URL and the local filename. Any error raised by the
  download is propagated after the partial file has been removed.
  """
  if not os.path.exists(DOWNLOADED_FILENAME):
    partial_filename=DOWNLOADED_FILENAME+'.part'
    try:
      urllib.request.urlretrieve(URL_PATH,partial_filename)
      # Publish the file under its final name only after a complete transfer.
      os.rename(partial_filename,DOWNLOADED_FILENAME)
    finally:
      # Still present only if the download or the rename failed.
      if os.path.exists(partial_filename):
        os.remove(partial_filename)
    
  print('Found and verified file from this path: ', URL_PATH)
  print('Downloaded file: ',DOWNLOADED_FILENAME)


In [27]:
download_data()

Found and verified file from this path:  http://ai.stanford.edu/~btaskar/ocr/letter.data.gz
Downloaded file:  letter.data.gz


In [0]:
def read_lines():
  """Read the downloaded gzip archive as tab-separated rows.

  Returns a list with one entry per row of DOWNLOADED_FILENAME; each entry is
  the list of string fields produced by csv.reader for that row.
  """
  with gzip.open(DOWNLOADED_FILENAME,'rt') as archive:
    # Materialise every row before the file handle is closed.
    return [row for row in csv.reader(archive,delimiter='\t')]

In [32]:
lines=read_lines()
len(lines)

52152

In [34]:
lines[1][:15]

['2', 'm', '3', '1', '2', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0']

In [0]:
def get_features_labels(lines):
  """Group per-letter rows into words of pixel images and letter labels.

  Each row is a sequence of strings in which field 0 is an integer id,
  field 1 is the letter, field 2 is the id of the next letter (-1 marks the
  last letter of a word) and fields 6..133 are the 128 pixel values of a
  16x8 image.

  Rows are processed in ascending id order. A word is closed whenever a row
  with next id -1 is seen. Letters left over after the last terminator are
  emitted as a final word instead of being silently discarded.

  Returns:
    (data, target): two parallel lists with one entry per word. data[i] is a
    list of (16, 8) integer arrays and target[i] is the list of letters.

  Raises:
    ValueError: if an id or pixel field is not an integer, or a row does not
      provide exactly 128 pixel values.
  """
  ordered=sorted(lines,key=lambda row:int(row[0]))
  data,target=[],[]

  word=[]
  word_pixels=[]
  for line in ordered:
    next_id=int(line[2])
    pixels=np.array([int(x) for x in line[6:134]])
    pixels=pixels.reshape((16,8))

    word_pixels.append(pixels)
    word.append(line[1])

    if next_id==-1:
      data.append(word_pixels)
      target.append(word)

      word=[]
      word_pixels=[]

  # Flush a trailing word whose last letter was not marked with next id -1.
  if word:
    data.append(word_pixels)
    target.append(word)

  return data,target

In [0]:
data,target=get_features_labels(lines)

In [40]:
len(data),len(target)

(6877, 6877)

In [0]:
def pad_features_labels(data,target):
  """Pad every word to the length of the longest word.

  data is a list of per-word lists of (16, 8) arrays and target is the
  matching list of per-word letter lists. Short words are extended with
  all-zero (16, 8) images and with empty-string labels respectively.

  Returns the padded features and the padded labels as NumPy arrays.
  """
  longest=max(map(len,target))
  blank=np.zeros((16,8))

  padded_pixels=[]
  for glyphs in data:
    missing=longest-len(glyphs)
    padded_pixels.append(glyphs+[blank]*missing)

  padded_letters=[]
  for letters in target:
    missing=longest-len(letters)
    padded_letters.append(letters+['']*missing)

  return np.array(padded_pixels),np.array(padded_letters)

In [0]:
padded_data,padded_target=pad_features_labels(data,target)

In [67]:
# Sanity check: padded features and padded labels must cover the same number of words.
len(padded_data),len(padded_target)

(6877, 6877)

In [68]:
# Peek at the first ten padded label rows; padding shows up as empty strings.
padded_target[:10]

array([['o', 'm', 'm', 'a', 'n', 'd', 'i', 'n', 'g', '', '', '', '', ''],
       ['o', 'm', 'm', 'a', 'n', 'd', 'i', 'n', 'g', '', '', '', '', ''],
       ['o', 'm', 'm', 'a', 'n', 'd', 'i', 'n', 'g', '', '', '', '', ''],
       ['o', 'm', 'm', 'a', 'n', 'd', 'i', 'n', 'g', '', '', '', '', ''],
       ['o', 'm', 'm', 'a', 'n', 'd', 'i', 'n', 'g', '', '', '', '', ''],
       ['o', 'm', 'm', 'a', 'n', 'd', 'i', 'n', 'g', '', '', '', '', ''],
       ['o', 'm', 'm', 'a', 'n', 'd', 'i', 'n', 'g', '', '', '', '', ''],
       ['o', 'm', 'm', 'a', 'n', 'd', 'i', 'n', 'g', '', '', '', '', ''],
       ['o', 'm', 'm', 'a', 'n', 'd', 'i', 'n', 'g', '', '', '', '', ''],
       ['o', 'm', 'm', 'a', 'n', 'd', 'i', 'n', 'g', '', '', '', '', '']],
      dtype='|S1')

In [69]:
# Peek at another slice of padded label rows further into the array.
padded_target[200:210]

array([['m', 'b', 'r', 'a', 'c', 'e', 's', '', '', '', '', '', '', ''],
       ['m', 'b', 'r', 'a', 'c', 'e', 's', '', '', '', '', '', '', ''],
       ['m', 'b', 'r', 'a', 'c', 'e', 's', '', '', '', '', '', '', ''],
       ['m', 'b', 'r', 'a', 'c', 'e', 's', '', '', '', '', '', '', ''],
       ['m', 'b', 'r', 'a', 'c', 'e', 's', '', '', '', '', '', '', ''],
       ['m', 'b', 'r', 'a', 'c', 'e', 's', '', '', '', '', '', '', ''],
       ['m', 'b', 'r', 'a', 'c', 'e', 's', '', '', '', '', '', '', ''],
       ['m', 'b', 'r', 'a', 'c', 'e', 's', '', '', '', '', '', '', ''],
       ['m', 'b', 'r', 'a', 'c', 'e', 's', '', '', '', '', '', '', ''],
       ['m', 'b', 'r', 'a', 'c', 'e', 's', '', '', '', '', '', '', '']],
      dtype='|S1')

In [0]:
# Every row has the same length after padding, so the first row gives the padded word length.
word_length=len(padded_target[0])

In [53]:
word_length

14

In [54]:
padded_data.shape

(6877, 14, 16, 8)

In [55]:
padded_data.shape[:2]

(6877, 14)

In [56]:
padded_data.shape[:2]+(-1,)

(6877, 14, -1)

In [0]:
# Keep the first two axes and flatten everything after them into one axis (the -1).
reshaped_data=padded_data.reshape(padded_data.shape[:2]+(-1,))

In [58]:
reshaped_data.shape

(6877, 14, 128)

In [70]:
padded_target.shape+(26,)

(6877, 14, 26)

In [0]:
# Allocate an all-zero array with an extra trailing axis of size 26 for every entry of padded_target.
one_hot_target=np.zeros(padded_target.shape+(26,))

In [0]:
# Mark the slot of each letter (offset from 'a') with 1.
# Empty-string padding entries are skipped, so their rows stay all-zero.
for index,letter in np.ndenumerate(padded_target):
  if letter:
    one_hot_target[index][ord(letter)-ord('a')]=1

In [72]:
one_hot_target[0][0]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [0]:
# Shuffle features and labels with one shared permutation so rows stay aligned.
# A dedicated, seeded generator makes the shuffle (and therefore the train/test
# split built from it) reproducible across kernel restarts, without touching
# NumPy's global random state.
SHUFFLE_SEED=42
shuffled_indices=np.random.RandomState(SHUFFLE_SEED).permutation(len(reshaped_data))

shuffled_data=reshaped_data[shuffled_indices]

shuffled_target=one_hot_target[shuffled_indices]

In [0]:
# Index of the first test row: the leading 66% of the shuffled rows are used for training.
split=int(0.66*len(shuffled_data))

# Cut features and labels at the same index so the pairs stay aligned.
train_data,test_data=shuffled_data[:split],shuffled_data[split:]
train_target,test_target=shuffled_target[:split],shuffled_target[split:]


In [75]:
train_data.shape

(4538, 14, 128)

In [76]:
# Unpack the second and third axis sizes of the training features;
# the size of the leading axis is discarded.
_,num_steps,num_inputs=train_data.shape

train_target.shape

(4538, 14, 26)

In [0]:
# Size of the last axis of the one-hot training targets.
num_classes=train_target.shape[2]


In [78]:
num_steps,num_inputs,num_classes

(14, 128, 26)

In [0]:
# Clear TensorFlow's default graph before defining the model below
# (presumably so that re-running these cells starts from an empty graph).
tf.reset_default_graph()

In [0]:
# Input placeholder: float64, shaped [None, num_steps, num_inputs]; the leading dimension is left unspecified.
X=tf.placeholder(tf.float64,[None,num_steps,num_inputs])

In [0]:
# Target placeholder: float64, shaped [None, num_steps, num_classes]; the leading dimension is left unspecified.
y=tf.placeholder(tf.float64,[None,num_steps,num_classes])