In [1]:
import tensorflow as tf
import numpy as np
import os
import gzip
import csv

In [2]:
from six.moves import urllib

In [11]:
import matplotlib.pyplot as plt

In [3]:
# Remote location of the gzipped data file.
url = 'http://ai.stanford.edu/~btaskar/ocr/letter.data.gz'
# Local filename the download is stored under (relative to the working directory).
downloaded = 'letter.data.gz'

In [6]:
def download_data():
    """Fetch the data file from ``url`` into ``downloaded`` if it is not already there.

    The file is first written under a temporary name and only renamed to
    ``downloaded`` once the transfer has finished. Because the presence of
    ``downloaded`` is the only thing checked on later runs, this prevents an
    interrupted transfer from leaving behind a truncated file that would be
    mistaken for a complete download.

    Prints the source URL and the local filename. Note that no content
    verification (checksum, size) is performed despite the wording of the
    first message.

    Raises:
        Whatever ``urllib.request.urlretrieve`` raises on network failure; the
        temporary file is removed before the exception propagates.
    """
    if not os.path.exists(downloaded):
        partial = downloaded + '.part'
        try:
            urllib.request.urlretrieve(url, partial)
            # Atomic rename: `downloaded` only ever appears fully written.
            os.replace(partial, downloaded)
        finally:
            # Only still present when the transfer or the rename failed.
            if os.path.exists(partial):
                os.remove(partial)
    print('found and verified url:', url)
    print('donwloaded file name: ', downloaded)

In [7]:
# Fetch the data file unless a local copy already exists.
download_data()

found and verified url: http://ai.stanford.edu/~btaskar/ocr/letter.data.gz
donwloaded file name:  letter.data.gz


In [8]:
def read_lines():
    """Read the gzipped, tab-separated file named by ``downloaded``.

    Returns:
        A list with one entry per line of the file; each entry is the list of
        string fields obtained by splitting that line on tabs.
    """
    with gzip.open(downloaded, mode='rt') as handle:
        return [row for row in csv.reader(handle, delimiter='\t')]

In [9]:
# All rows of the data file, each a list of string fields.
lines = read_lines()

In [10]:
len(lines)

52152

In [12]:
lines[1][:15]

['2', 'm', '3', '1', '2', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0']

In [13]:
def get_features_labels(lines):
    """Group per-letter rows into words.

    Each row is a sequence of string fields. The fields read here are:

    * field 0   - integer id, used to order the rows
    * field 1   - the letter
    * field 2   - integer id of the next letter, ``-1`` on the last letter of a word
    * fields 6..133 - 128 integer pixel values, reshaped to a 16 x 8 array

    Rows are visited in ascending id order. Letters accumulate into the current
    word until a row whose next-id field is ``-1`` closes it. Letters that come
    after the last such row are not returned. Empty rows (for example the ones
    ``csv.reader`` yields for blank lines) are skipped instead of crashing the
    sort key.

    Args:
        lines: iterable of rows as described above.

    Returns:
        ``(data, target)`` - two lists of equal length. ``data[i]`` is the list
        of 16 x 8 integer arrays of word ``i``; ``target[i]`` is the matching
        list of letters.

    Raises:
        ValueError: if an id or pixel field is not an integer, or a row has
            fewer than 128 pixel fields (the reshape fails).
    """
    rows = sorted((row for row in lines if row), key=lambda row: int(row[0]))
    data, target = [], []
    word, word_pixels = [], []

    for row in rows:
        next_id = int(row[2])
        pixels = np.array([int(value) for value in row[6:134]]).reshape((16, 8))
        word_pixels.append(pixels)
        word.append(row[1])
        if next_id == -1:
            # End of the current word: emit it and start a fresh one.
            data.append(word_pixels)
            target.append(word)
            word, word_pixels = [], []

    return data, target

In [14]:
# data: one list of 16x8 pixel arrays per word; target: one list of letters per word.
data, target = get_features_labels(lines)

In [16]:
len(data), len(target)

(6877, 6877)

In [17]:
# Padding: extend every word to the length of the longest word
def pad_features_labels(data, target):
    """Pad every word to the length of the longest word and return arrays.

    Args:
        data: list of words, each a list of 16 x 8 image arrays.
        target: list of words, each a list of letters, parallel to ``data``.

    Returns:
        ``(images, letters)`` as NumPy arrays. Short words are extended with
        all-zero 16 x 8 images in ``images`` and with empty strings in
        ``letters``. The common length is taken from the longest entry of
        ``target``.
    """
    longest = max(map(len, target))
    blank_image = np.zeros((16, 8))

    # Pad the image sequences with blank (all-zero) images.
    padded_images = []
    for word_images in data:
        missing = longest - len(word_images)
        padded_images.append(word_images + [blank_image] * missing)

    # Pad the letter sequences with empty-string characters.
    padded_letters = []
    for word in target:
        missing = longest - len(word)
        padded_letters.append(word + [''] * missing)

    return np.array(padded_images), np.array(padded_letters)

In [38]:
# Pad all words to a common length and convert both collections to NumPy arrays.
padded_data, padded_target = pad_features_labels(data,target)

In [39]:
len(padded_data), len(padded_target)

(6877, 6877)

In [43]:
padded_target[:10]

array([['o', 'm', 'm', 'a', 'n', 'd', 'i', 'n', 'g', '', '', '', '', ''],
       ['o', 'm', 'm', 'a', 'n', 'd', 'i', 'n', 'g', '', '', '', '', ''],
       ['o', 'm', 'm', 'a', 'n', 'd', 'i', 'n', 'g', '', '', '', '', ''],
       ['o', 'm', 'm', 'a', 'n', 'd', 'i', 'n', 'g', '', '', '', '', ''],
       ['o', 'm', 'm', 'a', 'n', 'd', 'i', 'n', 'g', '', '', '', '', ''],
       ['o', 'm', 'm', 'a', 'n', 'd', 'i', 'n', 'g', '', '', '', '', ''],
       ['o', 'm', 'm', 'a', 'n', 'd', 'i', 'n', 'g', '', '', '', '', ''],
       ['o', 'm', 'm', 'a', 'n', 'd', 'i', 'n', 'g', '', '', '', '', ''],
       ['o', 'm', 'm', 'a', 'n', 'd', 'i', 'n', 'g', '', '', '', '', ''],
       ['o', 'm', 'm', 'a', 'n', 'd', 'i', 'n', 'g', '', '', '', '', '']],
      dtype='<U1')

In [44]:
# Length of a padded word, read off the first row of the padded targets.
word_length = len(padded_target[0])

In [45]:
word_length

14

In [46]:
padded_data.shape

(6877, 14, 16, 8)

In [47]:
padded_data.shape[:2]

(6877, 14)

In [48]:
padded_data.shape[:2] + (-1,)

(6877, 14, -1)

In [50]:
# Keep the two leading axes and flatten everything after them into a single
# feature axis (each 16x8 image becomes one 128-long vector).
n_words, n_positions = padded_data.shape[:2]
reshaped_data = padded_data.reshape(n_words, n_positions, -1)

In [51]:
reshaped_data.shape

(6877, 14, 128)

In [55]:
padded_target.shape

(6877, 14)

In [56]:
padded_target.shape + (26,)

(6877, 14, 26)

In [57]:
# One 26-entry vector per (word, position), initialised to all zeros.
one_hot_target = np.zeros(padded_target.shape + (26,))


In [58]:
# Mark the class of every real letter ('a' -> 0, 'b' -> 1, ...). Empty-string
# padding entries are skipped, so their vectors remain all zero.
for index, letter in np.ndenumerate(padded_target):
    if not letter:
        continue
    one_hot_target[index + (ord(letter) - ord('a'),)] = 1

In [59]:
one_hot_target[0][0]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [60]:
# Set up training data: shuffle inputs and targets with the same permutation so
# that each input stays paired with its target. A dedicated, seeded generator
# makes the permutation identical on every run (Restart & Run All), so anything
# derived from the shuffled order is reproducible.
shuffle_rng = np.random.RandomState(42)
shuffled_indices = shuffle_rng.permutation(len(reshaped_data))

shuffled_data = reshaped_data[shuffled_indices]
shuffled_target = one_hot_target[shuffled_indices]

In [61]:
# Cut point for the hold-out split: the first 66% of the shuffled samples are
# used for training, the remainder for testing.
split = int(len(shuffled_data) * .66)

train_data, test_data = shuffled_data[:split], shuffled_data[split:]
train_target, test_target = shuffled_target[:split], shuffled_target[split:]



In [62]:
train_data.shape

(4538, 14, 128)

In [63]:
# Time steps per word and features per step, taken from the training array;
# the leading sample count is discarded.
_, num_steps, num_inputs = train_data.shape 

In [64]:
train_target.shape

(4538, 14, 26)

In [65]:
# Number of classes = size of the one-hot axis of the targets.
num_classes = train_target.shape[2]

In [66]:
num_steps, num_inputs, num_classes

(14, 128, 26)

In [67]:
# Start from an empty default graph so re-running the cells below does not
# pile up duplicate ops from earlier runs.
tf.reset_default_graph()

In [68]:
# Graph inputs. The batch dimension is left open (None).
# X: [batch, num_steps, num_inputs] input features per time step.
# y: [batch, num_steps, num_classes] targets per time step
#    (presumably fed with the one-hot arrays built above - confirm at the feed site).
X = tf.placeholder(tf.float64, [None, num_steps, num_inputs])
y = tf.placeholder(tf.float64, [None, num_steps, num_classes])

In [69]:
# Per-example sequence length, derived from the inputs themselves:
# a time step is flagged 1 when any of its features is non-zero, 0 otherwise.
# NOTE(review): a real step whose features are all zero would be counted as
# padding by this rule - confirm that cannot occur in the data.
step_peak = tf.reduce_max(tf.abs(X), axis=2)
used = tf.sign(step_peak)

# Number of flagged steps per example, as the int32 tensor passed on as sequence length.
length = tf.reduce_sum(used, axis=-1)
sequence_length = tf.cast(length, tf.int32)

In [70]:
sequence_length

<tf.Tensor 'Cast:0' shape=(?,) dtype=int32>

In [71]:
# Number of units in the recurrent cell (also the width of its per-step output).
num_neurons = 300

In [72]:
# GRU recurrent cell with num_neurons units.
cell = tf.nn.rnn_cell.GRUCell(num_neurons)

In [73]:
# Run the cell over the time axis of X, passing the per-example lengths computed
# above. Only the per-step outputs are kept; the final state is discarded.
output, _ = tf.nn.dynamic_rnn(cell, X, dtype=tf.float64, sequence_length=sequence_length)

In [76]:
output.shape

TensorShape([Dimension(None), Dimension(14), Dimension(300)])

In [77]:
# Output-layer weights, shape [num_neurons, num_classes], drawn from a
# truncated normal with standard deviation 0.01.
weight = tf.Variable(tf.truncated_normal([num_neurons, num_classes],stddev=.01, dtype=tf.float64))

In [78]:
# Output-layer bias, one entry per class, initialised to 0.1.
bias = tf.Variable(tf.constant(0.1, shape=[num_classes], dtype=tf.float64))

In [79]:
# Merge the batch and time axes so every time step becomes one row of
# num_neurons features; this lets one matmul apply the output layer to all steps.
flattened_output = tf.reshape(output, [-1,num_neurons])

In [80]:
flattened_output

<tf.Tensor 'Reshape:0' shape=(?, 300) dtype=float64>

In [81]:
# Unnormalised class scores for every row (time step): linear layer shared across steps.
logits = tf.matmul(flattened_output, weight) + bias

In [82]:
# Class scores put back into [batch, num_steps, num_classes] layout.
logits_reshaped = tf.reshape(logits, [-1,num_steps, num_classes])

In [84]:
# Softmax cross-entropy for every time step of every example.
# The scores are passed in their [batch, num_steps, num_classes] layout so they
# line up axis-for-axis with the labels `y`; the 2-D `logits` tensor has a
# different rank than `y`. The result has one entry per (example, time step).
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits_reshaped, labels=y)

In [85]:
# Scalar training loss: mean over all cross-entropy entries.
# NOTE(review): no mask is applied, so padded time steps take part in the
# average as well - confirm this is intended.
loss = tf.reduce_mean(cross_entropy)