# Model for NIST database lowercase characters
    - Model with the proposed architecture over the lowercase characters of the NIST database
    - Use hsf_4 as test dataset.

In [2]:
import numpy as np
import pandas as pd
#import cv2

from scipy.misc import imresize, imrotate, imsave 


import matplotlib.pyplot as plt

# display plots in this notebook
%matplotlib inline

# Notebook-wide matplotlib display defaults, applied in a single update
plt.rcParams.update({
    'figure.figsize': (10, 10),        # large images
    'image.interpolation': 'nearest',  # no interpolation: show square pixels
    'image.cmap': 'gray',              # grayscale colormap for image output
})


In [3]:
# NIST database location and lowercase class tables.

# Root of the NIST "by_class" directory tree (one sub-folder per character class).
path_NIST = '/home/jorge/data/tesis/handwriting/databases/NIST/by_class/'

# Class folder names: the two-digit hexadecimal character codes of 'a'..'z'.
char_list_lo = ['61', '62', '63', '64', '65', '66', '67', '68', '69', '6a', '6b', '6c', '6d',
                '6e', '6f', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '7a']

# decode_lo: class index -> character; encode_lo: character -> class index.
decode_lo = {}
encode_lo = {}
for i, c in enumerate(char_list_lo):
    # chr() replaces the Python 2-only str(unichr(...)); for these ASCII codes
    # both produce the same one-character str, and chr() also exists on Python 3.
    char = chr(int(c, 16))
    decode_lo[i] = char
    encode_lo[char] = i

# Function-call form prints the same text on Python 2 and Python 3.
print(decode_lo)

{0: 'a', 1: 'b', 2: 'c', 3: 'd', 4: 'e', 5: 'f', 6: 'g', 7: 'h', 8: 'i', 9: 'j', 10: 'k', 11: 'l', 12: 'm', 13: 'n', 14: 'o', 15: 'p', 16: 'q', 17: 'r', 18: 's', 19: 't', 20: 'u', 21: 'v', 22: 'w', 23: 'x', 24: 'y', 25: 'z'}


In [4]:
# Generator of list of files in a folder and subfolders
import os
import shutil
import fnmatch

def gen_find(filepat,top):
    """Lazily yield the path of every file under ``top`` matching ``filepat``.

    The tree rooted at ``top`` is traversed with ``os.walk``; in each visited
    directory the file names are filtered with the shell-style pattern
    ``filepat`` (``fnmatch`` rules) and joined with that directory's path.
    """
    for folder, _subdirs, names in os.walk(top):
        matches = fnmatch.filter(names, filepat)
        for hit in (os.path.join(folder, fname) for fname in matches):
            yield hit

In [4]:

# Read the train dataset: every PNG under <path_NIST>/<class>/train_<class>.
X_trn = []
y_trn = []
for char in char_list_lo:
    # chr() replaces the Python 2-only unichr(); same result for these ASCII codes.
    letter = chr(int(char, 16))
    # The label is constant for the whole class folder, so look it up once.
    label = encode_lo[letter]
    images_list = gen_find("*.png", path_NIST + char + '/train_' + char)
    for img_name in images_list:
        img = plt.imread(img_name)

        # Transform: keep rows/columns 32..95 of channel 0 (a 64x64 window).
        img = img[32:96, 32:96, 0]

        X_trn.append(img)
        y_trn.append(label)

# Invert intensities (1 - pixel) and turn both lists into arrays.
X_trn = 1. - np.array(X_trn)
y_trn = np.array(y_trn)

print(X_trn.shape)
print(y_trn.shape)

(178998, 64, 64)
(178998,)


In [5]:
# Sub-folder of each class directory that is held out as the test partition.
hsf = '/hsf_4/'

# Read the test dataset: every PNG under <path_NIST>/<class>/hsf_4/.
X_tst = []
y_tst = []
for char in char_list_lo:
    # chr() replaces the Python 2-only unichr(); same result for these ASCII codes.
    letter = chr(int(char, 16))
    # The label is constant for the whole class folder, so look it up once.
    label = encode_lo[letter]
    images_list = gen_find("*.png", path_NIST + char + hsf)
    for img_name in images_list:
        img = plt.imread(img_name)

        # Transform: keep rows/columns 32..95 of channel 0 (a 64x64 window).
        img = img[32:96, 32:96, 0]

        X_tst.append(img)
        y_tst.append(label)

# Invert intensities (1 - pixel) and turn both lists into arrays.
X_tst = 1. - np.array(X_tst)
y_tst = np.array(y_tst)

print(X_tst.shape)
print(y_tst.shape)

(12000, 64, 64)
(12000,)


In [9]:
# Insert a singleton channel axis so each sample is (1, 64, 64), the shape
# declared for the network's Input layer. The sample count is kept as-is.
X_trn = np.reshape(X_trn, (len(X_trn), 1, 64, 64))
X_tst = np.reshape(X_tst, (len(X_tst), 1, 64, 64))

In [6]:
#
#first model
#

from keras.models import Model
from keras.layers import Input, Convolution2D, MaxPooling2D, Dense, Dropout, Flatten
from keras.optimizers import SGD

print('Build model 1...')

# Keyword options shared by every convolution layer below.
_conv_opts = dict(border_mode='same', activation='relu')
# Pooling window shared by every max-pooling layer below.
_pool = (2, 2)

# Single-channel 64x64 input, channel axis first.
input_images = Input(shape=(1, 64, 64))

# Stage 1: two 3x3 convolutions with 64 filters, then 2x2 max pooling.
c11 = Convolution2D(64, 3, 3, **_conv_opts)(input_images)
c12 = Convolution2D(64, 3, 3, **_conv_opts)(c11)
c1_mp = MaxPooling2D(_pool)(c12)

# Stage 2: two 3x3 convolutions with 128 filters, then 2x2 max pooling.
c21 = Convolution2D(128, 3, 3, **_conv_opts)(c1_mp)
c22 = Convolution2D(128, 3, 3, **_conv_opts)(c21)
c2_mp = MaxPooling2D(_pool)(c22)

# Stage 3: three 3x3 convolutions with 256 filters, then 2x2 max pooling.
c31 = Convolution2D(256, 3, 3, **_conv_opts)(c2_mp)
c32 = Convolution2D(256, 3, 3, **_conv_opts)(c31)
c33 = Convolution2D(256, 3, 3, **_conv_opts)(c32)
c3_mp = MaxPooling2D(_pool)(c33)

# Flatten the feature maps for the fully connected classifier.
conv_out = Flatten()(c3_mp)

# Two 1024-unit ReLU layers, each followed by dropout with rate 0.5.
dense1 = Dense(1024, activation='relu')(conv_out)
after_dp1 = Dropout(0.5)(dense1)

dense2 = Dense(1024, activation='relu')(after_dp1)
after_dp2 = Dropout(0.5)(dense2)

# Softmax over the 26 lowercase classes.
output = Dense(26, activation='softmax')(after_dp2)

Using Theano backend.
Using gpu device 0: GeForce GTX TITAN Black (CNMeM is disabled, cuDNN 5103)


Build model 1...


In [7]:
#Data augmentation in keras
from keras.preprocessing.image import ImageDataGenerator

# Random augmentation settings, collected in one place and passed as keywords.
_augmentation = dict(
    rotation_range=15,        # random rotation range of 15 degrees
    width_shift_range=0.20,   # random horizontal shift, 20% of the width
    height_shift_range=0.20,  # random vertical shift, 20% of the height
    # NOTE(review): the previous comment claimed "5 degrees of shear", which
    # does not match 0.15. Keras 1.x documents shear_range as an angle in
    # radians (0.15 rad is about 8.6 degrees) -- confirm the intended amount.
    shear_range=0.15,
    zoom_range=0.20,          # random zoom of +-20%
)
datagen = ImageDataGenerator(**_augmentation)



In [None]:
# Create the model from the input/output tensors built above.
model_l = Model(input=input_images, output=output)

# SGD with Nesterov momentum and learning-rate decay.
sgd = SGD(lr=0.01, decay=0.001, momentum=0.9, nesterov=True)
# The sparse loss is used because y_trn / y_tst hold integer class indices,
# not one-hot vectors.
model_l.compile(loss='sparse_categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

# Fit the model on batches with real-time data augmentation.
# NOTE(review): the hsf_4 test set is passed as validation data, so the
# validation accuracy reported per epoch is measured on the test partition.
hist_l_1 = model_l.fit_generator(datagen.flow(X_trn, y_trn, batch_size=128),
                    samples_per_epoch=len(X_trn), nb_epoch=50,
                    validation_data=(X_tst, y_tst))

# Function-call form works on both Python 2 and Python 3 (was a print statement).
print('Done!')

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50

KeyboardInterrupt: 

# Accuracy on the NIST database (2nd ed.), lowercase characters: 90.4%


In [12]:
# Save the model: architecture as JSON and weights as HDF5, both under
# <path_models>/models/.
path_models = '/home/jorge/data/tesis/handwriting/p01_read_character/'

model_name = 'nist_lowercase_01'

json_string = model_l.to_json()
# Use a context manager so the file is flushed and closed deterministically;
# the previous open(...).write(...) left the handle to the garbage collector.
with open(path_models + 'models/mdl_' + model_name + '.json', 'w') as json_file:
    json_file.write(json_string)
model_l.save_weights(path_models + 'models/w_' + model_name + '.h5', overwrite=True)

# Save decode_target
#import pickle
#pickle.dump( decode_target_l, open( path_models + "models/unipen_decode_target_lowercase.p", "wb" ) )