In [2]:
%matplotlib inline

# Imports
import h5py
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
from six.moves import range
import matplotlib.pyplot as plt
import cv2
import math
import random

In [3]:
# CONSTANTS
# Side length, in pixels, of the square images that loadData produces.
imgSize = 54

In [None]:
# Define some useful Functions
def get_box_data(index, hdf5_data):
    """
    Collect the per-digit bounding-box fields stored for one picture.

    :param index: position of the picture inside '/digitStruct/bbox'
    :param hdf5_data: opened digitStruct file (or any mapping that resolves
        both the '/digitStruct/bbox' path and the references stored in it)
    :return: dict mapping each visited field name to a list with one entry
        per digit; 'height', 'label', 'left', 'top' and 'width' are always
        present and stay empty lists if they are not visited
    """
    fields = {key: [] for key in ('height', 'label', 'left', 'top', 'width')}

    def collect(field, dataset):
        rows = dataset.shape[0]
        if rows == 1:
            # A single digit stores its value inline; it is kept uncast.
            fields[field] = [dataset[0][0]]
        else:
            # Several digits store one reference per digit that has to be
            # resolved through the file before the value can be read.
            fields[field] = [int(hdf5_data[dataset[row][0]][0][0])
                             for row in range(rows)]

    bbox_entry = hdf5_data['/digitStruct/bbox'][index]
    hdf5_data[bbox_entry[0]].visititems(collect)
    return fields

def get_name(index, hdf5_data):
    """
    Return the file name stored for one picture.

    :param index: position of the picture inside '/digitStruct/name'
    :param hdf5_data: opened digitStruct file
    :return: the name as a str, built from the stored character codes
    """
    name_refs = hdf5_data['/digitStruct/name']
    # Dataset.value was removed in h5py 3.0; indexing with an empty tuple
    # reads the whole dataset on old and new h5py versions alike.
    char_codes = hdf5_data[name_refs[index][0]][()]
    return ''.join([chr(int(code[0])) for code in char_codes])

def accuracy(predictions, labels):
    """
    Percentage of rows whose highest-scoring column is the same in
    `predictions` and in `labels`.

    :param predictions: 2-D array of scores, one row per sample
    :param labels: 2-D array of the same layout (e.g. one-hot targets)
    :return: value between 0 and 100
    """
    predicted_classes = np.argmax(predictions, 1)
    expected_classes = np.argmax(labels, 1)
    hits = np.sum(predicted_classes == expected_classes)
    total = predictions.shape[0]
    return 100.0 * hits / total

def oneHot(num, length):
    """
    Build a one-hot vector for a 1-based value.

    :param num: value to encode; slot `num - 1` is set, so 1 maps to the
        first slot (and 0 wraps around to the last one)
    :param length: number of slots in the vector
    :return: float array of zeros with a single 1
    """
    hot_slot = num - 1
    encoded = np.zeros(length)
    encoded[hot_slot] = 1
    return encoded

def maybeLoadData(folder, variations):
    """
    Return the arrays for `folder`, using `folder + '.pk'` as a cache.

    If the cache file does not exist the data is built with loadData and
    written to it; otherwise the cache is read and loadData is not called.

    :param folder: dataset directory; also the stem of the cache file
    :param variations: forwarded to loadData when the cache has to be built
    :return: tuple (imageData, imageLengths, imageDigits)
    """
    import os.path
    import pickle
    file_path = folder + '.pk'

    if not os.path.exists(file_path):
        imageData, imageLengths, imageDigits = loadData(folder, variations)
        data = { 'imageData': imageData, 'imageLengths': imageLengths, 'imageDigits': imageDigits}
        # The context manager closes the file even if pickling fails.
        with open(file_path, "wb") as cache_file:
            pickle.dump(data, cache_file)
        # Hand back what was just built instead of reading it from disk again.
        return imageData, imageLengths, imageDigits

    # NOTE(review): unpickling can execute arbitrary code -- only load cache
    # files that this notebook wrote itself.
    with open(file_path, "rb") as cache_file:
        data = pickle.load(cache_file)
    return data['imageData'], data['imageLengths'], data['imageDigits']

def loadData(folder, variations):
    """
    Read every picture listed in `folder`/digitStruct.mat and build the
    image, length and digit arrays.

    Each picture is cropped to the box spanning its first and last digit,
    converted to gray scale, resized to imgSize x imgSize and scaled to the
    range [-0.5, 0.5]. The samples are shuffled before they are returned.

    :param folder: directory holding digitStruct.mat and the image files
    :param variations: when True the crop is enlarged by 30%, resized to
        64x64 and a random imgSize x imgSize window is cut out of it;
        otherwise the crop is resized straight to imgSize x imgSize
    :return: tuple (imageData, imageLengths, imageDigits) with shapes
        (N, imgSize, imgSize, 1) float32, (N, 5) int and (N, 5, 11) int.
        imageLengths is the one-hot digit count; imageDigits holds one
        one-hot row per digit position (slot `label - 1`), with slot 10
        marking a position that has no digit
    :raises IOError: if an image file cannot be read
    """
    # Opened read-only and closed on exit; the handle used to be left open.
    with h5py.File(folder + '/' + 'digitStruct.mat', 'r') as f:
        #Get the number of images to iterate through them
        length = len(f['/digitStruct/name'])

        imageData = np.zeros([length, imgSize, imgSize, 1], dtype=np.float32)
        # np.int was removed in NumPy 1.24; the builtin int is the same dtype.
        imageLengths = np.zeros([length, 5], dtype=int)
        imageDigits = np.zeros([length, 5, 11], dtype=int)

        #Iterate through the images
        for i in range(0, length):
            if(i%500==0):
                print("Loaded {} out of {}".format(i,length))

            #Read the image
            imageFile = folder + '/' + get_name(i, f)
            img = cv2.imread(imageFile)
            if img is None:
                # cv2.imread signals failure by returning None, not by raising.
                raise IOError("Could not read image: {}".format(imageFile))

            #Read the box data & get the bounding box for all characters (using first and last digit)
            boxData = get_box_data(i, f)
            lastIndex = len(boxData['top']) - 1

            firstTop = int(boxData['top'][0])
            firstLeft = int(boxData['left'][0])
            firstRight = firstLeft + int(boxData['width'][0])
            firstBottom = firstTop + int(boxData['height'][0])

            lastTop = int(boxData['top'][lastIndex])
            lastLeft = int(boxData['left'][lastIndex])
            lastRight = lastLeft + int(boxData['width'][lastIndex])
            lastBottom = lastTop + int(boxData['height'][lastIndex])

            top = min(firstTop, lastTop)
            left = min(firstLeft, lastLeft)
            right = max(firstRight, lastRight)
            bottom = max(firstBottom, lastBottom)

            if(variations==True):
                #Grow the box by 30% around its centre to leave room for the random crop
                height = bottom - top
                width = right - left
                vertMiddle = (bottom + top)//2
                horCenter = (left + right)//2
                # 1.3*x is a float; slice bounds must be ints, so cast here.
                halfHeight = int((1.3*height)//2)
                halfWidth = int((1.3*width)//2)
                top = vertMiddle - halfHeight
                bottom = vertMiddle + halfHeight
                left = horCenter - halfWidth
                right = horCenter + halfWidth

            #Keep the box inside the picture
            top = max(top, 0)
            left = max(left, 0)
            right = min(right, img.shape[1])
            bottom = min(bottom, img.shape[0])

            #Extract only the RoI for faster pre-processing
            img = img[top:bottom, left:right]

            #Convert to gray scale if in color. Without this the 3-channel crop
            #was squeezed into a single channel by np.resize, which kept only
            #the first third of the interleaved pixel buffer.
            if img.ndim == 3:
                img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

            if(variations==True):
                #Resize to 64x64, then cut a random imgSize x imgSize window
                img = cv2.resize(img,(64, 64), interpolation = cv2.INTER_LANCZOS4)
                leftStart=random.randint(0,9)
                topStart=random.randint(0,9)
                img = img[topStart:(topStart+imgSize), leftStart:(leftStart+imgSize)]
            else:
                img = cv2.resize(img,(imgSize, imgSize), interpolation = cv2.INTER_LANCZOS4)

            #Copy the data, scaled from [0, 255] to [-0.5, 0.5]
            oneImageData = img.reshape(imgSize, imgSize, 1).astype(np.float32)
            imageData[i] = oneImageData/255.0 - 0.5

            #Length of digits
            numberOfDigits = len(boxData['label'])
            first=0
            if(numberOfDigits>5):
                #One image has incorrect label length of 6: keep 5 digits, skipping the first label
                numberOfDigits=5
                print(boxData['label'])
                first=1

            imageLengths[i] = oneHot(numberOfDigits,5)

            for k in range(0,5):
                if(k<numberOfDigits):
                    imageDigits[i,k,:]=oneHot(int(boxData['label'][int(k+first)]),11)
                else:
                    #Slot 10 marks "no digit at this position"
                    imageDigits[i,k,10]=1

    #Shuffle the three arrays with the same permutation so rows stay aligned
    shuffledIndexes  = np.arange(length)
    np.random.shuffle(shuffledIndexes)

    imageData = imageData[shuffledIndexes,:]
    imageLengths = imageLengths[shuffledIndexes,:]
    imageDigits = imageDigits[shuffledIndexes,:,:]
    return imageData,imageLengths, imageDigits



In [None]:
# NOTE(review): these absolute paths are not used below -- the loaders are
# called with the relative folder names 'train' and 'test'.
trainPath = '/Users/rparundekar/dataspace/SVHN/train'
# This used to be a second assignment to trainPath, which silently replaced
# the training path with the test path.
testPath = '/Users/rparundekar/dataspace/SVHN/test'

print("Loading data")
# Training data is loaded with the random crop variations enabled.
trainImageData, trainImageLengths,trainImageDigits = maybeLoadData('train', True)
trainImageData = trainImageData.reshape((-1,imgSize,imgSize,1))
print("Training data images: {}".format(trainImageData.shape))
print("              length: {}".format(trainImageLengths.shape))
print("              digits: {}".format(trainImageDigits.shape))

print("Loading test & validation data")
# The 'test' folder is loaded without variations and split in two below.
folderImageData, folderImageLengths,folderImageDigits = maybeLoadData('test', False)
folderImageData=folderImageData.reshape((-1,imgSize,imgSize,1))
print("Folder test data images: {}".format(folderImageData.shape))
print("          length: {}".format(folderImageLengths.shape))
print("          digits: {}".format(folderImageDigits.shape))

# First half of the 'test' folder becomes the validation set ...
half = len(folderImageData)//2
validationImageData = folderImageData[0:half,:]
validationImageLengths = folderImageLengths[0:half,:]
validationImageDigits= folderImageDigits[0:half,:,:]
print("Validation data images: {}".format(validationImageData.shape))
print("                length: {}".format(validationImageLengths.shape))
print("                digits: {}".format(validationImageDigits.shape))

# ... and the second half is kept as the test set.
testImageData = folderImageData[half:,:]
testImageLengths = folderImageLengths[half:,:]
testImageDigits= folderImageDigits[half:,:,:]
print("Test data images: {}".format(testImageData.shape))
print("          length: {}".format(testImageLengths.shape))
print("          digits: {}".format(testImageDigits.shape))

print("Data loaded.")