In [132]:
import h5py
import cv2
import keras
import scipy.io as sio
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import PIL
from PIL import Image
import pandas as pd
from random import shuffle
import os
import random
import pickle
from keras.models import Sequential
from keras.layers import *
from keras.layers.advanced_activations import PReLU
#utilities help us transform our data
from keras.utils import * 
from sklearn.cross_validation import train_test_split
from keras.callbacks import EarlyStopping, ModelCheckpoint, CSVLogger

In [2]:
class DigitStructure:
    """Reads per-image digit annotations from a ``digitStruct`` HDF5 file.

    The file is opened read-only with h5py and stays open on ``self.loadFile``
    for the lifetime of the object. Elements of the ``name`` and ``bbox``
    members of the ``digitStruct`` group are used as keys into that handle
    to reach the actual values.

    Datasets are read with ``ds[()]`` rather than the ``ds.value`` property,
    which h5py deprecated and later removed.
    """

    # Per-digit attributes stored for each bounding box.
    _BOX_KEYS = ('height', 'label', 'left', 'top', 'width')

    def __init__(self, _filePath):
        """Open ``_filePath`` and cache the ``name`` / ``bbox`` members."""
        self.loadFile = self.load_datafile(_filePath)
        self.digitStruct = self.readDigitStruct(self.loadFile)
        self.name = self.digitStruct['name']
        self.boxLabels = self.digitStruct["bbox"]

    def load_datafile(self, filepath):
        """Return a read-only h5py handle for ``filepath``."""
        return h5py.File(filepath, 'r')

    def readDigitStruct(self, datafile):
        """Return the ``digitStruct`` member of an open data file."""
        return datafile["digitStruct"]

    def getImageName(self, index):
        """Return the image file name for record ``index`` as a string.

        The name is stored as a column of character codes, one row per
        character, so each row's first element is converted with ``chr``.
        """
        char_codes = self.loadFile[self.name[index][0]][()]
        return ''.join(chr(row[0]) for row in char_codes)

    def bboxExtractor(self, attr):
        """Return the values of one bbox attribute as a list (one per digit).

        When ``attr`` has more than one row, each row holds a key into the
        file that must be dereferenced; a single-row ``attr`` holds the
        value directly.
        """
        if len(attr) > 1:
            # Read the reference table once instead of once per digit.
            refs = attr[()]
            return [self.loadFile[refs[j].item()][()][0][0]
                    for j in range(len(attr))]
        return [attr[()][0][0]]

    def getBboxAttributes(self, index):
        """Return a dict of per-digit lists for the ``index``-th bbox.

        Keys are 'height', 'label', 'left', 'top' and 'width'.
        """
        box_group = self.loadFile[self.boxLabels[index].item()]
        return {key: self.bboxExtractor(box_group[key])
                for key in self._BOX_KEYS}

    def getDigitStructure(self, n):
        """Return the bbox attribute dict for record ``n`` plus its 'name'."""
        s = self.getBboxAttributes(n)
        s['name'] = self.getImageName(n)
        return s

    def getAllDigitStructure(self):
        """Return ``getDigitStructure(i)`` for every record in the file."""
        return [self.getDigitStructure(i) for i in range(len(self.name))]

    def getAllDigitStructure_ByDigit(self):
        """Return one record per image with its digits regrouped box by box.

        Each record is ``{'filename': <name>, 'boxes': [<box>, ...]}`` where
        every box is a dict with 'height', 'label', 'left', 'top', 'width'.
        """
        result = []
        for record in self.getAllDigitStructure():
            boxes = [
                {key: record[key][j] for key in self._BOX_KEYS}
                for j in range(len(record['height']))
            ]
            result.append({'filename': record["name"], 'boxes': boxes})
        return result
    

# Load Train Data

In [3]:
# Parse the training annotations into one record per image
# ({'filename': ..., 'boxes': [...]}); training_dataPath is reused later as
# the folder the image files are read from.
training_dataPath = './train/'
train_digitStruct = training_dataPath + 'digitStruct.mat'
digitStructure = DigitStructure(train_digitStruct)
training_data = digitStructure.getAllDigitStructure_ByDigit()

# Load Test Data

In [5]:
# Parse the test annotations the same way as the training ones.
# Note: `digitStructure` is rebound here, so it no longer refers to the
# training reader after this cell runs.
test_dataPath = './test/'
test_digitStructure = test_dataPath + 'digitStruct.mat'
digitStructure = DigitStructure(test_digitStructure)
testing_data = digitStructure.getAllDigitStructure_ByDigit()

# Load Extra Data

In [None]:
# Parse the annotations of the extra split.
# The result gets its own name: binding it to `testing_data` would silently
# replace the test annotations, which later cells pair with the './test/'
# image folder.
extra_dataPath = './extra/'
extra_digitStructure = extra_dataPath + 'digitStruct.mat'
digitStructure = DigitStructure(extra_digitStructure)
extra_data = digitStructure.getAllDigitStructure_ByDigit()

In [6]:
import PIL.Image as Image

def generate_dataset(data, folder):
    """Crop each image around its digits and build normalised arrays.

    Parameters
    ----------
    data : list of dict
        One record per image with keys 'filename' and 'boxes'; every box is
        a dict with 'label', 'top', 'left', 'height' and 'width'.
    folder : str
        Directory the image files named in ``data`` are read from.

    Returns
    -------
    dataset : numpy.ndarray, float32, shape (len(data), 32, 32)
        Grayscale crops, each shifted by its mean and divided by its
        standard deviation.
    labels : numpy.ndarray, int, shape (len(data), 6)
        Column 0 is the number of digits; columns 1-5 hold the digit labels
        with a stored label of 10 rewritten to 0. Unused slots keep the
        fill value 10. Digits beyond the fifth are reported and not stored.
    """
    dataset = np.ndarray([len(data), 32, 32], dtype='float32')
    labels = np.ones([len(data), 6], dtype=int) * 10

    for i in np.arange(len(data)):
        record = data[i]
        im = Image.open(os.path.join(folder, record['filename']))
        boxes = record['boxes']
        num_digit = len(boxes)
        labels[i, 0] = num_digit

        for j in np.arange(num_digit):
            if j < 5:
                digit = boxes[j]['label']
                labels[i, j + 1] = 0 if digit == 10 else digit
            else:
                print('#',i,'image has more than 5 digits.')

        top = np.array([box['top'] for box in boxes], dtype='float32')
        left = np.array([box['left'] for box in boxes], dtype='float32')
        height = np.array([box['height'] for box in boxes], dtype='float32')
        width = np.array([box['width'] for box in boxes], dtype='float32')

        # Region spanned by the digit boxes.
        # NOTE(review): the extent uses the height/width of the box with the
        # largest top/left, which is not necessarily the box reaching
        # furthest down/right. Left unchanged so every split is cropped the
        # same way -- confirm whether a true union box was intended.
        im_top = np.amin(top)
        im_left = np.amin(left)
        im_height = np.amax(top) + height[np.argmax(top)] - im_top
        im_width = np.amax(left) + width[np.argmax(left)] - im_left

        # Grow the region by 10% on each side; bottom/right are clamped to
        # the image size.
        im_top = np.floor(im_top - 0.1 * im_height)
        im_left = np.floor(im_left - 0.1 * im_width)
        im_bottom = np.amin([np.ceil(im_top + 1.2 * im_height), im.size[1]])
        im_right = np.amin([np.ceil(im_left + 1.2 * im_width), im.size[0]])

        # LANCZOS is the same filter as the ANTIALIAS alias, which newer
        # Pillow releases no longer provide.
        im = im.crop((im_left, im_top, im_right, im_bottom)).resize([32, 32], Image.LANCZOS)

        # Convert to an array explicitly instead of relying on implicit
        # PIL -> numpy coercion inside the arithmetic below.
        pixels = np.asarray(im.convert("L"), dtype='float32')
        mean = np.mean(pixels, dtype='float32')
        std = np.std(pixels, dtype='float32', ddof=1)
        if std < 1e-4:
            # Near-constant crop: avoid dividing by ~0.
            std = 1.

        dataset[i, :, :] = (pixels - mean) / std

    return dataset, labels



In [7]:
# Build the training image/label arrays and show their shapes.
train_dataset, train_labels = generate_dataset(training_data, training_dataPath)
print(train_dataset.shape, train_labels.shape)

('#', 29929, 'image has more than 5 digits.')
((33402, 32, 32), (33402, 6))


In [8]:
# Build the test image/label arrays from the './test/' images.
test_dataset, test_labels = generate_dataset(testing_data, test_dataPath)


((33402, 32, 32), (33402, 6))


In [9]:
# Sanity check: shapes of the test arrays.
print(test_dataset.shape, test_labels.shape)

((13068, 32, 32), (13068, 6))


In [5]:
# Number of annotated training images. Written as a call so the cell also
# parses on Python 3; the printed text is the same on Python 2.
print(len(training_data))

33402


In [121]:

class PreProcessDataSet:
    """Turns annotated images into normalised 32x32 crops plus label rows.

    Parameters
    ----------
    data : list of dict
        One record per image with keys 'filename' and 'boxes'; every box is
        a dict with 'label', 'top', 'left', 'height' and 'width'.
    path : str
        Directory the image files are read from.
    """

    def __init__(self, data, path):
        self.data = data
        self.path = path
        self.image_size = (32, 32)
        self.dataSet = np.ndarray([len(self.data), 32, 32], dtype='float32')

        # Label row layout: column 0 = digit count, columns 1-5 = digits.
        self.total_digits = 6
        # Every slot starts at 10; unused digit slots keep that value.
        self.labels = np.ones([len(self.data), self.total_digits], dtype=int) * 10
        self.number_of_labels = 10
        self.validation_index = []
        self.training_index = []

    def preProcess(self):
        """Fill and return ``(self.dataSet, self.labels)`` for every record.

        Each image is cropped to the region around its digit boxes (grown by
        10% per side), resized to 32x32, converted to grayscale and
        normalised. A stored label of 10 is rewritten to 0. Digits beyond
        the fifth are reported and not stored.
        """
        for i in np.arange(len(self.dataSet)):
            record = self.data[i]
            # os.path.join also works when self.path has no trailing slash.
            read_image = Image.open(os.path.join(self.path, record["filename"]))
            image_size = read_image.size
            boxList = record['boxes']
            number_of_digits = len(boxList)
            self.labels[i, 0] = number_of_digits

            for digits in np.arange(number_of_digits):
                if digits < 5:
                    label = boxList[digits]['label']
                    self.labels[i, digits + 1] = 0 if label == 10 else label
                else:
                    print('#',i,'image has more than 5 digits.')

            height = np.array([box['height'] for box in boxList], dtype='float32')
            width = np.array([box['width'] for box in boxList], dtype='float32')
            top = np.array([box['top'] for box in boxList], dtype='float32')
            left = np.array([box['left'] for box in boxList], dtype='float32')

            # Region spanned by the digit boxes.
            # NOTE(review): the extent uses the height/width of the box with
            # the largest top/left, which is not necessarily the box
            # reaching furthest down/right. Left unchanged so every split is
            # cropped the same way -- confirm whether a true union box was
            # intended.
            image_top = np.amin(top)
            image_left = np.amin(left)
            image_height = np.amax(top) + height[np.argmax(top)] - image_top
            image_width = np.amax(left) + width[np.argmax(left)] - image_left

            # Grow by 10% per side; bottom/right are clamped to the image.
            image_top = np.floor(image_top - 0.1 * image_height)
            image_left = np.floor(image_left - 0.1 * image_width)
            image_bottom = np.amin([np.ceil(image_top + 1.2 * image_height), image_size[1]])
            image_right = np.amin([np.ceil(image_left + 1.2 * image_width), image_size[0]])

            # LANCZOS is the same filter as the ANTIALIAS alias, which newer
            # Pillow releases no longer provide.
            read_image = read_image.crop(
                (image_left, image_top, image_right, image_bottom)
            ).resize([32, 32], Image.LANCZOS)

            self.dataSet[i, :, :] = self.normalization(read_image.convert("L"))

        return self.dataSet, self.labels

    def normalization(self, image):
        """Return ``image`` as a float32 array with zero mean and unit std.

        ``image`` may be a PIL image or an array. The sample standard
        deviation (``ddof=1``) is used; when it is below 1e-4 the data is
        only mean-shifted, to avoid dividing by ~0.
        """
        pixels = np.asarray(image, dtype='float32')
        mean = np.mean(pixels, dtype='float32')
        standard_deviation = np.std(pixels, dtype='float32', ddof=1)

        if standard_deviation < 1e-4:
            standard_deviation = 1.

        return (pixels - mean) / standard_deviation

    def createValidationSet(self, dataset, labels, split_ratio=0.2):
        """Shuffle ``dataset``/``labels`` together and split off a validation part.

        Parameters
        ----------
        dataset : numpy.ndarray, shape (N, H, W)
        labels : numpy.ndarray with N rows
        split_ratio : float, optional
            Fraction of the rows used for validation (default 0.2).

        Returns
        -------
        train_dataset, train_labels, valid_dataset, valid_labels
        """
        split_portion = int(dataset.shape[0] * split_ratio)

        shuffled_dataset, shuffled_labels = self.shuffleSet(dataset, labels)
        valid_dataset = shuffled_dataset[:split_portion, :, :]
        valid_labels = shuffled_labels[:split_portion]
        train_dataset = shuffled_dataset[split_portion:, :, :]
        train_labels = shuffled_labels[split_portion:]

        # str.format prints the same text on Python 2 and Python 3.
        print("valid dataset  {}".format(valid_dataset.shape))
        print("valid labels  {}".format(valid_labels.shape))
        print("train data  {}".format(train_dataset.shape))
        print("train labels  {}".format(train_labels.shape))

        return train_dataset, train_labels, valid_dataset, valid_labels

    def shuffleSet(self, data, labels):
        """Return ``data`` and ``labels`` reordered by one shared random permutation."""
        permutation = np.random.permutation(labels.shape[0])
        shuffled_dataset = data[permutation, :, :]
        shuffled_labels = labels[permutation]

        return shuffled_dataset, shuffled_labels
        

In [122]:
# Preprocessor bound to the training annotations and the training image folder.
poces = PreProcessDataSet(training_data, training_dataPath)

In [128]:
train_dataset, train_labels = poces.preProcess()
# Drop images with more than 5 digits: only 5 digit slots exist per label
# row, so their labels are incomplete. Column 0 holds the digit count, which
# lets us filter without hard-coding the offending row index.
has_at_most_5_digits = train_labels[:, 0] <= 5
train_dataset = train_dataset[has_at_most_5_digits]
train_labels = train_labels[has_at_most_5_digits]

('#', 29929, 'image has more than 5 digits.')


In [129]:
# Shuffle and split off the validation part. This rebinds train_dataset /
# train_labels, so re-running the cell splits the already-reduced set again.
train_dataset, train_labels, valid_dataset, valid_labels = poces.createValidationSet(train_dataset, train_labels)

inside
6680
valid dataset  (6680, 32, 32)
valid labels  (6680, 6)
train data  (26721, 32, 32)
train labels  (26721, 6)


In [130]:
# Rebuild the test arrays with generate_dataset (the training arrays above
# came from PreProcessDataSet.preProcess instead) and show their shapes.
test_dataset, test_labels = generate_dataset(testing_data, test_dataPath)
print(test_dataset.shape, test_labels.shape)

((13068, 32, 32), (13068, 6))


# Save Dataset


In [133]:

pickle_file = 'dataSet_svhn.pickle'

# Persist all three splits in one pickle. The `with` block closes the file
# even when pickle.dump raises, so a failed save does not leak the handle.
try:
    save = {
        'train_dataset': train_dataset,
        'train_labels': train_labels,
        'valid_dataset': valid_dataset,
        'valid_labels': valid_labels,
        'test_dataset': test_dataset,
        'test_labels': test_labels,
    }
    with open(pickle_file, 'wb') as f:
        pickle.dump(save, f, pickle.HIGHEST_PROTOCOL)
    print("Done")
except Exception as e:
    # Report which file failed, then re-raise so the cell still errors out.
    print('Unable to save data to', pickle_file, ':', e)
    raise

Done


In [5]:
class Data_Generator:
    """Work-in-progress loader that crops single digits out of annotated images.

    Parameters
    ----------
    data : mapping
        Image file name -> sequence of per-digit entries, each indexable as
        ``(label, left, top, width, height)``.
    data_path : str
        Directory the image files are read from.
    batch_size, val_split
        Stored on the instance; not used by the current implementation.

    Note
    ----
    Construction immediately runs ``data_preprocess``, which opens OpenCV
    preview windows and blocks on a key press for every digit.
    """

    def __init__(self, data, data_path, batch_size, val_split):
        self.path = data_path
        self.batch_size = batch_size
        self.val_split = val_split
        self.data = data
        self.data_preprocess()

    def data_preprocess(self):
        """Show every annotated digit crop next to its source image.

        Raises
        ------
        IOError
            If an image named in ``self.data`` cannot be read.
        """
        for image in self.data:
            image_path = os.path.join(self.path, image)

            # Read and convert once per image rather than once per digit.
            read_image = cv2.imread(image_path)
            if read_image is None:
                # cv2.imread signals failure by returning None.
                raise IOError("Could not read image: " + str(image_path))
            gray = cv2.cvtColor(read_image, cv2.COLOR_BGR2GRAY)

            # Use the instance's own annotations, not a global named `data`.
            for i in self.data[image]:
                label = i[0]
                print ("label ", label)
                left, top, width, height = i[1], i[2], i[3], i[4]
                print ("left top width heght ", left, top, width, height)
                cv2.imshow("im " + str(image_path), read_image)

                # Rows span top..top+height, columns span left..left+width.
                crop = gray[top:top + height, left:left + width].copy()
                cv2.imshow("cropped" + str(image_path), crop)

                cv2.waitKey(0)
                cv2.destroyAllWindows()

        # TODO: collect the crops and labels, resize crops to 32x32, split
        # them into train/validation parts using self.val_split, and yield
        # batches of self.batch_size with one-hot encoded labels.

In [6]:
# NOTE(review): `data`, `path`, `batch_size` and `val_split` are not defined
# in any cell visible in this notebook -- presumably leftover kernel state;
# define them explicitly before this call.
generator = Data_Generator(data, path, batch_size, val_split)

('label ', 2)
('left top width heght ', 77, 29, 23, 32)
('label ', 3)
('left top width heght ', 98, 25, 26, 32)
('label ', 2)
('left top width heght ', 17, 5, 8, 15)
('label ', 5)
('left top width heght ', 25, 5, 9, 15)
('label ', 1)
('left top width heght ', 246, 77, 81, 219)
('label ', 9)
('left top width heght ', 323, 81, 96, 219)
('label ', 9)
('left top width heght ', 57, 13, 15, 34)
('label ', 3)
('left top width heght ', 72, 13, 13, 34)
('label ', 3)
('left top width heght ', 52, 7, 21, 46)
('label ', 1)
('left top width heght ', 74, 10, 15, 46)


In [7]:

# Disabled draft of a batch generator, kept as a bare string literal: nothing
# in it is executed, the cell only echoes the text.
# NOTE(review): the crop slice inside indexes rows with `width` and columns
# with `top+left`, which does not match the unpacked box fields -- fix it
# before reviving this code.
'''
def data_process(data, path, batch_size):

    # images_list = [] # [image,label]
    # labels = []
    numbers = [] # [image,label]
    numbers_labels = []
    for imageWithLabels in data:

    #     print( imageWithLabels)
        image = (imageWithLabels) # image name
        print (image)
        image_path = path + (image) # image path
        for i in data[image]:
            label = i[0] #label of image
            print (label)
            numbers_labels.append(label)
            left, top, width, height = i[1], i[2], i[3], i[4]
            print ("left", "top", "width", "height", left, top, width, height)
            read_image = cv2.imread(image_path)

            gray = cv2.cvtColor(read_image, cv2.COLOR_BGR2GRAY)
            crop = gray[width:height+top, left:top+left].copy()
            numbers.append(crop)
#             numbers.append(cv2.resize(crop, (32,32))) #resize image
#             print ("leng of number ", len(numbers_labels))
            if len(numbers) == batch_size:
                print ("yield")
                numbers = np.expand_dims(numbers, -1)
                numbers_labels = pd.get_dummies(numbers_labels).as_matrix()
                yield (numbers, numbers_labels)
                numbers = [] # [image,label]
                numbers_labels = []
'''

'\ndef data_process(data, path, batch_size):\n\n    # images_list = [] # [image,label]\n    # labels = []\n    numbers = [] # [image,label]\n    numbers_labels = []\n    for imageWithLabels in data:\n\n    #     print( imageWithLabels)\n        image = (imageWithLabels) # image name\n        print (image)\n        image_path = path + (image) # image path\n        for i in data[image]:\n            label = i[0] #label of image\n            print (label)\n            numbers_labels.append(label)\n            left, top, width, height = i[1], i[2], i[3], i[4]\n            print ("left", "top", "width", "height", left, top, width, height)\n            read_image = cv2.imread(image_path)\n\n            gray = cv2.cvtColor(read_image, cv2.COLOR_BGR2GRAY)\n            crop = gray[width:height+top, left:top+left].copy()\n            numbers.append(crop)\n#             numbers.append(cv2.resize(crop, (32,32))) #resize image\n#             print ("leng of number ", len(numbers_labels))\n         

In [10]:
# Scratch cell: defines a (32, 32) size tuple and echoes its first element.
image_size = (32,32)
image_size[0]

32