In [43]:
import os
import sys
from glob import glob
import h5py
import time
import progressbar

import numpy as np

from skimage import io, color, exposure, transform
from sklearn.model_selection import train_test_split

from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential, model_from_json
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Conv2D
from keras.layers.pooling import MaxPooling2D
from keras.optimizers import SGD
from keras.utils import np_utils
from keras.callbacks import LearningRateScheduler, ModelCheckpoint
from keras import backend
backend.set_image_data_format('channels_first')

from matplotlib import pyplot
%matplotlib inline

In [18]:
NUM_CLASSES = 43
IMG_SIZE = 48

<h2> Preprocessing </h2>

1. Histogram normalization in HSV yellow
2. Crop central region
3. Resize
4. Roll RGB axis to 0

In [8]:
def preprocess(image):
    
    # Histogram normalization in HSV yellow
    temp = color.rgb2hsv(image)
    temp[:,:,2] = exposure.equalize_hist(temp[:,:,2])
    image = color.hsv2rgb(temp)
    
    # Crop central region
    ms = min(image.shape[:-1])
    center = image.shape[0]//2, image.shape[1]//2
    image = image[
        center[0] - ms//2 : center[0] + ms//2,
        center[1] - ms//2 : center[1] + ms//2,
        :
    ]
    
    # Resize
    image = transform.resize(image, (IMG_SIZE, IMG_SIZE))
    
    # Roll RGB axis to 0
    image = np.rollaxis(image, -1)
    
    return image

<h2> Preprocessing </h2>

1. Store images into numpy arrays
2. Get labels
3. Convert to one-hot

In [62]:
try:
    X = h5py.File('data.h5')['images'][:]
    Y = h5py.File('data.h5')['labels'][:]
    print("Found preprocessed images in data.h5")
except (IOError, OSError, KeyError):
    print("Could not find preprocessed data ['data.h5']\n")
    root = 'GTSRB/Final_Training/Images/'
    images = []
    labels = []
    paths = glob(os.path.join(root, '*/*.ppm'))
    np.random.shuffle(paths)
    total = len(paths)

    start = time.time()
    print("\nTotal training images:\t{}".format(total))
    for i in range(total):
        sys.stdout.write('\r')
        sys.stdout.write("Processed image # \t{} | {}% complete".format(i, round(i*100/total, 2)))
        sys.stdout.flush()

        path = paths[i]
        image = preprocess(io.imread(path))
        label = int(path.split('\\')[-2])
        images.append(image)
        labels.append(label)
    end = time.time()
    print("\nFinished preprocessing!\n")

    X = np.array(images, dtype='float32')
    Y = np.eye(NUM_CLASSES, dtype='uint8')[labels]
    h5py.File('data.h5').create_dataset('images', data = X)
    h5py.File('data.h5').create_dataset('labels', data = Y)
    print("Saved preprocessed data in data.h5")
    
    print("\nTime spent preprocessing: {} seconds".format(round(end - start)))

Could not find preprocessed data ['data.h5']


Total training images:	39209
Processed image # 	39208 | 100.0% complete
Finished preprocessing!

Saved preprocessed data in data.h5

Time spent preprocessing: 367 seconds


In [55]:
print(images[0:10])

[array([[[0.33156974, 0.28626684, 0.27928801, ..., 0.18539222,
         0.20051468, 0.28338869],
        [0.34459221, 0.26889178, 0.26042011, ..., 0.25098292,
         0.29153621, 0.20890917],
        [0.28572401, 0.25547917, 0.24771434, ..., 0.28340974,
         0.20574106, 0.07416784],
        ...,
        [0.04339703, 0.01916942, 0.02430748, ..., 0.01346996,
         0.01678204, 0.06854227],
        [0.05448279, 0.02723717, 0.05890266, ..., 0.02572452,
         0.01328408, 0.14430192],
        [0.06541533, 0.05206402, 0.2732677 , ..., 0.08405097,
         0.03194945, 0.11531779]],

       [[0.26254149, 0.24398422, 0.25313063, ..., 0.1739883 ,
         0.19433481, 0.25618211],
        [0.28417977, 0.24264714, 0.24775426, ..., 0.2434358 ,
         0.28929059, 0.20698474],
        [0.2489528 , 0.24155857, 0.23953612, ..., 0.27487066,
         0.20003747, 0.07397115],
        ...,
        [0.03943513, 0.01730764, 0.02252242, ..., 0.01404088,
         0.01755778, 0.07093127],
        [0.

In [56]:
print(labels[0:10])

[5, 13, 31, 29, 4, 4, 31, 14, 10, 12]
