In [43]:
import os
import sys
from glob import glob
import h5py
import time
import progressbar

import numpy as np

from skimage import io, color, exposure, transform
from sklearn.model_selection import train_test_split

from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential, model_from_json
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Conv2D
from keras.layers.pooling import MaxPooling2D
from keras.optimizers import SGD
from keras.utils import np_utils
from keras.callbacks import LearningRateScheduler, ModelCheckpoint
from keras import backend
backend.set_image_data_format('channels_first')

from matplotlib import pyplot
%matplotlib inline

In [18]:
NUM_CLASSES = 43
IMG_SIZE = 48

<h2> Preprocessing </h2>

1. Histogram normalization in HSV yellow
2. Crop central region
3. Resize
4. Roll RGB axis to 0

In [8]:
def preprocess(image):
    
    # Histogram normalization in HSV yellow
    temp = color.rgb2hsv(image)
    temp[:,:,2] = exposure.equalize_hist(temp[:,:,2])
    image = color.hsv2rgb(temp)
    
    # Crop central region
    ms = min(image.shape[:-1])
    center = image.shape[0]//2, image.shape[1]//2
    image = image[
        center[0] - ms//2 : center[0] + ms//2,
        center[1] - ms//2 : center[1] + ms//2,
        :
    ]
    
    # Resize
    image = transform.resize(image, (IMG_SIZE, IMG_SIZE))
    
    # Roll RGB axis to 0
    image = np.rollaxis(image, -1)
    
    return image

<h2> Preprocessing </h2>

1. Store images into numpy arrays
2. Get labels
3. Convert to one-hot

In [66]:
try:
    X = h5py.File('data.h5')['images'][:]
    Y = h5py.File('data.h5')['labels'][:]
    print("Using preprocessed images from data.h5")
except (IOError, OSError, KeyError):
    print("Could not find preprocessed data ['data.h5']\n")
    root = 'GTSRB/Final_Training/Images/'
    images = []
    labels = []
    paths = glob(os.path.join(root, '*/*.ppm'))
    np.random.shuffle(paths)
    total = len(paths)

    start = time.time()
    print("\nTotal training images:\t{}".format(total))
    for i in range(total):
        sys.stdout.write('\r')
        sys.stdout.write("Processed image # \t{} | {}% complete".format(i, round(i*100/total, 2)))
        sys.stdout.flush()

        path = paths[i]
        image = preprocess(io.imread(path))
        label = int(path.split('\\')[-2])
        images.append(image)
        labels.append(label)
    end = time.time()
    print("\nFinished preprocessing!\n")

    X = np.array(images, dtype='float32')
    Y = np.eye(NUM_CLASSES, dtype='uint8')[labels]
    h5py.File('data.h5').create_dataset('images', data = X)
    h5py.File('data.h5').create_dataset('labels', data = Y)
    print("Saved preprocessed data in data.h5")
    
    print("\nTime spent preprocessing: {} seconds".format(round(end - start)))

Using preprocessed images from data.h5


In [67]:
print(images[0:10])

[array([[[0.54306027, 0.60842261, 0.76118396, ..., 1.        ,
         1.        , 1.        ],
        [0.54100395, 0.60362452, 0.75139688, ..., 1.        ,
         1.        , 1.        ],
        [0.53881347, 0.59385401, 0.72741428, ..., 1.        ,
         1.        , 1.        ],
        ...,
        [0.08873038, 0.13091131, 0.22561343, ..., 0.21580519,
         0.21920789, 0.22052262],
        [0.07848259, 0.10699979, 0.17116349, ..., 0.20943515,
         0.21429675, 0.2163573 ],
        [0.0742013 , 0.09701012, 0.14832996, ..., 0.20750339,
         0.2122362 , 0.21429675]],

       [[0.49455853, 0.55524228, 0.70001678, ..., 1.        ,
         1.        , 1.        ],
        [0.48961892, 0.54842165, 0.68939454, ..., 1.        ,
         1.        , 1.        ],
        [0.47909531, 0.53252035, 0.66246589, ..., 0.99993873,
         1.        , 1.        ],
        ...,
        [0.07690797, 0.11246083, 0.19278962, ..., 0.19187927,
         0.19027387, 0.18957741],
        [0.

In [68]:
print(labels[0:10])

[3, 35, 34, 33, 17, 20, 17, 2, 28, 8]
