In [None]:
# CELL 0 : import 

from os import walk 
from keras.preprocessing.image import ImageDataGenerator
from matplotlib.pyplot import imshow, subplots, title, xticks, legend, show, figure
from numpy import zeros, arange, where
from keras import Sequential
from keras.layers import RandomZoom, RandomRotation, Conv2D, MaxPool2D, Dropout, Flatten, Dense
from random import randint
from tensorflow import gather  
from keras.utils import to_categorical

In [None]:
# CELL 1.1 : accessing to data 
# Needs [ CELL 0 : import ]

# defines variables 

#-----------------------------------------------------------------------------

# Dataset folders, relative to the working directory of the notebook.
# Forward slashes are used on purpose: they are accepted on Windows as well as
# on Linux/macOS, and they avoid the invalid "\T" escape sequence that the
# backslash-separated literals contained.
path_training = 'BelgiumTSC_Training/Training'
path_testing = 'BelgiumTSC_Testing/Testing'

# Number of sign classes. It is hard-coded; it can be recomputed from the
# folder layout with: len(next(walk(path_training))[1])
nbr_class = 62
print('number of classes : ', nbr_class)

#-----------------------------------------------------------------------------

# resol is the size every image is resized to when it is loaded
# (it is passed as target_size to flow_from_directory in CELL 1.2 / 1.3).

resol = (50,50) # Square image only ! (data augmentation flip)
print("Resolution of images : ", resol)

In [None]:
# CELL 1.2 : accessing to data 
# Needs [ CELL 1.1 ]

#-----------------------------------------------------------------------------

# Count the images of the training folder. The total is used as the batch size
# below so that one single batch holds the whole training set.

DIR = path_training
counter = 0
for root, dirs, files in walk(DIR) :
    for file in files:
        # str.endswith accepts a tuple of suffixes
        if file.endswith(('.ppm', '.jpeg')):
            counter += 1

print("number of images in the training folder : ", counter)

#-----------------------------------------------------------------------------

# Create a DirectoryIterator over the images of the training folder.
# rescale multiplies every pixel value by 1/255.
# class_mode='sparse' yields one class index per image, which is what the
# following cells expect ('binary' is meant for two-class problems).

train_datagen = ImageDataGenerator(rescale=1./255)
training_set = train_datagen.flow_from_directory(
    path_training,
    target_size=resol,
    batch_size=counter,
    class_mode='sparse',
    color_mode='rgb',
)

# The manual count only looks at .ppm / .jpeg files while Keras accepts more
# formats. If the two numbers differ, one batch no longer contains every
# image, so report it instead of continuing silently.
if training_set.samples != counter:
    print("WARNING : ", counter, " images counted but Keras found ", training_set.samples)

In [None]:
# CELL 1.3 : accessing to data 
# Needs [ CELL 1.1 ]

#-----------------------------------------------------------------------------

# Count the images of the testing folder. The total is used as the batch size
# below so that one single batch holds the whole testing set.

DIR = path_testing
counter = 0
for root, dirs, files in walk(DIR) :
    for file in files:
        # str.endswith accepts a tuple of suffixes
        if file.endswith(('.ppm', '.jpeg')):
            counter += 1

print("number of images in the testing folder : ", counter)


#-----------------------------------------------------------------------------

# Create a DirectoryIterator over the images of the testing folder.
# rescale multiplies every pixel value by 1/255.
# class_mode='sparse' yields one class index per image, which is what the
# following cells expect ('binary' is meant for two-class problems).

test_datagen = ImageDataGenerator(rescale=1./255)
test_set = test_datagen.flow_from_directory(
    path_testing,
    target_size=resol,
    batch_size=counter,
    class_mode='sparse',
    color_mode='rgb',
)

# The manual count only looks at .ppm / .jpeg files while Keras accepts more
# formats. If the two numbers differ, one batch no longer contains every
# image, so report it instead of continuing silently.
if test_set.samples != counter:
    print("WARNING : ", counter, " images counted but Keras found ", test_set.samples)

In [None]:
# CELL 1.4 : storing in arrays
# Needs [ CELL 1.2, CELL 1.3 ]

#-----------------------------------------------------------------------------

# Pull one batch out of each iterator and keep it as (images, labels) arrays.
# Both iterators were created with a batch size equal to the number of images
# counted in their folder, so one batch is meant to be the whole set.

X_train, y_train = next(training_set)
X_test, y_test = next(test_set)

# Report the shape of the four arrays
for label, array in (
    ("Shape of X_train : ", X_train),
    ("Shape of y_train : ", y_train),
    ("Shape of X_test : ", X_test),
    ("Shape of y_test : ", y_test),
):
    print(label, array.shape)

In [None]:
# CELL 1.5 : visualisation of the initial sets
# Needs [ CELL 1.4 ] 

#-----------------------------------------------------------------------------

# Count how many signs of each class the initial sets contain.
# Entry k of each array is the number of labels equal to k.

initial_nbr_train = zeros(nbr_class, dtype=int)
initial_nbr_test = zeros(nbr_class, dtype=int)

initial_nbr_train[:] = [int((y_train == sign).sum()) for sign in range(nbr_class)]
initial_nbr_test[:] = [int((y_test == sign).sum()) for sign in range(nbr_class)]

#-----------------------------------------------------------------------------

# Each piece of the report is followed by an empty line (end="\n\n")

print("Number of each sign in the train set : ", end="\n\n")
print(initial_nbr_train, end="\n\n")
print("Total of signs : ", initial_nbr_train.sum(), end="\n\n")

print("Number of each sign in the test set : ", end="\n\n")
print(initial_nbr_test, end="\n\n")
print("Total of signs : ", initial_nbr_test.sum())

In [None]:
# CELL 1.6 :  visualisation of the dataset
# Needs [ CELL 1.5 ]

# Defines a function that plots a histogram of the classes present in each set
# and also prints the same information as CELL 1.5

#-----------------------------------------------------------------------------

def graphs(nrb_train, y_train):
    """Print the per-class counts and plot the class histogram of both sets.

    Only the training side is parameterised. The test side is always read
    from the notebook-level variables initial_nbr_test and y_test, and the
    number of classes from the notebook-level nbr_class.

    Args:
        nrb_train: array of per-class counts for the training set; it is
            printed together with its sum.
        y_train: training labels, drawn as the first histogram series.
    """

    # Text report; every piece is followed by an empty line (end="\n\n")
    print("Number of each sign in the train set : ", end="\n\n")
    print(nrb_train, end="\n\n")
    print("Total of signs : ", nrb_train.sum(), end="\n\n")

    print("Number of each sign in the test set : ", end="\n\n")
    print(initial_nbr_test, end="\n\n")
    print("Total of signs : ", initial_nbr_test.sum())

    #-----------------------------------------------------------------------------

    # Histogram of the labels of both sets, one bin per class.
    # The bin edges sit on the half-integers so that bin k is centred on k.

    fig, ax = subplots(figsize=(20, 7))
    bin_edges = [k - 0.5 for k in range(nbr_class + 1)]
    ax.hist(
        [y_train, y_test],
        range=(0, nbr_class - 1),
        bins=bin_edges,
        edgecolor='white',
        color=['blueviolet', 'black'],
        label=['y_train', 'y_test'],
    )
    ax.set_title("Visualisation of the number of signs of each class in each set")
    ax.set_xticks(arange(nbr_class))
    ax.legend()
    show()

In [None]:
# CELL 1.7 :  visualisation of the dataset
# Needs [ CELL 1.6 :  visualisation of the dataset ]

# Report and histogram for the initial (not yet augmented) training set

#-----------------------------------------------------------------------------

graphs(nrb_train=initial_nbr_train, y_train=y_train)

In [None]:
# CELL 1.8 : randomization for augmentation 
# Needs [ CELL 1.4 ]

# This cell is about data augmentation 
# We'll randomize our initial set and do our augmentation with this (cells later)

#-----------------------------------------------------------------------------

# Pipeline of random image transformations: a random zoom (factor 0.2)
# followed by a random rotation (factor 0.1).
data_augmentation = Sequential([
    RandomZoom(0.2),
    RandomRotation(0.1),
])

#-----------------------------------------------------------------------------

# We apply the augmentation on our datasets.
# These random layers only transform their input in training mode, so
# training=True is passed explicitly instead of relying on the default of the
# installed Keras version. The layers do not modify their input, so no copy of
# the arrays is needed.
# NOTE(review): the test images are randomized here as well -- confirm that
# this is intended.
augmented_image_train = data_augmentation(X_train, training=True)
augmented_image_test = data_augmentation(X_test, training=True)

#-----------------------------------------------------------------------------

print("Shape of the randomized test set : ", augmented_image_test.shape)
print("Shape of the randomized training set : ", augmented_image_train.shape)

In [None]:
# CELL 1.9 : test of the randomization 
# Needs [ CELL 1.8 ]

# A little test to see the result of the augmentation:
# the same randomly chosen image is shown before and after randomization

#-----------------------------------------------------------------------------

# randint includes both bounds, so the upper bound must be the last valid
# index (len - 1); using len(X_train) could raise an IndexError.
index = randint(0, len(X_train) - 1)
figure()
imshow(X_train[index])
figure()
imshow(augmented_image_train[index])

In [None]:
# CELL 1.10 : augmentation
# Needs [ CELL 1.5 ] (uses X_train, y_train and initial_nbr_train)

#-----------------------------------------------------------------------------

# Second method of data augmentation 
# All classes are represented with the same number of signs: the size of the
# largest class of the initial training set. Classes that are smaller are
# completed with randomly chosen images of that same class.
# NOTE(review): the added images are taken from X_train, not from the
# randomized images of CELL 1.8 -- confirm that this is intended.

lim = max(initial_nbr_train) #500

# memory[i] is the number of images that must be added to class i,
# counter is the total number of images to add.
memory = lim - initial_nbr_train
counter = int(memory.sum())

#-----------------------------------------------------------------------------

# We create new sets that we'll fill with the data of the initial sets + the augmented data.
# The image shape and dtype are taken from X_train rather than hard-coded, so
# the images are not promoted to a wider float type.

X_train_second = zeros((len(X_train) + counter,) + X_train.shape[1:], dtype=X_train.dtype)
y_train_second = zeros(len(y_train) + counter)

X_train_second[:len(X_train)] = X_train
y_train_second[:len(y_train)] = y_train

#-----------------------------------------------------------------------------

# We'll start adding values at this index

index = len(X_train)

nbr_train_second = initial_nbr_train.copy() 

#-----------------------------------------------------------------------------

for i in range(nbr_class) :

    # Positions, then images, of class i in the initial training set
    indices = where(y_train == i)
    augmented_image = X_train[indices[0]]

    # A class without any image cannot be completed; fail with a clear message
    if memory[i] > 0 and len(indices[0]) == 0 :
        raise ValueError("class %d has no image in the training set, it cannot be augmented" % i)

    for j in range(memory[i]) :

        # Random image of class i (randint includes both bounds)
        idx = randint(0, len(indices[0])-1)

        nbr_train_second[i] += 1 

        X_train_second[index] = augmented_image[idx]
        y_train_second[index] = i
        index += 1

#-----------------------------------------------------------------------------

print("Shape of the augmented training set with second method : ", X_train_second.shape)
print("Shape of the augmented training target with second method : ", y_train_second.shape)

In [None]:
# CELL 1.11 :  visualisation of the dataset
# Needs [ CELL 1.10 ]

# Report and histogram for the augmented training set

#-----------------------------------------------------------------------------

graphs(nrb_train=nbr_train_second, y_train=y_train_second)

In [None]:
# CELL 1.12 : categorical
# Needs [ CELL 1.10 ]

# Convert the label vectors to categorical matrices with nbr_class columns

#-----------------------------------------------------------------------------

# Initial sets
y_test_tc = to_categorical(y_test, num_classes=nbr_class)
y_train_tc = to_categorical(y_train, num_classes=nbr_class)

print("Shape of y_train without augmentation : ", y_train_tc.shape)
print("Shape of y_test without augmentation : ", y_test_tc.shape)

#-----------------------------------------------------------------------------

# Augmented training set (the test set is the same as above)
y_train_second_tc = to_categorical(y_train_second, num_classes=nbr_class)

print("Shape of y_train with second method of augmentation : ", y_train_second_tc.shape)
print("Shape of y_test with second method of augmentation : ", y_test_tc.shape)

In [None]:
import keras_tuner as kt 

def model_builder(hp):
  """Build and compile the CNN whose hyperparameters are searched.

  Layer order: two Conv2D layers sharing one filter count, max pooling and
  dropout; two more Conv2D layers sharing a second filter count, max pooling
  and dropout; then Flatten, a Dense layer of tunable size and a softmax
  output layer with nbr_class units.

  The input shape is read from the notebook-level X_train_second and the
  number of output units from the notebook-level nbr_class.

  Args:
    hp: object whose Int() and Float() methods declare each tunable value
      (presumably the keras_tuner HyperParameters instance handed over by
      the tuner -- confirm against the keras_tuner documentation).

  Returns:
    The Sequential model, compiled with the categorical cross-entropy loss,
    the 'adam' optimizer and the 'accuracy' metric.
  """

  model = Sequential()

  # First convolutional group.
  # 'kernel_1' : side of the square kernel of the first Conv2D, from 2 to 7.
  # 'filter_1' : number of filters, from 64 to 128 in steps of 4; the same
  #              value is used by the first and the second Conv2D.

  hp_k1 = hp.Int('kernel_1', min_value = 2, max_value = 7, step = 1)
  hp_f1 = hp.Int('filter_1', min_value = 64, max_value = 128, step = 4)
  model.add(Conv2D(kernel_size=(hp_k1,hp_k1),filters=hp_f1, activation='relu', input_shape=X_train_second.shape[1:]))

  # 'kernel_2' : side of the square kernel of the second Conv2D, from 2 to 5.
  hp_k2 = hp.Int('kernel_2', min_value = 2, max_value = 5, step = 1) 
  model.add(Conv2D(kernel_size=(hp_k2,hp_k2),filters=hp_f1, activation='relu'))

  # 'pool_1' : side of the square window of the first max pooling, from 3 to 5.
  hp_p1 = hp.Int('pool_1', min_value = 3, max_value = 5, step = 1)
  model.add(MaxPool2D(pool_size=(hp_p1,hp_p1)))

  # 'dropout_1' : dropout rate, from 0.1 to 0.5 in steps of 0.05 (default 0.25).
  hp_d1 = hp.Float("dropout_1", min_value=0.1, max_value=0.5, default=0.25, step=0.05)
  model.add(Dropout(rate=hp_d1))


  # Second convolutional group.
  # 'kernel_3' : side of the square kernel of the third Conv2D, from 2 to 4.
  # 'filter_2' : number of filters, from 128 to 256 in steps of 16; the same
  #              value is used by the third and the fourth Conv2D.
  hp_k3 = hp.Int('kernel_3', min_value = 2, max_value = 4, step = 1)
  hp_f2 = hp.Int('filter_2', min_value = 128, max_value = 256, step = 16)
  model.add(Conv2D(kernel_size=(hp_k3,hp_k3),filters=hp_f2, activation='relu'))

  # 'kernel_4' : side of the square kernel of the fourth Conv2D, 2 or 3.
  hp_k4 = hp.Int('kernel_4', min_value = 2, max_value = 3, step = 1) 
  model.add(Conv2D(kernel_size=(hp_k4,hp_k4),filters=hp_f2, activation='relu'))

  # 'pool_2' : side of the square window of the second max pooling, 2 or 3.
  hp_p2 = hp.Int('pool_2', min_value = 2, max_value = 3, step = 1)
  model.add(MaxPool2D(pool_size=(hp_p2,hp_p2)))
  
  # 'dropout_2' : dropout rate, from 0.1 to 0.5 in steps of 0.05 (default 0.25).
  hp_d2 = hp.Float("dropout_2", min_value=0.1, max_value=0.5, default=0.25, step=0.05)
  model.add(Dropout(rate=hp_d2))
  

  # Classifier head
  model.add(Flatten())

  # 'dense' : number of units of the hidden Dense layer, from 62 to 302 in steps of 20.
  hp_units = hp.Int('dense', min_value = 62, max_value = 302, step = 20)

  model.add(Dense(hp_units, activation = 'relu'))
  # Output layer: one unit per class
  model.add(Dense(nbr_class, activation = 'softmax'))

  model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

  return model

In [None]:
from keras.callbacks import EarlyStopping 

# Early-stopping callback handed to the tuner search.
# The monitored quantity is spelled out: 'val_loss' is the default of
# EarlyStopping, so the baseline below is compared against the validation loss.
# NOTE(review): 0.98 looks like an accuracy target -- confirm whether
# 'val_accuracy' was meant to be monitored instead.
stop_early = EarlyStopping(monitor='val_loss', baseline=0.98)

In [None]:
# Hyperband tuner built on model_builder; it ranks the trials on
# 'val_accuracy'. overwrite=True makes it ignore any results already stored
# under my_dir/hyper_tuning.
tuner = kt.Hyperband(
    hypermodel=model_builder,
    objective='val_accuracy',
    max_epochs=3,
    factor=3,
    directory='my_dir',
    project_name='hyper_tuning',
    overwrite=True,
)

In [None]:
# Run the search on the augmented training set.
# NOTE(review): the test set is used as validation data here, so the selected
# hyperparameters are chosen on the test set -- confirm that this is intended.
tuner.search(
    X_train_second,
    y_train_second_tc,
    epochs=3,
    verbose=1,
    validation_data=(X_test, y_test_tc),
    callbacks=[stop_early],
)

In [None]:
# Get the optimal hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials = 1)[0]

# Report the main values. The wording follows model_builder: 'filter_1' is
# used by the first and second Conv2D layers, 'filter_2' by the third and
# fourth ones.
print(f"""
The hyperparameter search is complete.\n 
The optimal kernel in first Convolutional layer is  {best_hps.get('kernel_1')}.\n  
The optimal filter in first and second Convolutional layers is  {best_hps.get('filter_1')}.\n  
The optimal kernel in second Convolutional layer is  {best_hps.get('kernel_2')}.\n
The optimal filter in third and fourth Convolutional layers is  {best_hps.get('filter_2')}.\n
The optimal number of units in the first densely-connected layer is {best_hps.get('dense')}.\n
""")