In [1]:
from keras.models import Sequential, model_from_json
from keras.layers.core import Dense, Activation, Flatten, Dropout
from keras.layers.convolutional import Convolution2D
from keras.layers.pooling import MaxPooling2D
from keras.utils.np_utils import to_categorical

import pickle
import json

%matplotlib inline

Using TensorFlow backend.


If we want to load the trained model on the Keras Lab for the German Traffic Signs dataset we would have to do the following:

```python
path2json = "/home/ubuntu/SDC/CarND-Keras-Lab/final_trained_model.json"
path2weights = "/home/ubuntu/SDC/CarND-Keras-Lab/final_trained_model.pkl"

# Load architecture
with open(path2json, "r") as fin:
    model_json = fin.readlines()
    
model = model_from_json(model_json[0])

# Load weights
with open(path2weights, "rb") as fin:
    model_weights = pickle.load(fin)
    
model.set_weights(model_weights)
```

However, for this exercise (regarding we are not going to do transfer learning) is easier to define the architecture from scratch.

In [6]:
from keras.datasets import cifar10
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

In [7]:
# Preprocess the data
# Normalization
def normalize(x, a, b):
    return a + (x-x.min())*(b-a)/(x.max() - x.min())

a, b = -0.5, 0.5
X_train_normalized = normalize(X_train, a, b)
X_test_normalized = normalize(X_test, a, b)

# One-hot encoding
y_train_one_hot = to_categorical(y_train, nb_classes=10)
y_test_one_hot = to_categorical(y_test, nb_classes=10)

In [4]:
model = Sequential()

model.add(Convolution2D(nb_filter=32, nb_row=3, nb_col=3, border_mode='valid', input_shape=(32, 32,3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2,2)))

model.add(Convolution2D(nb_filter=64, nb_row=3, nb_col=3, border_mode='valid'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2,2)))

model.add(Flatten())

model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Dense(10)) # In CIFAR10 there are 10 classes instead of 43 in the German Traffic Signs dataset
model.add(Activation('softmax'))


model.compile('adam', 'categorical_crossentropy', ['accuracy'])

In [5]:
history = model.fit(X_train_normalized, y_train_one_hot, batch_size=64, nb_epoch=10, validation_split=0.2)

Train on 40000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Would it take less doubling batch_size? And what about accuracy?

nvidia-smi command said the GPU utilization is only 52%

In [9]:
history = model.fit(X_train_normalized, y_train_one_hot, batch_size=128, nb_epoch=10, validation_split=0.2)

Train on 40000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Now utilization is 61%. It's a bit faster in training and the val_acc is more steep, but at the end it reaches a similar accuracy (in train has greatly improved). My intuition is that a learning rate schedule could help to improved the results after epoch 2.

In [10]:
res = model.evaluate(X_test_normalized, y_test_one_hot)



In [12]:
print(res)
print(model.metrics_names)

[0.97403912444114682, 0.71750000000000003]
['loss', 'acc']


Well, not bad for a baseline. In the German Traffic Signs dataset this architecture leaded to these results:

```python
print(res)
print(model.metrics_names)
[0.16229501442625374, 0.95558194779066086]
['loss', 'acc']
```
