In [13]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets, layers, models, regularizers

from data_preprocessing import one_hot_encode
from sklearn.model_selection import train_test_split

import numpy as np
import pandas as pd

import matplotlib.image as mpimg
import matplotlib.pyplot as plt

## Preparing dataset

In [2]:
def unpickle(file):
    """Load and return the object stored in a pickle file.

    Parameters
    ----------
    file : str or path-like
        Path of the pickle file to read (opened in binary mode).

    Returns
    -------
    object
        The unpickled object. The stream is decoded with
        ``encoding='bytes'``, so 8-bit strings written by Python 2 come back
        as ``bytes`` -- which is why callers index the result with keys such
        as ``b'data'``.

    Notes
    -----
    Unpickling can execute arbitrary code; only call this on trusted files.
    """
    import pickle

    with open(file, 'rb') as fo:
        # Deliberately not named `dict`, which would shadow the built-in.
        payload = pickle.load(fo, encoding='bytes')
    return payload

In [3]:
# Read the five training batch files and collect their images and labels.
# Each batch is stored in a list and everything is joined once after the
# loop: calling np.append inside the loop would re-copy all previously
# accumulated samples on every iteration.
image_batches = []
label_batches = []

for i in range(1, 6):
    raw_data = unpickle(f'../data/data_batch_{i}')
    X_tmp = raw_data[b'data']
    # Fortran-order reshape to (N, 32, 32, 3), then swap axes 1 and 2.
    # NOTE(review): presumably each input row is a flat, channel-major image
    # and this yields (row, column, channel) pictures -- confirm against the
    # dataset's documented layout.
    X_tmp = np.reshape(X_tmp, (-1, 32, 32, 3), order='F').transpose(0, 2, 1, 3)
    y_tmp = np.array(raw_data[b'labels'])
    image_batches.append(X_tmp)
    # Labels are kept as one flat vector of integer class ids (not one-hot).
    label_batches.append(y_tmp.ravel())

# Both arrays are stored with NumPy's default integer dtype.
# NOTE(review): a narrower image dtype would save memory, but it would change
# what the later cells pass to `fit` -- confirm before changing.
X_train = np.concatenate(image_batches, axis=0).astype(int)
y_train = np.concatenate(label_batches).astype(int)

# Hold out 5% of the samples for validation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_validation, y_train, y_validation = train_test_split(X_train, y_train, test_size=0.05, random_state=42)

## Simple CNN model

In [26]:
# Empty Sequential container for the baseline CNN (no regularization).
simple_cnn = keras.Sequential()

In [27]:
# Baseline CNN: three 3x3 convolution stages (32 -> 64 -> 128 filters) with
# 2x2 max pooling after the first two, followed by two 64-unit dense layers
# and a 10-unit output layer with no activation (raw logits).
#
# The whole stack is created in a single expression so that re-running this
# cell always yields exactly these nine layers, instead of appending a second
# copy onto a model that already has them.
simple_cnn = keras.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(10),
])

In [28]:
# Print each layer's output shape and parameter count as a sanity check.
simple_cnn.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_9 (Conv2D)            (None, 30, 30, 32)        896       
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 15, 15, 32)        0         
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 13, 13, 64)        18496     
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 6, 6, 64)          0         
_________________________________________________________________
conv2d_11 (Conv2D)           (None, 4, 4, 128)         73856     
_________________________________________________________________
flatten_3 (Flatten)          (None, 2048)              0         
_________________________________________________________________
dense_9 (Dense)              (None, 64)               

In [29]:
# Labels are integer class ids, hence the sparse cross-entropy; from_logits=True
# because the final Dense(10) layer applies no activation.
simple_cnn.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

In [30]:
# Train for 50 epochs, passing the held-out split as validation data.
# The History object returned by fit() is kept so the per-epoch metrics stay
# available (e.g. for plotting learning curves) rather than being discarded
# after its repr is echoed.
simple_cnn_history = simple_cnn.fit(
    X_train,
    y_train,
    epochs=50,
    validation_data=(X_validation, y_validation),
)

Train on 47500 samples, validate on 2500 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x1a6a802ba8>

### Conclusion
The model is not generalizing to the validation set -- validation accuracy is stuck below 0.68. On the training set, accuracy is about 0.95, which is acceptable (for now).

Next step: regularization, providing more training examples (data augmentation).

## Simple CNN model with regularization

In [21]:
# Empty Sequential container for the L2-regularized variant of the baseline CNN.
simple_cnn_reg = keras.Sequential()

In [22]:
# Same layer stack as the baseline CNN, but every convolution and hidden dense
# layer carries an L2 kernel penalty of 0.01; the 10-unit output layer is left
# unregularized and has no activation (raw logits).
#
# The model is built in a single expression so that re-running this cell
# always yields exactly these nine layers, instead of appending a second copy
# onto a model that already has them.
simple_cnn_reg = keras.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3),
                  kernel_regularizer=regularizers.l2(0.01)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu',
                  kernel_regularizer=regularizers.l2(0.01)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu',
                  kernel_regularizer=regularizers.l2(0.01)),
    layers.Flatten(),
    layers.Dense(64, activation='relu',
                 kernel_regularizer=regularizers.l2(0.01)),
    layers.Dense(64, activation='relu',
                 kernel_regularizer=regularizers.l2(0.01)),
    layers.Dense(10),
])

In [23]:
# Print each layer's output shape and parameter count as a sanity check.
simple_cnn_reg.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_6 (Conv2D)            (None, 30, 30, 32)        896       
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 15, 15, 32)        0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 13, 13, 64)        18496     
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 6, 6, 64)          0         
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 4, 4, 128)         73856     
_________________________________________________________________
flatten_2 (Flatten)          (None, 2048)              0         
_________________________________________________________________
dense_6 (Dense)              (None, 64)               

In [24]:
# Labels are integer class ids, hence the sparse cross-entropy; from_logits=True
# because the final Dense(10) layer applies no activation.
simple_cnn_reg.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

In [25]:
# Train for 50 epochs, passing the held-out split as validation data.
# The History object returned by fit() is kept so the per-epoch metrics stay
# available (e.g. for plotting learning curves) rather than being discarded
# after its repr is echoed.
simple_cnn_reg_history = simple_cnn_reg.fit(
    X_train,
    y_train,
    epochs=50,
    validation_data=(X_validation, y_validation),
)

Train on 47500 samples, validate on 2500 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x1a6b434be0>

### Conclusion
The model is generalizing better, but it is also much weaker: accuracy on the validation set is at the same level as before, and it only got worse on the training set.

Next step: adding more layers, lowering regularization.

## Deeper CNN with regularization

In [36]:
# Empty Sequential container for the deeper, L2-regularized CNN.
deep_cnn_reg = keras.Sequential()

In [37]:
# Deeper variant: the same three convolution stages (32 -> 64 -> 128 filters),
# now with a lighter L2 kernel penalty of 0.001, followed by a larger dense
# head (128, 128, 128, 64 units) that keeps the 0.01 penalty. The 10-unit
# output layer is unregularized and has no activation (raw logits).
#
# The model is built in a single expression so that re-running this cell
# always yields exactly these eleven layers, instead of appending a second
# copy onto a model that already has them.
deep_cnn_reg = keras.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3),
                  kernel_regularizer=regularizers.l2(0.001)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu',
                  kernel_regularizer=regularizers.l2(0.001)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu',
                  kernel_regularizer=regularizers.l2(0.001)),
    layers.Flatten(),
    layers.Dense(128, activation='relu',
                 kernel_regularizer=regularizers.l2(0.01)),
    layers.Dense(128, activation='relu',
                 kernel_regularizer=regularizers.l2(0.01)),
    layers.Dense(128, activation='relu',
                 kernel_regularizer=regularizers.l2(0.01)),
    layers.Dense(64, activation='relu',
                 kernel_regularizer=regularizers.l2(0.01)),
    layers.Dense(10),
])

In [38]:
# Print each layer's output shape and parameter count as a sanity check.
deep_cnn_reg.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_15 (Conv2D)           (None, 30, 30, 32)        896       
_________________________________________________________________
max_pooling2d_10 (MaxPooling (None, 15, 15, 32)        0         
_________________________________________________________________
conv2d_16 (Conv2D)           (None, 13, 13, 64)        18496     
_________________________________________________________________
max_pooling2d_11 (MaxPooling (None, 6, 6, 64)          0         
_________________________________________________________________
conv2d_17 (Conv2D)           (None, 4, 4, 128)         73856     
_________________________________________________________________
flatten_5 (Flatten)          (None, 2048)              0         
_________________________________________________________________
dense_17 (Dense)             (None, 128)              

In [39]:
# Labels are integer class ids, hence the sparse cross-entropy; from_logits=True
# because the final Dense(10) layer applies no activation.
deep_cnn_reg.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

In [40]:
# Train for 50 epochs, passing the held-out split as validation data.
# The History object returned by fit() is kept so the per-epoch metrics stay
# available (e.g. for plotting learning curves) rather than being discarded
# after its repr is echoed.
deep_cnn_reg_history = deep_cnn_reg.fit(
    X_train,
    y_train,
    epochs=50,
    validation_data=(X_validation, y_validation),
)

Train on 47500 samples, validate on 2500 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x1aa53868d0>

### Conclusion
Accuracy on the validation set is much better than before, but there is again a problem with overfitting.

Next step: data normalization and augmentation. 