In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets, layers, models, regularizers

from data_preprocessing import one_hot_encode
from sklearn.model_selection import train_test_split

import numpy as np
import pandas as pd

import matplotlib.image as mpimg
import matplotlib.pyplot as plt

from sklearn import preprocessing

## Preparing dataset

In [2]:
def unpickle(file):
    """Load and return the object pickled in `file`.

    The file is opened in binary mode and unpickled with
    ``encoding='bytes'``, so 8-bit strings written by Python 2 come back as
    ``bytes`` (which is why callers index the result with ``b'...'`` keys).

    NOTE(review): unpickling can execute arbitrary code -- only use this on
    trusted files.
    """
    import pickle

    with open(file, 'rb') as handle:
        return pickle.load(handle, encoding='bytes')

In [3]:
# Load the five training batches. Batches are collected in lists and
# concatenated once: growing the arrays with np.append re-copies everything
# loaded so far on every iteration, and seeding them with an `int` array
# widens the pixel data to the platform int (presumably from uint8 -- confirm).
X_batches, y_batches = [], []
for i in range(1, 6):
    raw_data = unpickle(f'../data/data_batch_{i}')
    X_batches.append(np.asarray(raw_data[b'data']))
    y_batches.append(np.asarray(raw_data[b'labels']))

X_all = np.concatenate(X_batches, axis=0)  # one flattened 32*32*3 image per row
y_all = np.concatenate(y_batches, axis=0)  # 1-D vector of integer class labels
assert X_all.shape == (y_all.shape[0], 32 * 32 * 3), X_all.shape

# Hold out 5% of the samples for validation (fixed seed for reproducibility).
X_train, X_validation, y_train, y_validation = train_test_split(
    X_all, y_all, test_size=0.05, random_state=42)

# Fit the per-feature standardisation on the training split only, then apply
# the same statistics to the validation split so nothing leaks from it.
ss = preprocessing.StandardScaler().fit(X_train)
X_train = ss.transform(X_train)
X_validation = ss.transform(X_validation)

# Turn each flat row into a (32, 32, 3) image: a Fortran-order reshape followed
# by swapping axes 1 and 2.
# NOTE(review): this assumes rows are stored channel-planar (all values of one
# channel, row-major, then the next channel) -- confirm against the dataset.
X_train = np.reshape(X_train, (-1, 32, 32, 3), order='F').transpose(0, 2, 1, 3)
X_validation = np.reshape(X_validation, (-1, 32, 32, 3), order='F').transpose(0, 2, 1, 3)
assert X_train.shape[1:] == X_validation.shape[1:] == (32, 32, 3)

## Simple CNN model

In [4]:
# Empty Sequential container for the baseline CNN; layers are appended in the following cell.
simple_cnn = keras.Sequential()

In [5]:
# Baseline stack: three 3x3 conv stages (32 -> 64 -> 128 filters) with 2x2
# max-pooling after the first two, then two 64-unit dense layers and a
# 10-unit output layer.
for _layer in (
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(10),  # no activation: the loss is configured with from_logits=True
):
    simple_cnn.add(_layer)

In [6]:
# Print each layer's output shape and parameter count.
simple_cnn.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 30, 30, 32)        896       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 15, 15, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 13, 13, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 6, 6, 64)          0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 4, 4, 128)         73856     
_________________________________________________________________
flatten (Flatten)            (None, 2048)              0         
_________________________________________________________________
dense (Dense)                (None, 64)                1

In [7]:
# Adam optimizer; sparse categorical cross-entropy computed on raw logits
# (the output layer has no activation); accuracy is tracked during training.
simple_cnn.compile(
    optimizer='adam',
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

In [8]:
# Train for 30 epochs, evaluating on the held-out validation split each epoch.
# The returned History is kept so the per-epoch loss/accuracy values can be
# inspected or plotted afterwards (simple_cnn_history.history) rather than
# being discarded; assigning it also avoids a bare object repr as cell output.
simple_cnn_history = simple_cnn.fit(
    X_train, y_train,
    epochs=30,
    validation_data=(X_validation, y_validation),
)

Train on 47500 samples, validate on 2500 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x1a4a96bac8>

### Conclusion
The model does not generalize to the validation set: validation accuracy stays below 0.73, while training accuracy is about 0.95.

Next step: regularization.

## Simple CNN model with regularization

In [9]:
# Empty Sequential container for the L2-regularized CNN; layers are appended in the following cell.
simple_cnn_reg = keras.Sequential()

In [10]:
# Same layer stack as the baseline model, with an L2 kernel penalty (factor
# 0.01) on every conv and hidden dense layer; the output layer is unpenalized.
for _layer in (
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3),
                  kernel_regularizer=regularizers.l2(0.01)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu',
                  kernel_regularizer=regularizers.l2(0.01)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu',
                  kernel_regularizer=regularizers.l2(0.01)),
    layers.Flatten(),
    layers.Dense(64, activation='relu',
                 kernel_regularizer=regularizers.l2(0.01)),
    layers.Dense(64, activation='relu',
                 kernel_regularizer=regularizers.l2(0.01)),
    layers.Dense(10),  # no activation: the loss is configured with from_logits=True
):
    simple_cnn_reg.add(_layer)

In [11]:
# Print each layer's output shape and parameter count.
simple_cnn_reg.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_3 (Conv2D)            (None, 30, 30, 32)        896       
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 15, 15, 32)        0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 13, 13, 64)        18496     
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 6, 6, 64)          0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 4, 4, 128)         73856     
_________________________________________________________________
flatten_1 (Flatten)          (None, 2048)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 64)               

In [12]:
# Adam optimizer; sparse categorical cross-entropy computed on raw logits
# (the output layer has no activation); accuracy is tracked during training.
simple_cnn_reg.compile(
    optimizer='adam',
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

In [13]:
# Train for 30 epochs, evaluating on the held-out validation split each epoch.
# The returned History is kept so the per-epoch loss/accuracy values can be
# inspected or plotted afterwards (simple_cnn_reg_history.history) rather than
# being discarded; assigning it also avoids a bare object repr as cell output.
simple_cnn_reg_history = simple_cnn_reg.fit(
    X_train, y_train,
    epochs=30,
    validation_data=(X_validation, y_validation),
)

Train on 47500 samples, validate on 2500 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x1b44063f60>

### Conclusion
Model is generalizing better, but it's also much weaker - accuracy on both validation set and training set is much worse.

Next step: adding more layers, lowering regularization.

## Deeper CNN with regularization

In [14]:
# Empty Sequential container for the deeper regularized CNN; layers are appended in the following cell.
deep_cnn_reg = keras.Sequential()

In [15]:
# Same conv front-end as the earlier models, with the L2 factor on the conv
# kernels lowered to 0.001, followed by a larger dense head (128, 128, 128, 64).
# NOTE(review): only the conv layers use the lowered 0.001 factor; the dense
# layers still use 0.01 -- confirm this is intended.
for _layer in (
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3),
                  kernel_regularizer=regularizers.l2(0.001)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu',
                  kernel_regularizer=regularizers.l2(0.001)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu',
                  kernel_regularizer=regularizers.l2(0.001)),
    layers.Flatten(),
    layers.Dense(128, activation='relu',
                 kernel_regularizer=regularizers.l2(0.01)),
    layers.Dense(128, activation='relu',
                 kernel_regularizer=regularizers.l2(0.01)),
    layers.Dense(128, activation='relu',
                 kernel_regularizer=regularizers.l2(0.01)),
    layers.Dense(64, activation='relu',
                 kernel_regularizer=regularizers.l2(0.01)),
    layers.Dense(10),  # no activation: the loss is configured with from_logits=True
):
    deep_cnn_reg.add(_layer)

In [16]:
# Print each layer's output shape and parameter count.
deep_cnn_reg.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_6 (Conv2D)            (None, 30, 30, 32)        896       
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 15, 15, 32)        0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 13, 13, 64)        18496     
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 6, 6, 64)          0         
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 4, 4, 128)         73856     
_________________________________________________________________
flatten_2 (Flatten)          (None, 2048)              0         
_________________________________________________________________
dense_6 (Dense)              (None, 128)              

In [17]:
# Adam optimizer; sparse categorical cross-entropy computed on raw logits
# (the output layer has no activation); accuracy is tracked during training.
deep_cnn_reg.compile(
    optimizer='adam',
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

In [19]:
# Train for 30 epochs, evaluating on the held-out validation split each epoch.
# The returned History is kept so the per-epoch loss/accuracy values can be
# inspected or plotted afterwards (deep_cnn_reg_history.history) rather than
# being discarded; assigning it also avoids a bare object repr as cell output.
deep_cnn_reg_history = deep_cnn_reg.fit(
    X_train, y_train,
    epochs=30,
    validation_data=(X_validation, y_validation),
)

Train on 47500 samples, validate on 2500 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x1a9ae22668>

### Conclusion
Accuracy on the validation set is ~0.74, and it is more or less consistent with the results on the training set.

Next step: add data augmentation and use a more advanced architecture, possibly ResNet (in the next notebook).