## CIFAR10
  - 10 categories of 32 x 32 sized color images
  - 50000 training and 10000 testing samples
  
CIFAR-10 is a labeled subset of the much larger 80 Million Tiny Images dataset.
  - The main page: https://www.cs.toronto.edu/%7Ekriz/cifar.html
  - About CIFAR: https://www.cs.toronto.edu/%7Ekriz/learning-features-2009-TR.pdf

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

np.random.seed(42)  # Seed NumPy's global random generator for repeatable runs.

import tensorflow as tf

tf.random.set_seed(42)  # Seed TensorFlow's global random generator as well.

import tensorflow.keras

In [2]:
import os
from functools import partial

from sklearn.model_selection import StratifiedShuffleSplit

from tensorflow.keras.datasets.cifar10 import load_data
from tensorflow.keras import Sequential
from tensorflow.keras.layers import InputLayer, Dense, BatchNormalization, Activation
from tensorflow.keras.callbacks import EarlyStopping

## You can download the data from the original link above and load it like this ...

In [3]:
def unpickle(file):
    """Load and return the object stored in a pickle file.

    Parameters
    ----------
    file : str or path-like
        Path of the pickle file to read.

    Returns
    -------
    object
        The unpickled object. ``encoding='bytes'`` makes 8-bit strings written
        by Python 2 come back as ``bytes``, which is why the batch fields are
        looked up with keys such as ``b'data'`` and ``b'labels'``.
    """
    import pickle

    # NOTE: unpickling can execute arbitrary code; only use this on files
    # obtained from a trusted source.
    with open(file, 'rb') as fo:
        return pickle.load(fo, encoding='bytes')

# Load the five training batch files (data_batch_1 ... data_batch_5) into a
# dict keyed by zero-based batch position (0 ... 4).
file_dicts = {}
for i in range(1, 6):
    batch = f'data_batch_{i}'
    filename = os.path.join('.', 'data', 'cifar', 'cifar-10-batches-py', batch)
    file_dicts[i-1] = unpickle(filename)  # batch i is stored under key i-1

In [4]:
def append_data(data, type_, n_batches=None):
    """Concatenate one field from consecutive batch dictionaries.

    Parameters
    ----------
    data : mapping
        Batch dictionaries indexed by the integers ``0 .. n_batches - 1``.
    type_ : hashable
        Key of the field to collect from every batch, e.g. ``b'data'`` or
        ``b'labels'``.
    n_batches : int, optional
        Number of leading batches to combine. Defaults to ``len(data)`` so
        that every loaded batch is used.

    Returns
    -------
    numpy.ndarray
        The per-batch values joined along the first axis, in batch order.

    Raises
    ------
    KeyError
        If a batch index or ``type_`` is missing from ``data``.
    ValueError
        If there are no batches to concatenate.
    """
    if n_batches is None:
        n_batches = len(data)
    # A single concatenate copies each batch once, instead of re-copying the
    # growing result on every loop iteration.
    return np.concatenate([data[i][type_] for i in range(n_batches)])

In [5]:
# Join the loaded training batches into one feature array and one label array.
X_full = append_data(file_dicts, b'data')
y_full = append_data(file_dicts, b'labels')

In [6]:
# Sanity check: 50000 samples, each a flattened 32 x 32 x 3 image (3072 values).
X_full.shape, y_full.shape

((50000, 3072), (50000,))

In [7]:
# The test batch is read from the same directory as the training batches.
test_file = os.path.join('.', 'data', 'cifar', 'cifar-10-batches-py', 'test_batch')
test_file_dict = unpickle(test_file)

In [8]:
X_test = test_file_dict[b'data']
# Coerce the labels to an ndarray so y_test has the same type as y_full
# (np.asarray leaves the data untouched if it already is an array).
y_test = np.asarray(test_file_dict[b'labels'])

In [9]:
# Sanity check: the test set should hold 10000 samples and 10000 labels.
len(X_test), len(y_test)

(10000, 10000)

In [10]:
# Use StratifiedShuffleSplit to split the training data into training and validation sets.
# Stratifying keeps the class proportions equal in both parts.
#
# Either train_size or test_size alone would be enough; both are given for clarity.
# random_state pins the shuffle so that re-running this cell reproduces the same split.
split = StratifiedShuffleSplit(n_splits=1, train_size=0.8, test_size=0.2, random_state=42)

# n_splits=1 yields exactly one (train, validation) index pair, so take it directly.
train_idx, test_idx = next(split.split(X_full, y_full))
X_train, X_val = X_full[train_idx], X_full[test_idx]
y_train, y_val = y_full[train_idx], y_full[test_idx]

In [12]:
# Report the sizes of the two parts produced by the train/validation split.
X_train.shape, len(y_train), X_val.shape, len(y_val)

((40000, 3072), 40000, (10000, 3072), 10000)

In [13]:
# Confirm the stratification: each class should have the same share in both parts.
tuple(pd.Series(labels).value_counts(normalize=True) for labels in (y_train, y_val))

(9    0.1
 8    0.1
 7    0.1
 6    0.1
 5    0.1
 4    0.1
 3    0.1
 2    0.1
 1    0.1
 0    0.1
 dtype: float64, 7    0.1
 6    0.1
 5    0.1
 4    0.1
 3    0.1
 2    0.1
 9    0.1
 1    0.1
 8    0.1
 0    0.1
 dtype: float64)

## ... or, more easily, use TensorFlow's `load_data()` function

In [85]:
# load_data() returns a (features, labels) pair for the training set and one for the test set.
(X_train, y_train), (X_test, y_test) = load_data()

In [86]:
# Turn every image into one flat feature row and the labels into 1-D vectors.
X_train, X_test = (images.reshape(len(images), -1) for images in (X_train, X_test))
y_train, y_test = (labels.flatten() for labels in (y_train, y_test))

In [87]:
# Sanity check: 3072 features per sample (32 x 32 x 3) and 1-D label vectors.
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((50000, 3072), (50000,), (10000, 3072), (10000,))

In [88]:
# Check that the training and test sets contain every class in the same proportion.
tuple(pd.Series(labels).value_counts(normalize=True) for labels in (y_train, y_test))

(9    0.1
 8    0.1
 7    0.1
 6    0.1
 5    0.1
 4    0.1
 3    0.1
 2    0.1
 1    0.1
 0    0.1
 dtype: float64, 7    0.1
 6    0.1
 5    0.1
 4    0.1
 3    0.1
 2    0.1
 9    0.1
 1    0.1
 8    0.1
 0    0.1
 dtype: float64)

In [89]:
# Use StratifiedShuffleSplit to split the training data into training and validation sets.
# Stratifying keeps the class proportions equal in both parts.
#
# Either train_size or test_size alone would be enough; both are given for clarity.
# random_state pins the shuffle so that the same split is produced on every run.
split = StratifiedShuffleSplit(n_splits=1, train_size=0.8, test_size=0.2, random_state=42)

# n_splits=1 yields exactly one (train, validation) index pair, so take it directly.
train_idx, test_idx = next(split.split(X_train, y_train))
X_train_1, X_val = X_train[train_idx], X_train[test_idx]
y_train_1, y_val = y_train[train_idx], y_train[test_idx]

# NOTE: X_train / y_train are rebound to the 80% training part, so running this
# cell a second time would split the already reduced data again. Re-run the
# data loading cells first if the split has to be redone.
X_train = X_train_1
y_train = y_train_1

In [90]:
# Sanity check: the 80/20 split should give 40000 training and 10000 validation samples.
X_train.shape, y_train.shape, X_val.shape, y_val.shape

((40000, 3072), (40000,), (10000, 3072), (10000,))

## Create a model with Batch Normalization layers

In [91]:
def create_model(with_bn=False, input_dim=3072, n_hidden=20, n_units=100, n_classes=10):
    """Build a deep fully connected classifier, optionally with Batch Normalization.

    The network is a stack of ``n_hidden`` He-normal-initialised Dense layers
    with ELU activations, followed by a softmax output layer. The defaults
    reproduce the original fixed architecture (3072 inputs, 20 x 100 hidden
    units, 10 classes).

    Parameters
    ----------
    with_bn : bool, default False
        If True, add a BatchNormalization layer right after the input and
        between every hidden Dense layer and its activation. The hidden Dense
        layers are then created without a bias term.
    input_dim : int, default 3072
        Number of input features per sample.
    n_hidden : int, default 20
        Number of hidden Dense layers.
    n_units : int, default 100
        Number of units in each hidden Dense layer.
    n_classes : int, default 10
        Number of units in the softmax output layer.

    Returns
    -------
    Sequential
        The assembled, not yet compiled, model.
    """
    model = Sequential([
        InputLayer(input_shape=[input_dim])
    ])

    if with_bn:
        model.add(BatchNormalization())          # Normalise the raw inputs first.

    # Shared settings for every hidden layer. With BN the Dense bias is dropped
    # because the BN layer that follows already adds its own offset.
    HeNormalDense = partial(Dense,
                            kernel_initializer='he_normal',
                            use_bias=not with_bn)

    for _ in range(n_hidden):
        model.add(HeNormalDense(n_units))

        if with_bn:
            model.add(BatchNormalization())      # BN sits between Dense and activation.

        model.add(Activation('elu'))

    model.add(Dense(n_classes, activation='softmax'))   # Output layer

    return model

In [92]:
# Baseline network: the architecture without any Batch Normalization layers.
model = create_model(with_bn=False)
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_42 (Dense)             (None, 100)               307300    
_________________________________________________________________
activation_40 (Activation)   (None, 100)               0         
_________________________________________________________________
dense_43 (Dense)             (None, 100)               10100     
_________________________________________________________________
activation_41 (Activation)   (None, 100)               0         
_________________________________________________________________
dense_44 (Dense)             (None, 100)               10100     
_________________________________________________________________
activation_42 (Activation)   (None, 100)               0         
_________________________________________________________________
dense_45 (Dense)             (None, 100)              

In [93]:
# Integer class labels -> sparse categorical cross-entropy; track accuracy while training.
model.compile(
    optimizer='nadam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [94]:
# Stop after 10 epochs without improvement and roll back to the best weights seen.
early_stopping_cb = EarlyStopping(
    restore_best_weights=True,
    patience=10,
)

In [97]:
# Train for at most 100 epochs; early stopping on the validation set may end it sooner.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=100,
    callbacks=[early_stopping_cb],
)

Train on 40000 samples, validate on 10000 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100


In [99]:
# Build the network again with Batch Normalization enabled; this rebinds `model`.
model = create_model(with_bn=True)
model.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization (BatchNo (None, 3072)              12288     
_________________________________________________________________
dense_84 (Dense)             (None, 100)               307200    
_________________________________________________________________
batch_normalization_1 (Batch (None, 100)               400       
_________________________________________________________________
activation_80 (Activation)   (None, 100)               0         
_________________________________________________________________
dense_85 (Dense)             (None, 100)               10000     
_________________________________________________________________
batch_normalization_2 (Batch (None, 100)               400       
_________________________________________________________________
activation_81 (Activation)   (None, 100)              

In [100]:
# Integer class labels -> sparse categorical cross-entropy; track accuracy while training.
model.compile(
    optimizer='nadam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [101]:
# A fresh callback for this run: stop after 10 epochs without improvement
# and roll back to the best weights seen.
early_stopping_cb = EarlyStopping(
    restore_best_weights=True,
    patience=10,
)

In [102]:
# Train for at most 100 epochs; early stopping on the validation set may end it sooner.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=100,
    callbacks=[early_stopping_cb],
)

Train on 40000 samples, validate on 10000 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
