In [1]:
from warnings import simplefilter

simplefilter(action='ignore', category=FutureWarning)

In [2]:
from keras.datasets import cifar10  # subroutines for fetching the CIFAR-10 dataset
from keras.models import Model  # basic class for specifying and training a neural network
from keras.layers import Input, Convolution2D, MaxPooling2D, Dense, Dropout, Flatten
from keras.utils import np_utils  # utilities for one-hot encoding of ground truth values
import numpy as np

2023-05-02 14:44:59.169671: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


## a CNN will typically have more hyperparameters than an MLP

The **batch size**, representing the number of training examples used simultaneously during a single iteration of the gradient descent algorithm;

The **number of epochs**, representing the number of times the training algorithm will iterate over the entire training set before terminating;

The **kernel sizes** in the convolutional layers;

The **pooling size** in the pooling layers;

The **number of kernels** in the convolutional layers;

The **dropout probability** (we will apply dropout after each pooling, and after the fully connected layer) to prevent overfitting

The **number of neurons** in the fully connected layer of the MLP

In [3]:
# --- Training schedule ---
batch_size = 32   # mini-batch size: 32 training examples per iteration
num_epochs = 10   # passes over the entire training set (200 was the earlier setting; even 10 is a lot)

# --- Convolution / pooling geometry ---
kernel_size = 3   # 3x3 kernels in every convolutional layer
pool_size = 2     # 2x2 window in every pooling layer

# --- Number of kernels per convolutional layer ---
conv_depth_1 = 32   # used up to the first pooling layer
conv_depth_2 = 64   # used after the first pooling layer

# --- Dropout probabilities ---
drop_prob_1 = 0.25  # applied after each pooling layer
drop_prob_2 = 0.5   # applied in the fully connected (FC) layer

# --- Classifier head ---
hidden_size = 512   # number of neurons in the FC layer

In [4]:
# Fetch the CIFAR-10 train and test splits, each an (images, labels) pair
train_split, test_split = cifar10.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

# The training images form a 4-D array: (examples, height, width, channels),
# i.e. (50000, 32, 32, 3) for CIFAR-10
num_train, height, width, depth = X_train.shape

print(f"\nnum_train: {num_train}\nheight: {height}\nwidth: {width} \ndepth: {depth}")


num_train: 50000
height: 32
width: 32 
depth: 3


## `shape[0]` gives the number of items along the first axis (here, the number of examples)

In [5]:
# Size of the first axis = number of test examples (10000 for CIFAR-10)
num_test = X_test.shape[0]

# Distinct label values found in the training set (10 image classes)
class_labels = np.unique(y_train)
num_classes = class_labels.shape[0]

print(f"\nnum_test: {num_test}\nnum_classes: {num_classes}")


num_test: 10000
num_classes: 10


## How to normalize

In [6]:
# Convert to float32 so the in-place divisions below produce fractional values
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

# Derive the scale factor from the training set ONLY and apply the same factor
# to both splits. The test set must be preprocessed exactly like the training
# data; dividing it by its own maximum would silently put the two splits on
# different scales whenever their maxima differ.
pixel_scale = np.max(X_train)  # presumably 255 for 8-bit images — TODO confirm

X_train /= pixel_scale  # training data normalized to the [0, 1] range
X_test /= pixel_scale   # test data on the same scale as the training data


## One-hot encode the labels

### to_categorical

```py
tf.keras.utils.to_categorical(
    y, num_classes=None, dtype="float32"
)
```

Converts a class vector (integers) to binary class matrix.


In [7]:
# One-hot encode the integer class labels of both splits; every label becomes
# a binary row vector with num_classes entries
Y_train, Y_test = (
    np_utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test)
)

## Model

The network has **four** Convolution2D layers, with a MaxPooling2D layer after the second and after the fourth convolution.

The output of the second pooling layer (after dropout) is **flattened to 1D** (via the Flatten layer) and passed through two fully connected (Dense) layers.

**ReLU activations** will once again be used for all layers except the output dense layer, which will use a softmax activation (for purposes of probabilistic classification)

**Dropout** is used for regularization, to prevent overfitting.

In [8]:
def _conv_conv_pool(tensor, n_kernels):
    """Apply Conv -> Conv -> MaxPool -> Dropout to `tensor` and return the result.

    Both convolutions use `n_kernels` square kernels of side `kernel_size`
    with 'same' padding and ReLU activation. Pooling uses a
    `pool_size` x `pool_size` window, and the dropout that follows uses
    probability `drop_prob_1`.
    """
    kernel_shape = (kernel_size, kernel_size)
    for _ in range(2):
        tensor = Convolution2D(n_kernels, kernel_shape, padding='same', activation='relu')(tensor)
    tensor = MaxPooling2D(pool_size=(pool_size, pool_size))(tensor)
    return Dropout(drop_prob_1)(tensor)


# Channels-last input: depth goes last in the TensorFlow back-end (first in Theano)
inp = Input(shape=(height, width, depth))

# Stage 1: Conv [32] -> Conv [32] -> Pool (with dropout on the pooling layer)
features = _conv_conv_pool(inp, conv_depth_1)

# Stage 2: Conv [64] -> Conv [64] -> Pool (with dropout on the pooling layer)
features = _conv_conv_pool(features, conv_depth_2)

# Classifier head: flatten to 1D, then FC -> ReLU (with dropout) -> softmax
flat = Flatten()(features)
hidden = Dense(hidden_size, activation='relu')(flat)
drop_3 = Dropout(drop_prob_2)(hidden)

out = Dense(num_classes, activation='softmax')(drop_3)

2023-05-02 14:45:04.796515: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [9]:
# A functional-API model is specified by naming its input and output layers
model = Model(outputs=out, inputs=inp)

# Print the layer-by-layer overview (output shapes and parameter counts)
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 32, 32, 3)]       0         
                                                                 
 conv2d (Conv2D)             (None, 32, 32, 32)        896       
                                                                 
 conv2d_1 (Conv2D)           (None, 32, 32, 32)        9248      
                                                                 
 max_pooling2d (MaxPooling2D  (None, 16, 16, 32)       0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 16, 16, 32)        0         
                                                                 
 conv2d_2 (Conv2D)           (None, 16, 16, 64)        18496     
                                                             

## Compile the model

In [10]:
model.compile(
    optimizer='adam',                 # Adam optimiser
    loss='categorical_crossentropy',  # cross-entropy loss function
    metrics=['accuracy'],             # report the accuracy
)


## Train the model using the training set

In [11]:
# Train on the training set, holding out 10% of the data for validation.
# The returned History object is bound to a name so the per-epoch metrics
# (in `history.history`) stay available for later inspection, and so its bare
# `<keras.callbacks.History at 0x...>` repr does not become the cell output.
history = model.fit(
    X_train, Y_train,
    batch_size=batch_size,
    epochs=num_epochs,
    validation_split=0.1,
    verbose=1,
)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fd57795dfa0>

## Evaluate the trained model on the test set

In [12]:
# Score the trained model on the test set; the displayed list is [loss, accuracy]
model.evaluate(x=X_test, y=Y_test, verbose=1)




[0.6868680715560913, 0.765999972820282]

# Keras One-Hot-Encoding

Keras' `np_utils.to_categorical()` performs one-hot encoding.

It is used to **convert integer class labels** into a **one-hot encoded representation.**

One-hot encoding is a process that **converts categorical data**, such as **class labels**, into a **binary vector** where each element represents a unique category.

It has a value of 1 if the sample belongs to that category, and 0 otherwise.

For example, if we have 3 classes represented by integers 0, 1, and 2, `to_categorical()` will convert a list of labels `[0, 1, 2, 1]` into a one-hot encoded matrix of shape `(4, 3)` (4 rows, 3 columns):

```py
array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [0., 1., 0.]], dtype=float32)
```

In this matrix, the **rows** correspond to the **samples**

The **columns** correspond to the **unique categories.**

The value of 1 in each row indicates the category to which the sample belongs.