In [21]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, regularizers
from tensorflow.keras.datasets import cifar10



### Enable GPU

In [3]:
# Ask TensorFlow to grow GPU memory on demand instead of reserving it all up front.
# Looping over the device list (rather than indexing [0]) keeps this cell from
# raising IndexError on a CPU-only machine and applies the same setting to every
# visible GPU.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)

In [4]:
# True when TensorFlow can see at least one GPU (a non-empty device list is truthy).
bool(tf.config.list_physical_devices('GPU'))

True

### Load CIFAR-10 dataset

In [5]:
# Load the CIFAR-10 train and test splits (images X, labels y) through the Keras dataset helper.
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

In [7]:
# 50000 training samples and 10000 test samples
# 32x32 image size and 3 channels

# Report the shape of each split, one line per array.
for label, array in (
    ("X_train", X_train),
    ("y_train", y_train),
    ("X_test", X_test),
    ("y_test", y_test),
):
    print(f"Shape of {label}: {array.shape}")

Shape of X_train: (50000, 32, 32, 3)
Shape of y_train: (50000, 1)
Shape of X_test: (10000, 32, 32, 3)
Shape of y_test: (10000, 1)


In [6]:
# Scale pixel values from [0, 255] down to [0, 1] as float32.
# The dtype guard makes this cell idempotent: once the arrays are float32 a
# re-run is a no-op, instead of silently dividing by 255 a second time.
# NOTE(review): relies on cifar10.load_data() returning uint8 images (per the Keras docs).
if X_train.dtype == 'uint8':
    X_train = X_train.astype('float32') / 255.0
if X_test.dtype == 'uint8':
    X_test = X_test.astype('float32') / 255.0

### Sequential TF model

In [10]:
# With the default padding="valid" every convolution shrinks the spatial dimensions;
# padding="same" would keep the output the same size as the input.

model = keras.Sequential([
    # Keep the 32x32x3 image shape instead of flattening the input to a 1-D array.
    keras.Input(shape=(32, 32, 3)),
    # 32 output channels, 3x3 kernel, "valid" padding.
    layers.Conv2D(filters=32, kernel_size=3, padding="valid", activation="relu"),
    layers.MaxPooling2D(pool_size=(2, 2)),  # 2x2 pooling window
    layers.Conv2D(filters=64, kernel_size=3, activation="relu"),
    layers.MaxPooling2D(),  # pool size defaults to 2x2
    layers.Conv2D(filters=128, kernel_size=3, activation="relu"),
    layers.Flatten(),
    layers.Dense(units=64, activation="relu"),  # one intermediate dense layer
    layers.Dense(units=10),  # 10 output classes (raw logits, no softmax)
])

In [11]:
# summary() prints the layer table itself and returns None, so wrapping it in
# print() would only append a stray "None" line to the output.
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_1 (Conv2D)           (None, 30, 30, 32)        896       
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 15, 15, 32)       0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 13, 13, 64)        18496     
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 6, 6, 64)         0         
 2D)                                                             
                                                                 
 conv2d_3 (Conv2D)           (None, 4, 4, 128)         73856     
                                                                 
 flatten (Flatten)           (None, 2048)             

In [12]:
# The final Dense layer has no softmax, so the model emits raw logits and the
# loss must be told to apply the softmax itself (from_logits=True).
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=3e-4),
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"],
)

In [13]:
# Train the Sequential model on the training split for 10 epochs.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=10,
    verbose=1,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x17196778be0>

In [14]:
# Evaluate on the held-out test split; returns [loss, accuracy].
model.evaluate(
    x=X_test,
    y=y_test,
    batch_size=64,
    verbose=1,
)



[0.8732865452766418, 0.6937000155448914]

### Functional TF API

In [16]:
def cnn_model():
    """Build the CIFAR-10 CNN with the Keras functional API.

    Three Conv2D -> BatchNormalization -> ReLU stages (with max pooling after
    the first), followed by a 64-unit dense layer and a 10-unit output layer
    that emits raw logits (no softmax).

    Returns:
        keras.Model: an uncompiled model named "cnn_model" taking
        (32, 32, 3) inputs.
    """
    inputs = keras.Input(shape=(32, 32, 3))

    # Stage 1: 3x3 conv with the default "valid" padding, then 2x2 max pooling.
    h = layers.Conv2D(filters=32, kernel_size=3)(inputs)
    h = layers.BatchNormalization()(h)
    h = keras.activations.relu(h)
    h = layers.MaxPooling2D()(h)

    # Stage 2: 5x5 conv, "same" padding keeps the spatial size.
    h = layers.Conv2D(filters=64, kernel_size=5, padding="same")(h)
    h = layers.BatchNormalization()(h)
    h = keras.activations.relu(h)

    # Stage 3: 3x3 conv with the default "valid" padding.
    h = layers.Conv2D(filters=128, kernel_size=3)(h)
    h = layers.BatchNormalization()(h)
    h = keras.activations.relu(h)

    # Classifier head: flatten, one hidden dense layer, then 10 logits.
    h = layers.Flatten()(h)
    h = layers.Dense(units=64, activation="relu")(h)
    outputs = layers.Dense(units=10)(h)

    return keras.Model(inputs=inputs, outputs=outputs, name="cnn_model")

In [17]:
# Build the functional-API CNN. This rebinds `model`, replacing the Sequential model above.
model = cnn_model()

The compile, fit and evaluate steps are the same as for the Sequential API model.

In [18]:
# The model outputs raw logits, so the loss applies the softmax (from_logits=True).
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=3e-4),
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"],
)

In [19]:
# Train the functional-API model on the training split for 10 epochs.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=10,
    verbose=1,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x171a2b10a90>

In [20]:
# Evaluate on the held-out test split; returns [loss, accuracy].
model.evaluate(
    x=X_test,
    y=y_test,
    batch_size=64,
    verbose=1,
)



[1.0541561841964722, 0.7135000228881836]

- We notice that the training accuracy is 93% while the test accuracy is only 71%: the model is overfitting the training data.

- Solution: regularization

### Regularization to reduce overfitting

In [22]:
# Add regularization to each layer: L2 weight decay on every conv/dense kernel
# plus dropout before the output layer.
# Batch normalization normalizes the output of the previous layer and also has a regularizing effect.

def cnn_model_with_reg(l2_factor = 0.01, dropout_rate = 0.5):
    """Build the regularized CIFAR-10 CNN with the Keras functional API.

    Same overall layout as the unregularized CNN (three Conv2D ->
    BatchNormalization -> ReLU stages, a 64-unit dense layer and a 10-unit
    logit output), but every convolution uses "same" padding, every
    conv/dense kernel before the output layer carries an L2 penalty, and a
    dropout layer sits in front of the output layer.

    Args:
        l2_factor: L2 regularization factor applied to each regularized
            kernel. Defaults to 0.01.
        dropout_rate: fraction of units dropped before the output layer.
            Defaults to 0.5.

    Returns:
        keras.Model: an uncompiled model taking (32, 32, 3) inputs and
        producing 10 raw logits (no softmax).
    """
    inputs = keras.Input(shape = (32, 32, 3))

    # layer 1: "same" padding is set explicitly here (the Conv2D default would be "valid")
    x = layers.Conv2D(32, 3, padding = "same", kernel_regularizer = regularizers.l2(l2_factor))(inputs)
    x = layers.BatchNormalization()(x)
    x = keras.activations.relu(x)
    x = layers.MaxPooling2D()(x)

    # layer 2
    x = layers.Conv2D(64, 5, padding = "same", kernel_regularizer = regularizers.l2(l2_factor))(x)
    x = layers.BatchNormalization()(x)
    x = keras.activations.relu(x)

    # layer 3
    x = layers.Conv2D(128, 3, padding = "same", kernel_regularizer = regularizers.l2(l2_factor))(x)
    x = layers.BatchNormalization()(x)
    x = keras.activations.relu(x)

    x = layers.Flatten()(x)
    x = layers.Dense(64, activation = 'relu', kernel_regularizer = regularizers.l2(l2_factor))(x) # layer 4
    x = layers.Dropout(dropout_rate)(x) # dropout takes longer to train
    outputs = layers.Dense(10)(x) # output layer

    # NOTE: the name "cnn_model" is the same one the unregularized model uses;
    # it is kept unchanged here for backward compatibility.
    model = keras.Model(inputs = inputs, outputs = outputs, name = "cnn_model")

    return model

In [23]:
# Build the regularized CNN under its own name so it does not replace `model`.
model_reg = cnn_model_with_reg()

In [25]:
# The model outputs raw logits, so the loss applies the softmax (from_logits=True).
model_reg.compile(
    optimizer=keras.optimizers.Adam(learning_rate=3e-4),
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"],
)

In [27]:
# The regularized model is trained for many more epochs (150 instead of 10).

model_reg.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=150,
    verbose=1,
)

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78

<keras.callbacks.History at 0x172594f8dc0>

In [28]:
# Evaluate the regularized model on the held-out test split; returns [loss, accuracy].
model_reg.evaluate(
    x=X_test,
    y=y_test,
    batch_size=64,
    verbose=1,
)



[1.5183371305465698, 0.5059000253677368]