In [1]:
# Import from the public IPython.display module; IPython.core.display is a
# deprecated location for these names.
from IPython.display import display, HTML

# Stretch the notebook container to the full browser width.
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
# libraries import
import tensorflow as tf
import tensorflow.keras as keras

from tensorflow.keras import layers
from tensorflow.keras.datasets import cifar10

# Ask TensorFlow to allocate GPU memory on demand instead of reserving it all.
# Iterating (rather than indexing devices[0]) keeps this cell working on
# CPU-only machines, where the list is empty, and applies the same setting
# to every visible GPU.
devices = tf.config.list_physical_devices("GPU")
for device in devices:
    tf.config.experimental.set_memory_growth(device, True)

In [3]:
# TODO: plot sample CIFAR-10 images for each category, plus one image at its original (small) size to show how small the inputs are

In [4]:
# Load CIFAR-10 as (images, labels) pairs for the train and test splits,
# then show the shapes of the training images and labels.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
print(x_train.shape, y_train.shape, sep="\n")

(50000, 32, 32, 3)
(50000, 1)


In [5]:
# Normalise the training and testing images by dividing by 255. Why?
# 1. Features on different scales cause training issues: the network leans
#    towards the features with large-scale values.
# 2. Large input values are computationally expensive and memory hungry.
# 3. They slow the convergence of the loss function and can hurt accuracy.
# The cast to float32 is explicit because the result would otherwise default
# to float64.
x_train, x_test = (split.astype("float32") / 255.0 for split in (x_train, x_test))

# Confirm dtypes first, then shapes (train before test in each case).
print(x_train.dtype, x_test.dtype, sep="\n")

print(x_train.shape, x_test.shape, sep="\n")

float32
float32
(50000, 32, 32, 3)
(10000, 32, 32, 3)


In [6]:
# Sequential API: a plain stack of layers, added one at a time.
model = keras.Sequential()
model.add(layers.Input(shape=(32, 32, 3)))  # height, width, channels
model.add(layers.Conv2D(32, (3, 3), padding="valid", activation="relu"))  # 32 kernels (output channels), each 3x3
model.add(layers.MaxPooling2D(pool_size=(2, 2)))
model.add(layers.Conv2D(64, (3, 3), padding="valid", activation="relu"))
model.add(layers.MaxPooling2D())
model.add(layers.Conv2D(128, (3, 3), padding="valid", activation="relu"))
model.add(layers.Flatten())
model.add(layers.Dense(64, activation="relu"))
model.add(layers.Dense(10))  # no activation: outputs are raw logits

model.compile(
    # from_logits=True matches the activation-free final Dense layer.
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias
    # that newer Keras optimizers reject.
    optimizer=keras.optimizers.Adam(learning_rate=3e-4),
    metrics=["accuracy"],
)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 30, 30, 32)        896       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 15, 15, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 13, 13, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 6, 6, 64)          0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 4, 4, 128)         73856     
_________________________________________________________________
flatten (Flatten)            (None, 2048)              0         
_________________________________________________________________
dense (Dense)                (None, 64)                1

In [7]:
# Experiment with batch_size (anywhere from 1024 down to 124) and compare the
# resulting val_accuracy. 15% of the training data is held out for validation.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=124,
    epochs=10,
    validation_split=0.15,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x197d99887f0>

In [8]:
# Evaluate the Sequential model on the test split; the cell displays the
# returned values (loss followed by the compiled accuracy metric).
model.evaluate(x=x_test, y=y_test, batch_size=124)



[1.018444538116455, 0.6412000060081482]

In [9]:
# Functional API
def build_func_model():
    """Build and compile the functional-API CNN with batch normalisation.

    The network applies three Conv2D -> BatchNormalization -> ReLU stages
    (32, 64 and 128 filters, 3x3 kernels), with max pooling after the first
    two, followed by Flatten -> Dense(64, relu) -> Dense(10). The last layer
    has no activation, so the loss is configured with ``from_logits=True``.

    Returns:
        The compiled ``keras.Model``. Its summary is printed as a side effect.
    """
    inputs = layers.Input(shape=(32, 32, 3))

    # Stage 1: convolution, batch norm, ReLU, then downsample.
    x = layers.Conv2D(32, 3)(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)
    x = layers.MaxPooling2D()(x)

    # Stage 2: same pattern with 64 filters.
    x = layers.Conv2D(64, 3)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)
    x = layers.MaxPooling2D()(x)

    # Stage 3: 128 filters, no pooling before the classifier head.
    x = layers.Conv2D(128, 3)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)

    # Classifier head producing 10 raw logits.
    x = layers.Flatten()(x)
    x = layers.Dense(64, activation="relu")(x)
    outputs = layers.Dense(10)(x)

    built_model = keras.Model(inputs=inputs, outputs=outputs)
    built_model.compile(
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        optimizer=keras.optimizers.Adam(3e-4),
        metrics=["accuracy"],
    )
    built_model.summary()

    return built_model

# The builder has its own name so that binding the model to `func_model`
# does not overwrite the function that creates it.
func_model = build_func_model()

Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 32, 32, 3)]       0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 30, 30, 32)        896       
_________________________________________________________________
batch_normalization (BatchNo (None, 30, 30, 32)        128       
_________________________________________________________________
activation (Activation)      (None, 30, 30, 32)        0         
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 15, 15, 32)        0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 13, 13, 64)        18496     
_________________________________________________________________
batch_normalization_1 (Batch (None, 13, 13, 64)       

In [10]:
# Train the functional model with the same settings used for the Sequential
# model so the two runs are comparable.
func_model.fit(
    x=x_train,
    y=y_train,
    batch_size=124,
    epochs=10,
    validation_split=0.15,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x197922cc670>

As we can see, just adding BatchNormalization layers has increased the overall test accuracy, but our training accuracy has shot through the roof, which is a sign of overfitting. We need to handle this issue with regularization techniques.

In [11]:
# Evaluate the functional model on the test split; the cell displays the
# returned values (loss followed by the compiled accuracy metric).
func_model.evaluate(x=x_test, y=y_test, batch_size=124)



[0.9799597859382629, 0.6992999911308289]