In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import cifar10

In [2]:
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train = x_train.astype("float32")/255.0
x_test  =  x_test.astype("float32")/255.0

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


# Sequential

In [3]:
model = keras.Sequential([
        layers.Input(shape=(32,32,3)),
        layers.Conv2D(32, (3,3), padding='valid', activation='relu'),
        layers.MaxPooling2D(pool_size=(2,2)),
        layers.Conv2D(64, (3,3), activation='relu'),
        layers.MaxPooling2D(pool_size=(2,2)),
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(10),
])

print(model.summary())

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 30, 30, 32)        896       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 15, 15, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 13, 13, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 6, 6, 64)          0         
_________________________________________________________________
flatten (Flatten)            (None, 2304)              0         
_________________________________________________________________
dense (Dense)                (None, 64)                147520    
_________________________________________________________________
dense_1 (Dense)              (None, 10)                6

In [4]:
model.compile(
    loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(lr=3e-4),
    metrics=['accuracy']
)

model.fit(x_train, y_train, batch_size=64, epochs=10, verbose=2)
model.evaluate(x_test, y_test, batch_size=64, verbose=2)

Epoch 1/10
782/782 - 3s - loss: 1.6921 - accuracy: 0.4017
Epoch 2/10
782/782 - 3s - loss: 1.3723 - accuracy: 0.5167
Epoch 3/10
782/782 - 3s - loss: 1.2596 - accuracy: 0.5586
Epoch 4/10
782/782 - 3s - loss: 1.1772 - accuracy: 0.5876
Epoch 5/10
782/782 - 3s - loss: 1.1139 - accuracy: 0.6148
Epoch 6/10
782/782 - 3s - loss: 1.0643 - accuracy: 0.6312
Epoch 7/10
782/782 - 3s - loss: 1.0204 - accuracy: 0.6473
Epoch 8/10
782/782 - 3s - loss: 0.9822 - accuracy: 0.6599
Epoch 9/10
782/782 - 3s - loss: 0.9483 - accuracy: 0.6733
Epoch 10/10
782/782 - 3s - loss: 0.9169 - accuracy: 0.6849
157/157 - 0s - loss: 0.9981 - accuracy: 0.6574


[0.9981233477592468, 0.6574000120162964]

# Funtional

In [5]:
def my_model():
  inputs = layers.Input(shape=(32,32,3))

  x = layers.Conv2D(32, 2)(inputs)
  x = layers.BatchNormalization()(x)
  x = keras.activations.relu(x)
  x = layers.MaxPooling2D()(x)

  x = layers.Conv2D(64, 5, padding='same')(x)
  x = layers.BatchNormalization()(x)
  x = keras.activations.relu(x)
  x = layers.MaxPooling2D()(x)

  x = layers.Conv2D(128, 3)(x)
  x = layers.BatchNormalization()(x)
  x = keras.activations.relu(x)
  x = layers.MaxPooling2D()(x)
  
  x = layers.Flatten()(x)

  x = layers.Dense(64, activation='relu')(x)

  outputs = layers.Dense(10)(x)
  model = keras.Model(inputs=inputs, outputs=outputs)
  return model

model = my_model()
print(model.summary())


Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 32, 32, 3)]       0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 31, 31, 32)        416       
_________________________________________________________________
batch_normalization (BatchNo (None, 31, 31, 32)        128       
_________________________________________________________________
tf_op_layer_Relu (TensorFlow [(None, 31, 31, 32)]      0         
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 15, 15, 32)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 15, 15, 64)        51264     
_________________________________________________________________
batch_normalization_1 (Batch (None, 15, 15, 64)       

In [6]:
model.compile(
    loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(lr=3e-4),
    metrics=['accuracy']
)

model.fit(x_train, y_train, batch_size=64, epochs=10, verbose=2)
model.evaluate(x_test, y_test, batch_size=64, verbose=2)

Epoch 1/10
782/782 - 240s - loss: 1.3113 - accuracy: 0.5337
Epoch 2/10
782/782 - 236s - loss: 0.9321 - accuracy: 0.6721
Epoch 3/10
782/782 - 237s - loss: 0.7644 - accuracy: 0.7332
Epoch 4/10
782/782 - 240s - loss: 0.6380 - accuracy: 0.7786
Epoch 5/10
782/782 - 237s - loss: 0.5562 - accuracy: 0.8073
Epoch 6/10
782/782 - 241s - loss: 0.4688 - accuracy: 0.8366
Epoch 7/10
782/782 - 239s - loss: 0.3891 - accuracy: 0.8667
Epoch 8/10
782/782 - 237s - loss: 0.3259 - accuracy: 0.8889
Epoch 9/10
782/782 - 240s - loss: 0.2594 - accuracy: 0.9149
Epoch 10/10
782/782 - 237s - loss: 0.2133 - accuracy: 0.9292
157/157 - 11s - loss: 1.0670 - accuracy: 0.6912


[1.066959023475647, 0.6912000179290771]

## Regularization
1) Reduce model capacity <br>
2) L2-regularization<br>
3) Dropout (Since many outputs are dropped, it take longer to train) <br>
4) Early Stopping <br>
5) Data Augmentation <br>
6) Batch Normalization (mainly for faster training but also regularizer effect)

In [11]:
from tensorflow.keras import regularizers

def my_model():
  inputs = layers.Input(shape=(32,32,3))

  x = layers.Conv2D(
      32, 2, padding='same', kernel_regularizer=regularizers.l2(0.01)
      )(inputs)
  x = layers.BatchNormalization()(x) 
  x = keras.activations.relu(x)
  x = layers.MaxPooling2D()(x)

  x = layers.Conv2D(
      64, 5, padding='same', kernel_regularizer=regularizers.l2(0.01)
      )(x)
  x = layers.BatchNormalization()(x)
  x = keras.activations.relu(x)
  x = layers.MaxPooling2D()(x)

  x = layers.Conv2D(
      128, 3, padding='same', kernel_regularizer=regularizers.l2(0.01)
      )(x)
  x = layers.BatchNormalization()(x)
  x = keras.activations.relu(x)


  x = layers.Flatten()(x)
  x = layers.Dense(
      64, activation='relu', kernel_regularizer=regularizers.l2(0.01)
      )(x)
  x = layers.Dropout(0.5)(x)

  outputs = layers.Dense(10)(x)
  model = keras.Model(inputs=inputs, outputs=outputs)
  return model

model = my_model()
print(model.summary())

Model: "functional_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_6 (InputLayer)         [(None, 32, 32, 3)]       0         
_________________________________________________________________
conv2d_11 (Conv2D)           (None, 32, 32, 32)        416       
_________________________________________________________________
batch_normalization_9 (Batch (None, 32, 32, 32)        128       
_________________________________________________________________
tf_op_layer_Relu_9 (TensorFl [(None, 32, 32, 32)]      0         
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 16, 16, 32)        0         
_________________________________________________________________
conv2d_12 (Conv2D)           (None, 16, 16, 64)        51264     
_________________________________________________________________
batch_normalization_10 (Batc (None, 16, 16, 64)       

In [12]:
model.compile(
    loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(lr=3e-4),
    metrics=['accuracy']
)

model.fit(x_train, y_train, batch_size=64, epochs=50, verbose=2)
model.evaluate(x_test, y_test, batch_size=64, verbose=2)

Epoch 1/50
782/782 - 5s - loss: 2.9935 - accuracy: 0.3501
Epoch 2/50
782/782 - 5s - loss: 1.8993 - accuracy: 0.4548
Epoch 3/50
782/782 - 5s - loss: 1.6145 - accuracy: 0.5079
Epoch 4/50
782/782 - 5s - loss: 1.5151 - accuracy: 0.5310
Epoch 5/50
782/782 - 5s - loss: 1.4544 - accuracy: 0.5520
Epoch 6/50
782/782 - 5s - loss: 1.4202 - accuracy: 0.5602
Epoch 7/50
782/782 - 5s - loss: 1.3985 - accuracy: 0.5719
Epoch 8/50
782/782 - 6s - loss: 1.3745 - accuracy: 0.5834
Epoch 9/50
782/782 - 5s - loss: 1.3522 - accuracy: 0.5918
Epoch 10/50
782/782 - 5s - loss: 1.3486 - accuracy: 0.5930
Epoch 11/50
782/782 - 5s - loss: 1.3298 - accuracy: 0.6006
Epoch 12/50
782/782 - 5s - loss: 1.3137 - accuracy: 0.6072
Epoch 13/50
782/782 - 5s - loss: 1.3034 - accuracy: 0.6124
Epoch 14/50
782/782 - 5s - loss: 1.2878 - accuracy: 0.6218
Epoch 15/50
782/782 - 5s - loss: 1.2793 - accuracy: 0.6245
Epoch 16/50
782/782 - 5s - loss: 1.2702 - accuracy: 0.6271
Epoch 17/50
782/782 - 5s - loss: 1.2535 - accuracy: 0.6337
Epoch 

[1.1841787099838257, 0.6945000290870667]

NOTE: Accuracy is lowe since need more training due to dropout but generalization is good (difference in train error and test error is small)