# Purpose of this notebook
- Use a CNN to classify CIFAR-10 images
- Add regularization with L2, batch normalization, and Dropout

# Import

In [7]:
from tensorflow import keras # type: ignore
from tensorflow.keras import layers, regularizers # type: ignore
from tensorflow.keras.datasets import cifar10 # type: ignore

# Load data

In [2]:
# Load the CIFAR-10 train/test split that ships with Keras.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Show the shape of each array, one per line, to confirm the split sizes.
print(
    x_train.shape,
    y_train.shape,
    x_test.shape,
    y_test.shape,
    sep="\n",
)

(50000, 32, 32, 3)
(50000, 1)
(10000, 32, 32, 3)
(10000, 1)


# Before normalization, pixel values are in the range [0, 255]

In [3]:
# Peek at the top-left pixel (all channel values) of the first train and test image.
print(x_train[0, 0, 0], x_test[0, 0, 0], sep="\n")

[59 62 63]
[158 112  49]


# Normalize
- Note: Run the following cell only once; re-running it divides the already-normalized data by 255 again

In [4]:
# Scale pixel values from [0, 255] down to [0, 1].
#
# The guard makes this cell idempotent: division turns the arrays into floats,
# so on a re-run the integer check fails and the data is not divided by 255 a
# second time. Assumes the freshly loaded arrays have an integer dtype (the
# raw pixel preview prints integers) -- TODO confirm if the loader changes.
if x_train.dtype.kind in "ui":
    x_train = x_train / 255
if x_test.dtype.kind in "ui":
    x_test = x_test / 255

# After normalization, pixel values are in the range [0, 1]

In [5]:
# Same top-left pixel as in the earlier preview, printed again to check the
# values after the normalization cell has run.
print(x_train[0, 0, 0], x_test[0, 0, 0], sep="\n")

[0.23137255 0.24313725 0.24705882]
[0.61960784 0.43921569 0.19215686]


# Keras Sequential API using CNN

In [10]:
# CNN classifier for 32x32 RGB images, assembled one layer at a time.
# Each Conv2D layer and the hidden Dense layer carries an L2 kernel penalty
# of 0.01; Dropout(0.5) sits between the hidden Dense layer and the output.
model = keras.Sequential(name="sequential_model")
model.add(layers.Input(shape=(32, 32, 3), name='input_layer'))

# Convolution stage 1: 32 filters of size 3x3, then 2x2 max pooling.
model.add(layers.Conv2D(
    filters=32,
    kernel_size=3,
    padding='valid',
    activation='relu',
    kernel_regularizer=regularizers.l2(0.01),
    name='conv_layer1',
))
model.add(layers.MaxPooling2D(pool_size=(2, 2), name='pool_layer1'))

# Convolution stage 2: 64 filters of size 3x3, then 2x2 max pooling.
model.add(layers.Conv2D(
    filters=64,
    kernel_size=3,
    padding='valid',
    activation='relu',
    kernel_regularizer=regularizers.l2(0.01),
    name='conv_layer2',
))
model.add(layers.MaxPooling2D(pool_size=(2, 2), name='pool_layer2'))

# Convolution stage 3: 128 filters of size 3x3, no pooling afterwards.
model.add(layers.Conv2D(
    filters=128,
    kernel_size=3,
    padding='valid',
    activation='relu',
    kernel_regularizer=regularizers.l2(0.01),
    name='conv_layer3',
))

# Batch-normalize the last feature maps, then flatten them for the dense head.
model.add(layers.BatchNormalization())
model.add(layers.Flatten())

# Classifier head: regularized hidden layer, dropout, then 10 raw logits
# (the output layer has no activation).
model.add(layers.Dense(
    units=64,
    activation='relu',
    kernel_regularizer=regularizers.l2(0.01),
    name='dense_layer',
))
model.add(layers.Dropout(rate=0.5))
model.add(layers.Dense(units=10, name='output_layer'))

# model.summary 

In [11]:
# Model.summary() prints the layer table itself and returns None, so wrapping
# it in print() only appended a stray "None" to the cell output.
model.summary()

None


# model.compile

In [12]:
# Configure training: Adam optimizer with learning rate 0.001, accuracy as the
# reported metric, and sparse categorical cross-entropy as the loss.
# from_logits=True because the output layer has no softmax activation.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    metrics=["accuracy"],
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
)

# Note
- If output layer `layers.Dense(10)` then use `from_logits=True`
- If output layer `layers.Dense(10, activation='softmax')` then use `from_logits=False`, or simply omit the `from_logits` argument (it defaults to `False`)

# Train the model using model.fit

In [13]:
# Train on the normalized training set for 5 epochs with mini-batches of 32.
# verbose=2 prints a single summary line per epoch.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=5,
    verbose=2,
)

Epoch 1/5
1563/1563 - 19s - 12ms/step - accuracy: 0.4354 - loss: 2.0130
Epoch 2/5
1563/1563 - 18s - 12ms/step - accuracy: 0.5233 - loss: 1.5683
Epoch 3/5
1563/1563 - 17s - 11ms/step - accuracy: 0.5469 - loss: 1.5073
Epoch 4/5
1563/1563 - 18s - 11ms/step - accuracy: 0.5650 - loss: 1.4668
Epoch 5/5
1563/1563 - 18s - 11ms/step - accuracy: 0.5775 - loss: 1.4380


<keras.src.callbacks.history.History at 0x7fc94f286640>

# Note
- The `model.fit` call above ran in 1m36s (longer than the CNN in 10.ipynb)
- It achieved an accuracy of 0.5775 (lower than the raw Dense model in 10.ipynb)

# Conclusion
- The result is not what I expected
- Desired: reduced overfitting
- Actual: longer training time and lower accuracy
- At least, I know how to add:
  + L2 regularization
  + Batch normalization
  + Dropout

# Evaluate test data using model.evaluate

In [14]:
# Score the trained model on the held-out test set; the cell displays the
# returned [loss, accuracy] pair.
model.evaluate(
    x=x_test,
    y=y_test,
    batch_size=32,
    verbose=2,
)

313/313 - 2s - 5ms/step - accuracy: 0.5507 - loss: 1.4737


[1.4737130403518677, 0.5507000088691711]