<h1>Chapter 3. Going beyond the Basics: Detecting Features in Images</h1>

In [10]:
# Third-party libraries used by the notebook, grouped in one place.
import numpy as np
import tensorflow as tf

# Handle to the Fashion-MNIST dataset module; `load_data()` is called on it
# in the next cell.
data = tf.keras.datasets.fashion_mnist

In [11]:
# Load the train and test splits; each split is an (images, labels) pair.
(
    (training_images, training_labels),
    (test_images, test_labels),
) = data.load_data()

# Sanity-check the array dimensions before any preprocessing.
# `.shape` is already a tuple, so it is printed directly.
print('train image shape:', training_images.shape)
print('test image shape:', test_images.shape)


train image shape: (60000, 28, 28)
test image shape: (10000, 28, 28)


In [12]:
# Conv2D expects an explicit channel axis, so append a single channel:
# (N, 28, 28) -> (N, 28, 28, 1).
# -1 lets NumPy infer the number of samples instead of hard-coding
# 60000 / 10000, so the cell also works on a subset of the data.
# Dividing by 255 rescales the pixel values (assumes the raw pixels lie in
# [0, 255] -- TODO confirm against the loaded dtype).
# NOTE: the arrays are overwritten in place, so re-running this cell without
# re-running the loading cell divides by 255 a second time.
training_images = training_images.reshape(-1, 28, 28, 1) / 255.0
test_images = test_images.reshape(-1, 28, 28, 1) / 255.0

**Conv2D** layers are designed to process images with an explicit channel dimension (for example, 3 channels for RGB). Therefore, each original 28 x 28 grayscale image needs to be reshaped to (28 x 28 x 1).

In [13]:
# Small CNN: two Conv2D + MaxPooling2D stages followed by a dense classifier.
# The input shape is declared with an explicit Input object rather than the
# `input_shape=` argument on the first layer, which Keras warns against for
# Sequential models.
model = tf.keras.models.Sequential([
    tf.keras.Input(shape=(28, 28, 1)),
    # 64 filters of size 3x3 over the single-channel input.
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    # 64 filters of size 3x3 over the 64 feature maps from the previous stage.
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    # Collapse the feature maps into a vector for the dense layers.
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    # One output unit per class (10 classes), normalised with softmax.
    tf.keras.layers.Dense(10, activation='softmax'),
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [14]:
# Configure training: Adam optimizer, sparse categorical cross-entropy loss
# and accuracy reported as the metric.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [7]:
# Train for 20 epochs on the training split.
# The History object returned by fit() is kept so the per-epoch metrics can be
# inspected later, and so the cell no longer echoes the object's repr.
history = model.fit(training_images, training_labels, epochs=20)

Epoch 1/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 15ms/step - accuracy: 0.7801 - loss: 0.6099
Epoch 2/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 14ms/step - accuracy: 0.8892 - loss: 0.3036
Epoch 3/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 15ms/step - accuracy: 0.9067 - loss: 0.2509
Epoch 4/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 14ms/step - accuracy: 0.9182 - loss: 0.2152
Epoch 5/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 13ms/step - accuracy: 0.9283 - loss: 0.1900
Epoch 6/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 12ms/step - accuracy: 0.9375 - loss: 0.1664
Epoch 7/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 12ms/step - accuracy: 0.9479 - loss: 0.1395
Epoch 8/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 12ms/step - accuracy: 0.9513 - loss: 0.1280
Epoch 9/

<keras.src.callbacks.history.History at 0x1fc03e7b890>

In [8]:
# Print the layer-by-layer summary of the model; the parameter counts are
# worked through by hand in the "Parameter calculation" section below.
model.summary()

<h2>Parameter calculation</h2>

<li><strong>Conv2D</strong>: Input shape: (28 x 28 x 1) -> Output shape: (26 x 26 x 64). This layer applies 64 convolution filters to the input image, producing 64 slightly smaller output images (26 x 26 each). <strong>General formula</strong>: an n x n kernel (stride 1, no padding) applied to an x-by-x image yields an output image of [x-(n-1)]^2 pixels. <strong>Parameter calculation</strong>: 64 kernels, each of size (3 x 3), are applied to the input image, plus one bias term per kernel: 64 x (3 x 3 + 1) = 640</li>
<li><strong>Max pooling</strong>: applies (2 x 2) max pooling, which halves the height and width (26 x 26 -> 13 x 13) and therefore reduces the number of pixels in each image to a quarter</li>
<li><strong>Conv2D</strong>: Input shape: (13 x 13 x 64) -> Output shape: (11 x 11 x 64). This layer applies 64 convolution filters to the 64 convolved images produced by the previous layer. <strong>Parameter calculation</strong>: 64 x (64 x 9) + 64 = 36928</li>
<li><strong>Max pooling</strong>: Same as above; (2 x 2) max pooling reduces (11 x 11 x 64) to (5 x 5 x 64)</li>
<li><strong>Flatten</strong>: Unrolls the multi-dimensional input array into a single 1-D array (5 x 5 x 64 = 1600 values).</li>



In [9]:
# Evaluate the trained model on the held-out test split.
# NOTE(review): with the loss and metrics configured in compile(), `result` is
# presumably [loss, accuracy] -- confirm against the Keras evaluate() docs.
result = model.evaluate(test_images, test_labels)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9069 - loss: 0.5337
