# 8. Introduction to Deep Learning for Computer Vision
# 8.1. Introduction to Convnets

In [1]:
# import libraries
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist

### Instantiate a small convnet
- The architecture consists of a stack of `Conv2D` and `MaxPooling2D` layers.

In [2]:
# Build the convnet with the Keras functional API.
# NOTE(review): the first layer's name "Con2D_1" looks like a typo for "Conv2D_1";
# it is kept as-is here because layer names are visible in the model summary.
inputs = keras.Input(shape=(28, 28, 1), name="Input")  # (image_height, image_width, image_channels)

# Layers between the input and the classifier, listed in the order they are applied.
hidden_layers = [
    layers.Conv2D(filters=32, kernel_size=3, activation="relu", name="Con2D_1"),
    layers.MaxPooling2D(pool_size=2, name="MaxPooling2D_1"),
    layers.Conv2D(filters=64, kernel_size=3, activation="relu", name="Conv2D_2"),
    layers.MaxPooling2D(pool_size=2, name="MaxPooling2D_2"),
    layers.Conv2D(filters=128, kernel_size=3, activation="relu", name="Conv2D_3"),
    layers.Flatten(name="Flatten_1"),
]

# Thread the tensor through each layer in turn.
x = inputs
for hidden_layer in hidden_layers:
    x = hidden_layer(x)

# 10-unit softmax output layer, then wrap input and output into a Model.
outputs = layers.Dense(10, activation="softmax", name="Dense")(x)
model = keras.Model(inputs=inputs, outputs=outputs)

2023-02-18 19:09:34.833833: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64:/usr/local/cuda/lib:/usr/local/lib/x86_64-linux-gnu:/usr/local/nvidia/lib:/usr/local/nvidia/lib64::/opt/conda/lib
2023-02-18 19:09:34.833895: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)


In [3]:
# check architecture: print each layer's name, type, output shape and parameter count
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Input (InputLayer)          [(None, 28, 28, 1)]       0         
                                                                 
 Con2D_1 (Conv2D)            (None, 26, 26, 32)        320       
                                                                 
 MaxPooling2D_1 (MaxPooling2  (None, 13, 13, 32)       0         
 D)                                                              
                                                                 
 Conv2D_2 (Conv2D)           (None, 11, 11, 64)        18496     
                                                                 
 MaxPooling2D_2 (MaxPooling2  (None, 5, 5, 64)         0         
 D)                                                              
                                                                 
 Conv2D_3 (Conv2D)           (None, 3, 3, 128)         73856 

**Note**: Because this is a 10-way classification problem, the last layer has 10 outputs and a softmax activation, so we will use categorical crossentropy as the loss. Since the labels are integers (not one-hot vectors), we use the sparse version, `sparse_categorical_crossentropy`.

### Train & evaluate the convnet on MNIST dataset

In [4]:
# load data
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()


def preprocess_images(images):
    """Reshape images to (n, 28, 28, 1) and divide the pixel values by 255 as float32.

    The sample count is inferred with -1 instead of being hard-coded, so the
    same preprocessing works for any number of 28x28 single-channel images.
    """
    return images.reshape((-1, 28, 28, 1)).astype("float32") / 255


# reshape and normalize the images (same preprocessing for both splits)
train_images = preprocess_images(train_images)
test_images = preprocess_images(test_images)

# initialize optimizer, loss, and metrics
# The sparse loss variant is used because the labels are integers, not one-hot vectors.
model.compile(optimizer="rmsprop",
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [5]:
# Fit on the training split: 5 epochs in mini-batches of 64 samples.
model.fit(
    x=train_images,
    y=train_labels,
    batch_size=64,
    epochs=5,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7fba73627890>

In [6]:
# Evaluate the trained model on the test split and report its accuracy.
test_loss, test_acc = model.evaluate(x=test_images, y=test_labels)
print(f"Test accuracy: {test_acc}")

Test accuracy: 0.9908000230789185


**Note**: Using a fully connected (Dense) network, we got a test accuracy of 97.8%, but with the convnet we get 99.1%, which reduces the error rate by about 60% in relative terms (from 2.2% to 0.9%). Why does the convnet perform better than the Dense network?
- The convolutional operation.
- The max-pooling operation.