# 1. Importing libraries

In [None]:
import os

os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist

In [None]:
print(tf.__version__)

2.9.2


In [None]:
physical_devices = tf.config.list_physical_devices("GPU")
tf.config.experimental.set_memory_growth(physical_devices[0], True)

# 2. Sequential API

In [None]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(-1, 28 * 28).astype("float32") / 255.0
x_test = x_test.reshape(-1, 28 * 28).astype("float32") / 255.0

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


## 2.1 Option-1

In [None]:
# Sequential API (Very convenient, not very flexible)
model = keras.Sequential(
    [
        keras.Input(shape=(28 * 28)),
        layers.Dense(512, activation="relu"),
        layers.Dense(256, activation="relu"),
        layers.Dense(10),
    ]
)
print(model.summary())

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 512)               401920    
                                                                 
 dense_1 (Dense)             (None, 256)               131328    
                                                                 
 dense_2 (Dense)             (None, 10)                2570      
                                                                 
Total params: 535,818
Trainable params: 535,818
Non-trainable params: 0
_________________________________________________________________
None


## 2.2 Option-2

In [None]:
model = keras.Sequential()
model.add(keras.Input(shape=(784)))
model.add(layers.Dense(512, activation="relu"))
model.add(layers.Dense(256, activation="relu", name="my_layer"))
model.add(layers.Dense(10))
print(model.summary())

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 512)               401920    
                                                                 
 my_layer (Dense)            (None, 256)               131328    
                                                                 
 dense_4 (Dense)             (None, 10)                2570      
                                                                 
Total params: 535,818
Trainable params: 535,818
Non-trainable params: 0
_________________________________________________________________
None


# 3. Functional API

In [None]:
# Functional API (A bit more flexible)
inputs = keras.Input(shape=(784))
x = layers.Dense(512, activation="relu", name="first_layer")(inputs)
x = layers.Dense(256, activation="relu", name="second_layer")(x)
outputs = layers.Dense(10, activation="softmax")(x)
model = keras.Model(inputs=inputs, outputs=outputs)
print(model.summary())

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 784)]             0         
                                                                 
 first_layer (Dense)         (None, 512)               401920    
                                                                 
 second_layer (Dense)        (None, 256)               131328    
                                                                 
 dense_5 (Dense)             (None, 10)                2570      
                                                                 
Total params: 535,818
Trainable params: 535,818
Non-trainable params: 0
_________________________________________________________________
None


# 4. Training

In [None]:
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    metrics=["accuracy"],
)

model.fit(x_train, y_train, batch_size=32, epochs=5, verbose=2)
model.evaluate(x_test, y_test, batch_size=32, verbose=2)

Epoch 1/5
1875/1875 - 6s - loss: 0.1846 - accuracy: 0.9438 - 6s/epoch - 3ms/step
Epoch 2/5
1875/1875 - 4s - loss: 0.0792 - accuracy: 0.9753 - 4s/epoch - 2ms/step
Epoch 3/5
1875/1875 - 3s - loss: 0.0560 - accuracy: 0.9820 - 3s/epoch - 2ms/step
Epoch 4/5
1875/1875 - 4s - loss: 0.0400 - accuracy: 0.9874 - 4s/epoch - 2ms/step
Epoch 5/5
1875/1875 - 3s - loss: 0.0334 - accuracy: 0.9893 - 3s/epoch - 2ms/step
313/313 - 1s - loss: 0.0816 - accuracy: 0.9791 - 691ms/epoch - 2ms/step


[0.08158241957426071, 0.9790999889373779]

# 5. Changing a few hyperparameters

In [None]:
model.fit(x_train, y_train, batch_size=64, epochs=5, verbose=2)
model.evaluate(x_test, y_test, batch_size=64, verbose=2)

Epoch 1/5
938/938 - 2s - loss: 0.0126 - accuracy: 0.9955 - 2s/epoch - 2ms/step
Epoch 2/5
938/938 - 2s - loss: 0.0089 - accuracy: 0.9971 - 2s/epoch - 2ms/step
Epoch 3/5
938/938 - 2s - loss: 0.0122 - accuracy: 0.9959 - 2s/epoch - 2ms/step
Epoch 4/5
938/938 - 2s - loss: 0.0127 - accuracy: 0.9958 - 2s/epoch - 2ms/step
Epoch 5/5
938/938 - 2s - loss: 0.0127 - accuracy: 0.9959 - 2s/epoch - 2ms/step
157/157 - 0s - loss: 0.0877 - accuracy: 0.9811 - 306ms/epoch - 2ms/step


[0.08766359090805054, 0.9811000227928162]

In [None]:
model.fit(x_train, y_train, batch_size=32, epochs=10, verbose=2)
model.evaluate(x_test, y_test, batch_size=32, verbose=2)

Epoch 1/10
1875/1875 - 4s - loss: 0.0269 - accuracy: 0.9916 - 4s/epoch - 2ms/step
Epoch 2/10
1875/1875 - 4s - loss: 0.0180 - accuracy: 0.9944 - 4s/epoch - 2ms/step
Epoch 3/10
1875/1875 - 4s - loss: 0.0167 - accuracy: 0.9947 - 4s/epoch - 2ms/step
Epoch 4/10
1875/1875 - 4s - loss: 0.0140 - accuracy: 0.9959 - 4s/epoch - 2ms/step
Epoch 5/10
1875/1875 - 4s - loss: 0.0135 - accuracy: 0.9960 - 4s/epoch - 2ms/step
Epoch 6/10
1875/1875 - 4s - loss: 0.0156 - accuracy: 0.9957 - 4s/epoch - 2ms/step
Epoch 7/10
1875/1875 - 4s - loss: 0.0109 - accuracy: 0.9968 - 4s/epoch - 2ms/step
Epoch 8/10
1875/1875 - 4s - loss: 0.0136 - accuracy: 0.9962 - 4s/epoch - 2ms/step
Epoch 9/10
1875/1875 - 4s - loss: 0.0112 - accuracy: 0.9968 - 4s/epoch - 2ms/step
Epoch 10/10
1875/1875 - 4s - loss: 0.0128 - accuracy: 0.9967 - 4s/epoch - 2ms/step
313/313 - 1s - loss: 0.1378 - accuracy: 0.9817 - 581ms/epoch - 2ms/step


[0.1378454864025116, 0.9817000031471252]

In [None]:
model.fit(x_train, y_train, batch_size=128, epochs=5, verbose=2)
model.evaluate(x_test, y_test, batch_size=128, verbose=2)

Epoch 1/5
469/469 - 1s - loss: 2.0127e-06 - accuracy: 1.0000 - 1s/epoch - 3ms/step
Epoch 2/5
469/469 - 1s - loss: 1.6419e-06 - accuracy: 1.0000 - 1s/epoch - 2ms/step
Epoch 3/5
469/469 - 1s - loss: 1.2015e-06 - accuracy: 1.0000 - 1s/epoch - 2ms/step
Epoch 4/5
469/469 - 1s - loss: 9.8084e-07 - accuracy: 1.0000 - 1s/epoch - 2ms/step
Epoch 5/5
469/469 - 1s - loss: 8.5404e-07 - accuracy: 1.0000 - 1s/epoch - 2ms/step
79/79 - 0s - loss: 0.1495 - accuracy: 0.9850 - 169ms/epoch - 2ms/step


[0.14951565861701965, 0.9850000143051147]

In [None]:
model.fit(x_train, y_train, batch_size=128, epochs=10, verbose=2)
model.evaluate(x_test, y_test, batch_size=128, verbose=2)

Epoch 1/10
469/469 - 1s - loss: 0.0028 - accuracy: 0.9993 - 1s/epoch - 2ms/step
Epoch 2/10
469/469 - 1s - loss: 1.8052e-04 - accuracy: 1.0000 - 1s/epoch - 2ms/step
Epoch 3/10
469/469 - 1s - loss: 6.0403e-05 - accuracy: 1.0000 - 1s/epoch - 3ms/step
Epoch 4/10
469/469 - 1s - loss: 4.0569e-05 - accuracy: 1.0000 - 1s/epoch - 2ms/step
Epoch 5/10
469/469 - 1s - loss: 2.9913e-05 - accuracy: 1.0000 - 1s/epoch - 2ms/step
Epoch 6/10
469/469 - 1s - loss: 1.3320e-05 - accuracy: 1.0000 - 1s/epoch - 3ms/step
Epoch 7/10
469/469 - 1s - loss: 7.7278e-06 - accuracy: 1.0000 - 1s/epoch - 3ms/step
Epoch 8/10
469/469 - 1s - loss: 5.0828e-06 - accuracy: 1.0000 - 1s/epoch - 2ms/step
Epoch 9/10
469/469 - 1s - loss: 3.5341e-06 - accuracy: 1.0000 - 1s/epoch - 2ms/step
Epoch 10/10
469/469 - 1s - loss: 2.6088e-06 - accuracy: 1.0000 - 1s/epoch - 2ms/step
79/79 - 0s - loss: 0.1434 - accuracy: 0.9851 - 164ms/epoch - 2ms/step


[0.14341481029987335, 0.9850999712944031]

In [None]:
model.fit(x_train, y_train, batch_size=16, epochs=10, verbose=2)
model.evaluate(x_test, y_test, batch_size=16, verbose=2)

Epoch 1/10
3750/3750 - 9s - loss: 0.0366 - accuracy: 0.9920 - 9s/epoch - 2ms/step
Epoch 2/10
3750/3750 - 9s - loss: 0.0215 - accuracy: 0.9945 - 9s/epoch - 2ms/step
Epoch 3/10
3750/3750 - 9s - loss: 0.0215 - accuracy: 0.9951 - 9s/epoch - 2ms/step
Epoch 4/10
3750/3750 - 12s - loss: 0.0196 - accuracy: 0.9953 - 12s/epoch - 3ms/step
Epoch 5/10
3750/3750 - 9s - loss: 0.0189 - accuracy: 0.9953 - 9s/epoch - 2ms/step
Epoch 6/10
3750/3750 - 9s - loss: 0.0183 - accuracy: 0.9955 - 9s/epoch - 2ms/step
Epoch 7/10
3750/3750 - 10s - loss: 0.0191 - accuracy: 0.9958 - 10s/epoch - 3ms/step
Epoch 8/10
3750/3750 - 9s - loss: 0.0189 - accuracy: 0.9960 - 9s/epoch - 2ms/step
Epoch 9/10
3750/3750 - 9s - loss: 0.0158 - accuracy: 0.9964 - 9s/epoch - 2ms/step
Epoch 10/10
3750/3750 - 9s - loss: 0.0185 - accuracy: 0.9962 - 9s/epoch - 2ms/step
625/625 - 1s - loss: 0.2079 - accuracy: 0.9797 - 1s/epoch - 2ms/step


[0.2078699916601181, 0.9797000288963318]

# 6. Changing Optimization Algorithms

In [None]:
print(model.summary())

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 784)]             0         
                                                                 
 first_layer (Dense)         (None, 512)               401920    
                                                                 
 second_layer (Dense)        (None, 256)               131328    
                                                                 
 dense_5 (Dense)             (None, 10)                2570      
                                                                 
Total params: 535,818
Trainable params: 535,818
Non-trainable params: 0
_________________________________________________________________
None


In [None]:
# Using Gradient Desc. with Momentum
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    optimizer=keras.optimizers.SGD(learning_rate=0.001),
    metrics=["accuracy"],
)

model.fit(x_train, y_train, batch_size=32, epochs=5, verbose=2)
model.evaluate(x_test, y_test, batch_size=32, verbose=2)

Epoch 1/5
1875/1875 - 4s - loss: 0.0164 - accuracy: 0.9965 - 4s/epoch - 2ms/step
Epoch 2/5
1875/1875 - 3s - loss: 0.0124 - accuracy: 0.9972 - 3s/epoch - 2ms/step
Epoch 3/5
1875/1875 - 4s - loss: 0.0101 - accuracy: 0.9977 - 4s/epoch - 2ms/step
Epoch 4/5
1875/1875 - 4s - loss: 0.0085 - accuracy: 0.9981 - 4s/epoch - 2ms/step
Epoch 5/5
1875/1875 - 4s - loss: 0.0073 - accuracy: 0.9983 - 4s/epoch - 2ms/step
313/313 - 1s - loss: 0.1791 - accuracy: 0.9823 - 710ms/epoch - 2ms/step


[0.1791049689054489, 0.9822999835014343]

In [None]:
# Using Gradient Desc. with Momentum
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    optimizer=keras.optimizers.RMSprop(learning_rate=0.001),
    metrics=["accuracy"],
)

model.fit(x_train, y_train, batch_size=32, epochs=5, verbose=2)
model.evaluate(x_test, y_test, batch_size=32, verbose=2)

Epoch 1/5
1875/1875 - 4s - loss: 0.0021 - accuracy: 0.9995 - 4s/epoch - 2ms/step
Epoch 2/5
1875/1875 - 4s - loss: 1.3128e-04 - accuracy: 0.9999 - 4s/epoch - 2ms/step
Epoch 3/5
1875/1875 - 4s - loss: 4.8201e-07 - accuracy: 1.0000 - 4s/epoch - 2ms/step
Epoch 4/5
1875/1875 - 4s - loss: 4.6987e-09 - accuracy: 1.0000 - 4s/epoch - 2ms/step
Epoch 5/5
1875/1875 - 4s - loss: 4.0730e-10 - accuracy: 1.0000 - 4s/epoch - 2ms/step
313/313 - 1s - loss: 0.2243 - accuracy: 0.9860 - 685ms/epoch - 2ms/step


[0.2243240475654602, 0.9860000014305115]

In [None]:
# Using Gradient Desc. with Momentum
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    optimizer=keras.optimizers.Adagrad(learning_rate=0.001),
    metrics=["accuracy"],
)

model.fit(x_train, y_train, batch_size=32, epochs=5, verbose=2)
model.evaluate(x_test, y_test, batch_size=32, verbose=2)

Epoch 1/5
1875/1875 - 4s - loss: 1.9471e-10 - accuracy: 1.0000 - 4s/epoch - 2ms/step
Epoch 2/5
1875/1875 - 4s - loss: 1.9471e-10 - accuracy: 1.0000 - 4s/epoch - 2ms/step
Epoch 3/5
1875/1875 - 4s - loss: 1.9471e-10 - accuracy: 1.0000 - 4s/epoch - 2ms/step
Epoch 4/5
1875/1875 - 3s - loss: 1.9471e-10 - accuracy: 1.0000 - 3s/epoch - 2ms/step
Epoch 5/5
1875/1875 - 3s - loss: 1.9471e-10 - accuracy: 1.0000 - 3s/epoch - 2ms/step
313/313 - 1s - loss: 0.2243 - accuracy: 0.9860 - 694ms/epoch - 2ms/step


[0.2243240475654602, 0.9860000014305115]