<a href="https://colab.research.google.com/github/kowsiknd/MNIST/blob/main/MNIST_Convolution_Network.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import layers, regularizers

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Display the installed TensorFlow version as the cell's output.
tf.__version__

'2.6.0'

In [3]:
from tensorflow.keras.datasets import mnist

In [4]:
# Load the MNIST train/test splits, then convert both image arrays to float32
# and divide by 255.0 (assumes 8-bit pixel values, which maps them into [0, 1]).
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = (
    images.astype("float32") / 255.0 for images in (x_train, x_test)
)

In [5]:
from sklearn.model_selection import train_test_split
# Fraction of the training data handed to train_test_split as `test_size`,
# i.e. the share that becomes the validation set.
validation_split_ratio = 0.20

In [21]:
# Carve a validation set out of the training data with a fixed seed.
# NOTE: this rebinds x_train / y_train to the reduced training split, so
# re-running this cell without first re-running the data-loading cell will
# split the already-reduced arrays again.
x_train, x_val, y_train, y_val = train_test_split(
    x_train,
    y_train,
    test_size=validation_split_ratio,
    random_state=42,
)

Model Creation

In [22]:
class CNNBlock(layers.Layer):
  """Conv2D -> BatchNormalization -> ReLU building block.

  Args:
    output_channels: Number of filters of the Conv2D layer.
    kernel_size: Kernel size of the Conv2D layer (default 3).
    **kwargs: Standard `layers.Layer` keyword arguments (e.g. `name`,
      `dtype`), forwarded to the base class.
  """

  def __init__(self, output_channels, kernel_size=3, **kwargs):
    super(CNNBlock, self).__init__(**kwargs)
    # Keep the constructor arguments so get_config() can report them.
    self.output_channels = output_channels
    self.kernel_size = kernel_size
    self.conv = layers.Conv2D(
        output_channels,
        kernel_size=kernel_size,
        padding="same"
    )
    self.norm = layers.BatchNormalization()

  def call(self, input_tensor, training=False):
    """Apply convolution, batch normalization and ReLU to `input_tensor`.

    Args:
      input_tensor: Input passed to the Conv2D layer.
      training: Forwarded to BatchNormalization, which behaves differently
        in training and inference mode.

    Returns:
      The ReLU-activated output tensor.
    """
    x = self.conv(input_tensor)
    # Pass the flag explicitly instead of relying on Keras' implicit
    # call-context propagation, so direct `.call(...)` use is also correct.
    x = self.norm(x, training=training)
    output = tf.nn.relu(x)
    return output

  def get_config(self):
    """Return the constructor arguments so the layer can be re-created."""
    config = super(CNNBlock, self).get_config()
    config.update({
        "output_channels": self.output_channels,
        "kernel_size": self.kernel_size,
    })
    return config


In [33]:
# Shape tuple of the training image array; entries 1 and 2 are used below as
# the spatial dimensions of the model's Input layer.
input_shape = x_train.shape

In [34]:
# Sequential classifier: an Input with a single trailing channel, three
# CNNBlocks of increasing width, then Flatten and a 10-unit Dense layer
# (no activation is set on the Dense layer).
model = keras.Sequential(
    [keras.Input(shape=(*input_shape[1:3], 1))]
    + [CNNBlock(width) for width in (32, 64, 128)]
    + [layers.Flatten(), layers.Dense(10)]
)

In [35]:
# Print the layer-by-layer output shapes and parameter counts.
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
cnn_block_9 (CNNBlock)       (None, 28, 28, 32)        448       
_________________________________________________________________
cnn_block_10 (CNNBlock)      (None, 28, 28, 64)        18752     
_________________________________________________________________
cnn_block_11 (CNNBlock)      (None, 28, 28, 128)       74368     
_________________________________________________________________
flatten_3 (Flatten)          (None, 100352)            0         
_________________________________________________________________
dense_3 (Dense)              (None, 10)                1003530   
Total params: 1,097,098
Trainable params: 1,096,650
Non-trainable params: 448
_________________________________________________________________


In [36]:
# Configure training: Adam optimizer, sparse categorical cross-entropy loss,
# and accuracy as the reported metric. The loss is created with
# from_logits=True, i.e. it is told the model outputs are unnormalized scores.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"],
)

In [37]:
# Report how many GPU devices TensorFlow can see.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


In [38]:
# Sanity check: print the image/label array shapes of the validation split,
# then of the training split.
print(x_val.shape, y_val.shape)
print(x_train.shape, y_train.shape)

(12000, 28, 28) (12000,)
(48000, 28, 28) (48000,)


In [39]:
# Train for 10 epochs in shuffled mini-batches of 32, evaluating on the
# validation split after each epoch; verbose=2 logs one line per epoch.
# The returned History object is kept in `history`.
history = model.fit(
    x=x_train,
    y=y_train,
    validation_data=(x_val, y_val),
    epochs=10,
    batch_size=32,
    shuffle=True,
    verbose=2,
)

Epoch 1/10
1500/1500 - 55s - loss: 0.6304 - accuracy: 0.9448 - val_loss: 0.0764 - val_accuracy: 0.9818
Epoch 2/10
1500/1500 - 26s - loss: 0.0562 - accuracy: 0.9842 - val_loss: 0.0772 - val_accuracy: 0.9774
Epoch 3/10
1500/1500 - 26s - loss: 0.0419 - accuracy: 0.9862 - val_loss: 0.0675 - val_accuracy: 0.9801
Epoch 4/10
1500/1500 - 25s - loss: 0.0373 - accuracy: 0.9884 - val_loss: 0.0484 - val_accuracy: 0.9856
Epoch 5/10
1500/1500 - 25s - loss: 0.0288 - accuracy: 0.9909 - val_loss: 0.0776 - val_accuracy: 0.9810
Epoch 6/10
1500/1500 - 25s - loss: 0.0242 - accuracy: 0.9923 - val_loss: 0.0541 - val_accuracy: 0.9850
Epoch 7/10
1500/1500 - 26s - loss: 0.0152 - accuracy: 0.9946 - val_loss: 0.0661 - val_accuracy: 0.9857
Epoch 8/10
1500/1500 - 26s - loss: 0.0127 - accuracy: 0.9958 - val_loss: 0.0569 - val_accuracy: 0.9864
Epoch 9/10
1500/1500 - 26s - loss: 0.0094 - accuracy: 0.9969 - val_loss: 0.0496 - val_accuracy: 0.9893
Epoch 10/10
1500/1500 - 25s - loss: 0.0080 - accuracy: 0.9975 - val_loss:

In [40]:
# Evaluate on the test split (verbose=2 prints the metrics; the return value
# is not kept), then save the trained model under the "pretrained" path.
model.evaluate(x_test, y_test, batch_size=64, verbose=2)
model.save("pretrained")

157/157 - 1s - loss: 0.0601 - accuracy: 0.9870




INFO:tensorflow:Assets written to: pretrained/assets


INFO:tensorflow:Assets written to: pretrained/assets


In [None]:
# Run the model over the whole test set, then preview the test image at
# index 1 together with the model's output row for that image.
y_pred = model.predict(x_test)
img = x_test[1]
plt.imshow(img.reshape(28, 28))
# Last expression of the cell: displayed as the cell output.
y_pred[1]

In [42]:
# Index of the largest model output along axis 1 for each prediction row;
# compared against y_test below as the predicted label.
y_pred_value = tf.argmax(y_pred, axis=1)

In [None]:
# Show every test image whose predicted label differs from the true label.
#
# The predictions are converted to a NumPy array once up front. Indexing the
# tensor inside the loop runs TensorFlow ops for every sample, and formatting
# a scalar tensor prints "tf.Tensor(...)" rather than the bare digit.
# Assumes y_pred_value is an eager tf.Tensor (as produced by tf.argmax).
predicted_labels = y_pred_value.numpy()
# Indices where prediction and ground truth disagree; only these are visited.
misclassified_indices = (predicted_labels != y_test).nonzero()[0]
for i in misclassified_indices:
  img = x_test[i]
  print("Actual: {}, Predicted: {}".format(y_test[i], predicted_labels[i]))
  plt.imshow(img.reshape(28, 28))
  plt.show()