In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

import keras
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Conv2D, Dense, MaxPool2D, Flatten, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import warnings
warnings.filterwarnings('ignore')

In [3]:
from keras.datasets import mnist

In [4]:
# Fetch the MNIST digits (the first call downloads mnist.npz) as a
# (train, test) pair, each holding an image array and a label array.
train_split, test_split = mnist.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [5]:
# Sanity check: dimensions of the training image array as loaded.
X_train.shape

(60000, 28, 28)

In [6]:
# Peek at the integer training labels (NumPy abbreviates the long array).
y_train

array([5, 0, 4, ..., 5, 6, 8], dtype=uint8)

In [7]:
# One label per training image is expected here.
y_train.shape

(60000,)

In [8]:
# Divide by 255 so pixel intensities (assumed to span 0-255) land in [0, 1].
X_train = np.asarray(X_train) / 255.0

In [9]:
# Apply the same 1/255 scaling to the test images as to the training images.
X_test = np.asarray(X_test) / 255.0

In [10]:
# Hold out 33% of the training data as a validation set. The fixed
# random_state keeps the shuffled split identical across runs.
split_parts = train_test_split(
    X_train,
    y_train,
    test_size=0.33,
    shuffle=True,
    random_state=42,
)
X_train, X_val, y_train, y_val = split_parts

In [11]:
# Conv2D expects a trailing channel axis, so turn every (N, 28, 28) array into
# (N, 28, 28, 1); -1 lets NumPy infer the number of samples.
X_train = X_train.reshape(-1, 28, 28, 1)
X_val = X_val.reshape(-1, 28, 28, 1)
X_test = X_test.reshape(-1, 28, 28, 1)

In [12]:
# Confirm the training set now has the extra single-channel axis.
X_train.shape

(40200, 28, 28, 1)

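One-hot encode the labels: each digit (0-9) becomes a 10-element vector with a single 1 at the digit's index, which matches the 10-unit softmax output layer and the categorical cross-entropy loss used below.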

In [13]:
from tensorflow.keras.utils import to_categorical
# One-hot encode the integer labels into 10-column rows, one column per digit,
# for both the training and the validation split.
y_cat_train, y_cat_val = (
    to_categorical(labels, num_classes=10) for labels in (y_train, y_val)
)

In [14]:
# Inspect the one-hot encoded training labels.
y_cat_train

array([[0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 1.],
       ...,
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [15]:
# Expect (number of training samples, 10) after one-hot encoding.
y_cat_train.shape

(40200, 10)

Data augmentation: apply small random rotations, shifts and zooms to the training images so the model is less likely to overfit and memorise noise.

In [16]:
# Augmentation pipeline: small random rotations (up to 10 degrees), horizontal
# and vertical shifts (up to 10% of the image size) and zooms (0.9x - 1.1x).
train_datagen = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
)

# `train_datagen.fit(X_train)` is intentionally not called: fit() only computes
# dataset statistics for featurewise_center / featurewise_std_normalization /
# zca_whitening, none of which are enabled, so it would merely copy the whole
# training set without changing the generator.
#
# A fixed seed makes the batch order and the random transforms repeatable,
# in line with random_state=42 used for the train/validation split.
train_generator = train_datagen.flow(
    X_train,
    y_cat_train,
    batch_size=32,
    seed=42,
)

## Building the model

In [17]:
# Baseline CNN: two conv + max-pool stages followed by a dense classifier head.
# Spatial size: 28x28 -> (pool) 14x14 -> (5x5 valid conv) 10x10 -> (pool) 5x5.
model = Sequential([
    Conv2D(filters=32, kernel_size=(4, 4), padding='same',
           input_shape=(28, 28, 1), activation='relu'),
    MaxPool2D(pool_size=(2, 2), strides=2, padding="valid"),
    Conv2D(filters=32, kernel_size=(5, 5), padding='valid', activation='relu'),
    MaxPool2D(pool_size=(2, 2), strides=2, padding="valid"),
    Flatten(),
    Dense(units=128, activation='relu'),
    # One output unit per digit class; softmax yields class probabilities.
    Dense(units=10, activation='softmax'),
])

In [20]:
# Print a summary of the baseline model's layers.
model.summary()

In [24]:
from tensorflow.keras.optimizers import Adam

In [27]:
# Adam optimizer for the baseline model, with a step size of 0.01.
optimizer = Adam(learning_rate=1e-2)

In [28]:
# Categorical cross-entropy pairs with the one-hot labels and softmax output;
# plain accuracy is tracked during training and validation.
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

## Model training

In [29]:
# Train the baseline on augmented batches for 20 epochs, scoring the
# (un-augmented) validation split after every epoch.
history = model.fit(
    x=train_generator,
    epochs=20,
    verbose=2,
    validation_data=(X_val, y_cat_val),
)

Epoch 1/20
1257/1257 - 56s - 45ms/step - accuracy: 0.9162 - loss: 0.2649 - val_accuracy: 0.9649 - val_loss: 0.1232
Epoch 2/20
1257/1257 - 56s - 45ms/step - accuracy: 0.9543 - loss: 0.1575 - val_accuracy: 0.9568 - val_loss: 0.1525
Epoch 3/20
1257/1257 - 80s - 64ms/step - accuracy: 0.9592 - loss: 0.1428 - val_accuracy: 0.9688 - val_loss: 0.1103
Epoch 4/20
1257/1257 - 54s - 43ms/step - accuracy: 0.9606 - loss: 0.1368 - val_accuracy: 0.9744 - val_loss: 0.1070
Epoch 5/20
1257/1257 - 81s - 65ms/step - accuracy: 0.9618 - loss: 0.1417 - val_accuracy: 0.9783 - val_loss: 0.0986
Epoch 6/20
1257/1257 - 54s - 43ms/step - accuracy: 0.9631 - loss: 0.1327 - val_accuracy: 0.9715 - val_loss: 0.1086
Epoch 7/20
1257/1257 - 81s - 65ms/step - accuracy: 0.9652 - loss: 0.1299 - val_accuracy: 0.9747 - val_loss: 0.1171
Epoch 8/20
1257/1257 - 82s - 66ms/step - accuracy: 0.9638 - loss: 0.1356 - val_accuracy: 0.9775 - val_loss: 0.1074
Epoch 9/20
1257/1257 - 54s - 43ms/step - accuracy: 0.9634 - loss: 0.1351 - val_a

In [30]:
# Class-probability rows for every test image (one column per digit).
preds = model.predict(x=X_test)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step


In [31]:
# Collapse each probability row to the index of its most likely digit.
# Note: `preds` is overwritten, so this cell must run exactly once after predict().
preds = preds.argmax(axis=1)

In [32]:
# Inspect the predicted digit labels for the test set.
preds

array([7, 2, 1, ..., 4, 5, 6])

In [33]:
# Total number of predictions (should equal the number of test images).
preds.size

10000

In [34]:
# Predictions are a flat 1-D vector: one label per test image.
preds.shape

(10000,)

In [35]:
from sklearn.metrics import accuracy_score

In [36]:
# Fraction of test images whose predicted digit matches the true label.
accuracy_score(y_true=y_test, y_pred=preds)

0.9764

In [37]:
from sklearn.metrics import classification_report, confusion_matrix

In [39]:
# Per-digit precision / recall / F1 for the baseline model on the test set.
report = classification_report(y_true=y_test, y_pred=preds)
print(report)

              precision    recall  f1-score   support

           0       0.99      1.00      0.99       980
           1       0.99      0.99      0.99      1135
           2       0.99      0.98      0.98      1032
           3       0.94      1.00      0.97      1010
           4       0.99      0.94      0.97       982
           5       0.97      0.96      0.97       892
           6       0.99      0.99      0.99       958
           7       0.97      0.98      0.98      1028
           8       0.98      0.96      0.97       974
           9       0.95      0.98      0.96      1009

    accuracy                           0.98     10000
   macro avg       0.98      0.98      0.98     10000
weighted avg       0.98      0.98      0.98     10000



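## Optimization: deeper network, lower learning rate and early stopping

A second model with two additional convolutional layers is trained with a smaller learning rate (0.001 instead of 0.01), and an `EarlyStopping` callback is defined for this stage. Note that although `Dropout` is imported, no dropout layer is actually added to `model_new` below.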

In [82]:
# Stop training once validation accuracy has failed to improve for 5 epochs.
# `model_new` is compiled with metrics=['categorical_accuracy'], so Keras logs
# the validation metric as 'val_categorical_accuracy'; a callback monitoring
# 'val_accuracy' would never find a value to compare against.
callback = keras.callbacks.EarlyStopping(
    monitor='val_categorical_accuracy',
    mode='max',
    patience=5,
    verbose=1,
)

In [87]:
# Deeper variant of the baseline: each conv stage now stacks two conv layers.
# Spatial size: 28x28 -> (pool) 14x14 -> (two 5x5 valid convs) 10x10, 6x6 -> (pool) 3x3.
model_new = Sequential([
    Conv2D(filters=32, kernel_size=(4, 4), padding='same',
           input_shape=(28, 28, 1), activation='relu'),
    Conv2D(filters=32, kernel_size=(4, 4), padding='same', activation='relu'),
    MaxPool2D(pool_size=(2, 2), strides=2, padding="valid"),
    Conv2D(filters=32, kernel_size=(5, 5), padding='valid', activation='relu'),
    Conv2D(filters=32, kernel_size=(5, 5), padding='valid', activation='relu'),
    MaxPool2D(pool_size=(2, 2), strides=2, padding="valid"),
    Flatten(),
    Dense(units=128, activation='relu'),
    # One output unit per digit class; softmax yields class probabilities.
    Dense(units=10, activation='softmax'),
])

In [94]:
# Adam optimizer for the deeper model, with a step size of 0.001.
optimizer_new = Adam(learning_rate=1e-3)

In [95]:
# Same loss as the baseline. The metric is named 'categorical_accuracy', so the
# training logs use 'categorical_accuracy' / 'val_categorical_accuracy' as keys.
model_new.compile(loss='categorical_crossentropy', optimizer=optimizer_new, metrics=['categorical_accuracy'])

In [92]:
# Print a summary of the deeper model's layers.
model_new.summary()

In [96]:
# Early stopping for this run. The callback is built in this cell so that the
# monitored name matches the metric `model_new` was compiled with
# ('categorical_accuracy', logged on validation data as
# 'val_categorical_accuracy'). Without passing it via `callbacks=`, fit() would
# always run all 10 epochs and early stopping would never take effect.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_categorical_accuracy',
    mode='max',
    patience=5,
    verbose=1,
)

# Train the deeper model on the augmented batches, validating after every epoch.
history_new = model_new.fit(
    x=train_generator,
    epochs=10,
    verbose=1,
    validation_data=(X_val, y_cat_val),
    callbacks=[early_stopping],
)

Epoch 1/10
[1m1257/1257[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m186s[0m 146ms/step - categorical_accuracy: 0.7960 - loss: 0.6132 - val_categorical_accuracy: 0.9830 - val_loss: 0.0580
Epoch 2/10
[1m1257/1257[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m200s[0m 144ms/step - categorical_accuracy: 0.9702 - loss: 0.0957 - val_categorical_accuracy: 0.9880 - val_loss: 0.0397
Epoch 3/10
[1m1257/1257[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m200s[0m 143ms/step - categorical_accuracy: 0.9800 - loss: 0.0659 - val_categorical_accuracy: 0.9878 - val_loss: 0.0383
Epoch 4/10
[1m1257/1257[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m202s[0m 143ms/step - categorical_accuracy: 0.9841 - loss: 0.0535 - val_categorical_accuracy: 0.9902 - val_loss: 0.0318
Epoch 5/10
[1m1257/1257[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m180s[0m 143ms/step - categorical_accuracy: 0.9862 - loss: 0.0439 - val_categorical_accuracy: 0.9826 - val_loss: 0.0591
Epoch 6/10
[1m1257/1257[0m [32m━━━━━━

In [97]:
# Class-probability rows from the deeper model for every test image.
preds_new = model_new.predict(x=X_test)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 30ms/step


In [98]:
# Collapse each probability row to the index of its most likely digit.
# Note: `preds_new` is overwritten, so run this cell exactly once after predict().
preds_new = preds_new.argmax(axis=1)

In [99]:
# Inspect the deeper model's predicted digit labels for the test set.
preds_new

array([7, 2, 1, ..., 4, 5, 6])

In [100]:
# Test-set accuracy of the deeper model, for comparison with the baseline.
accuracy_score(y_true=y_test, y_pred=preds_new)

0.9935

In [101]:
# Per-digit precision / recall / F1 for the deeper model on the test set.
report_new = classification_report(y_true=y_test, y_pred=preds_new)
print(report_new)

              precision    recall  f1-score   support

           0       1.00      0.99      1.00       980
           1       1.00      1.00      1.00      1135
           2       1.00      0.99      1.00      1032
           3       0.99      1.00      1.00      1010
           4       1.00      0.99      0.99       982
           5       0.99      0.99      0.99       892
           6       0.99      0.99      0.99       958
           7       0.99      1.00      0.99      1028
           8       0.99      1.00      0.99       974
           9       0.99      0.98      0.99      1009

    accuracy                           0.99     10000
   macro avg       0.99      0.99      0.99     10000
weighted avg       0.99      0.99      0.99     10000

