In [1]:
import numpy as np
import pandas as pd
import os
import cv2
import sklearn
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint
from keras.utils.np_utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dropout, Conv2D, MaxPool2D, Dense, Flatten
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras import backend as K
from sklearn.metrics import accuracy_score, classification_report

%matplotlib inline

In [2]:
data = os.listdir("Digits")
data_X, data_Y = [], []
num_classes = len(data)
for i in range(num_classes):
    data_list = os.listdir(f"Digits/{i}")
    for j in data_list:
        img = cv2.imread(f"Digits/{i}/{j}")
        img = cv2.resize(img, (32, 32))
        data_X.append(img)
        data_Y.append(i)
if len(data_X) == len(data_Y):
    print(f"Total Datapoints: {len(data_X)}")
else: print("Not all data extracted")
data_X = np.array(data_X)
data_Y = np.array(data_Y)

Total Datapoints: 10160


In [3]:
train_X, test_X, train_y, test_y = train_test_split(data_X, data_Y, test_size=0.05, random_state=42)
train_X, valid_X, train_y, valid_y = train_test_split(train_X, train_y, test_size=0.2, random_state=42)
print(f"Training Size = {len(train_X)}")
print(f"Validation Size = {len(valid_X)}")
print(f"Test Size = {len(test_X)}")

Training Size = 7721
Validation Size = 1931
Test Size = 508


In [4]:
def preprocess(img: np.ndarray) -> np.ndarray:
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img = cv2.equalizeHist(img)
    img = img/255.0
    return img

In [5]:
train_X = np.array(list(map(preprocess, train_X)))
test_X = np.array(list(map(preprocess, test_X)))
valid_X = np.array(list(map(preprocess, valid_X)))
train_X = np.expand_dims(train_X, axis=-1)
test_X = np.expand_dims(test_X, axis=-1)
valid_X = np.expand_dims(valid_X, axis=-1)
print(train_X.shape)

(7721, 32, 32, 1)


In [6]:
datagen = ImageDataGenerator(width_shift_range=0.1, height_shift_range=0.1, zoom_range=0.2, shear_range=0.1, rotation_range=10)

In [7]:
datagen.fit(train_X)

In [8]:
train_y.shape

(7721,)

In [9]:
train_y = to_categorical(train_y, num_classes)
test_y = to_categorical(test_y, num_classes)
valid_y = to_categorical(valid_y, num_classes)

In [10]:
model = Sequential([
    Conv2D(60, 5, input_shape=(32, 32, 1), padding='same', activation='relu'),
    Conv2D(60, 5, padding='same', activation='relu'),
    MaxPool2D(2),
    Conv2D(30, 3, padding='same', activation='relu'),
    Conv2D(30, 3, padding='same', activation='relu'),
    MaxPool2D(2, strides=2),
    Dropout(0.5),
    Flatten(),
    Dense(500, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax')
])

2021-11-28 13:12:16.374978: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [11]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 32, 32, 60)        1560      
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 32, 32, 60)        90060     
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 16, 16, 60)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 16, 16, 30)        16230     
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 16, 16, 30)        8130      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 8, 8, 30)          0         
_________________________________________________________________
dropout (Dropout)            (None, 8, 8, 30)          0

In [12]:
opt = RMSprop(lr=0.001, rho=0.9, epsilon = 1e-08, decay=0.0)
model.compile(optimizer=opt,loss='categorical_crossentropy',metrics=['accuracy'])

In [18]:
path = "best_weights.hdf5"
checkpoint = ModelCheckpoint(path, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
callback = [checkpoint]

In [19]:
history = model.fit(datagen.flow(train_X, train_y, batch_size=32), epochs = 30, validation_data = (valid_X, valid_y), verbose = 1, steps_per_epoch= 200, callbacks=callback)

Epoch 1/30

Epoch 00001: val_accuracy improved from -inf to 0.98187, saving model to best_weights.hdf5
Epoch 2/30

Epoch 00002: val_accuracy improved from 0.98187 to 0.98912, saving model to best_weights.hdf5
Epoch 3/30

Epoch 00003: val_accuracy improved from 0.98912 to 0.99068, saving model to best_weights.hdf5
Epoch 4/30

Epoch 00004: val_accuracy did not improve from 0.99068
Epoch 5/30

Epoch 00005: val_accuracy did not improve from 0.99068
Epoch 6/30

Epoch 00006: val_accuracy improved from 0.99068 to 0.99275, saving model to best_weights.hdf5
Epoch 7/30

Epoch 00007: val_accuracy did not improve from 0.99275
Epoch 8/30

Epoch 00008: val_accuracy improved from 0.99275 to 0.99379, saving model to best_weights.hdf5
Epoch 9/30

Epoch 00009: val_accuracy did not improve from 0.99379
Epoch 10/30

Epoch 00010: val_accuracy did not improve from 0.99379
Epoch 11/30

Epoch 00011: val_accuracy did not improve from 0.99379
Epoch 12/30

Epoch 00012: val_accuracy improved from 0.99379 to 0.995

In [20]:
score = model.evaluate(test_X, test_y)



In [21]:
print('Test Score = ',score[0])
print('Test Accuracy =', score[1])

Test Score =  0.03501947224140167
Test Accuracy = 0.9960629940032959
