# <u>Import required libraries</u>

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import load_img, img_to_array, array_to_img

In [2]:
import PIL

# <u>Image Augmentation because the original dataset is too small</u>

In [61]:
# Augmentation policy used to enlarge the small original dataset: random
# rotations, horizontal/vertical shifts and zooms, with fill_mode='nearest'
# selecting how pixels uncovered by a transform are filled.
datagen = ImageDataGenerator(
    rotation_range=5,
    width_shift_range=0.2,
    height_shift_range=0.1,
    zoom_range=[0.9, 1.8],
    fill_mode='nearest',
)

In [62]:
# Write 100 randomly augmented PNG variants of every image in "dataset" into
# the "preview" folder. The first character of the source file name is passed
# as the save prefix so the digit label travels with each generated file.

# The output folder has to exist before files are saved into it; create it up
# front so a fresh run does not fail.
os.makedirs('preview', exist_ok=True)

for filename in os.listdir("dataset"):
    image = load_img(os.path.join("dataset", filename))
    x = img_to_array(image)
    # Add a leading axis of length 1: flow() is fed a batch holding one image.
    x = x.reshape((1,) + x.shape)

    digit = filename[0]

    augmented = datagen.flow(x, batch_size=1, save_to_dir='preview',
                             save_prefix=digit, save_format='png')
    # The iterator does not stop on its own, so draw a fixed number of
    # batches; each draw saves one augmented image.
    for _ in range(100):
        next(augmented)

# <u>Data preprocessing</u>

In [63]:
def data(path, img):
    """Load one image and derive its digit label from the file name.

    Parameters
    ----------
    path : str
        Location of the image file, handed to ``cv2.imread``.
    img : str
        File name of the image; its first character is the class label.

    Returns
    -------
    tuple
        ``(image, digit)``. ``image`` is the colour image with its channel
        axis reversed and resized with ``dsize=(100, 162)``. ``digit`` is an
        ``int`` in 0-9, or ``''`` when the file name does not start with an
        ASCII digit (kept for backward compatibility with existing callers).

    Raises
    ------
    OSError
        If OpenCV cannot read or decode the file at ``path``.
    """
    image = cv2.imread(path, cv2.IMREAD_COLOR)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than with an opaque
        # "NoneType is not subscriptable" on the next line.
        raise OSError(f"could not read image file: {path!r}")

    # Reverse the channel axis (OpenCV loads colour images as BGR), then
    # resize. cv2.resize takes dsize as (width, height), so the result has
    # 162 rows and 100 columns.
    image = cv2.resize(image[..., ::-1], (100, 162))

    # Explicit ASCII membership test: str.isdigit() would also accept
    # non-ASCII digit characters that the original chain did not match.
    first = img[0]
    digit = int(first) if first in "0123456789" else ''

    return (image, digit)
        

In [64]:
# Build the list of (image, label) pairs, one per file found in "new_dataset".
dataset = [
    data(os.path.join("./new_dataset", img), img)
    for img in os.listdir("new_dataset")
]

In [65]:
# Sanity check: number of (image, label) pairs that were loaded.
len(dataset)

2982

# <u>Dataset shuffling for better training of the model</u>

In [66]:
import random
# Shuffle the (image, label) pairs in place so the sample order no longer
# follows the directory listing. No seed is set in this cell, so the order
# is expected to differ between runs.
random.shuffle(dataset)

In [74]:
# Split the shuffled (image, label) pairs into two parallel lists:
# X holds the images, Y the matching digit labels.
X = [pair[0] for pair in dataset]
Y = [pair[1] for pair in dataset]

In [86]:
# Number of samples labelled 9 (quick class-balance check). This uses
# list.count, so it must run while Y is still a Python list.
Y.count(9)

303

# <u>Converting the feature and label lists into separate numpy arrays</u>

In [68]:
# Stack the per-sample images into one float32 array.
X = np.array(X, dtype=np.float32)
# Turn the label list into an array; dtype is left for NumPy to infer.
Y = np.array(Y)

In [70]:
# Display the label array as a quick visual check.
Y

array([2, 5, 2, ..., 0, 1, 3])

# <u>Feature and label data serialization for further use</u>

In [71]:
import pickle

# Serialize the feature array X to the file 'feature'. The with-block closes
# the file even if pickle.dump raises, which the manual open/close did not.
with open('feature', 'wb') as feature:
    pickle.dump(X, feature)

In [72]:
# Serialize the label array Y to the file 'label'. The with-block closes the
# file even if pickle.dump raises, which the manual open/close did not.
with open('label', 'wb') as label:
    pickle.dump(Y, label)