# <u>Import required libraries</u>

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import load_img, img_to_array, array_to_img
import PIL

# <u>Image Augmentation because the original dataset is too small</u>

In [28]:
# Augmentation settings: a small random rotation, horizontal/vertical shifts
# and a zoom factor drawn from the [0.9, 1.8] range. Pixels uncovered by a
# transform are filled using the 'nearest' mode.
datagen = ImageDataGenerator(
    rotation_range=5,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=[0.9, 1.8],
    fill_mode='nearest',
)

In [29]:
# Write 10 augmented PNG variants of every image found in "dataset".
# Each generated file name is prefixed with the first character of the source
# file name, so that character stays available as a class marker.
# NOTE(review): output goes to 'preview', while the preprocessing cell reads
# from "new_dataset" — confirm how the files are moved between the two.

# Make sure the output directory exists before Keras tries to save into it.
os.makedirs('preview', exist_ok=True)

for img in os.listdir("dataset"):
    image = load_img(os.path.join("dataset", img))
    x = img_to_array(image)
    x = x.reshape((1,) + x.shape)  # add a leading batch axis of size 1

    alpha = img[0]

    augmented = datagen.flow(x, batch_size=1, save_to_dir='preview',
                             save_prefix=alpha, save_format='png')
    # flow() never stops on its own; zip() with a range pulls exactly 10
    # batches (each one is saved to disk as it is produced) and then ends.
    for _, _batch in zip(range(10), augmented):
        pass

# <u>Data preprocessing</u>

In [30]:
def data(path, img):
    """Load one image and derive its integer class label from the file name.

    Parameters
    ----------
    path : str
        Location of the image file on disk.
    img : str
        File name of the image. Its first character encodes the class:
        ``'A'`` maps to 0, ``'B'`` to 1, and so on.

    Returns
    -------
    tuple
        ``(image, alpha)``: ``image`` is the picture with its channel axis
        reversed (OpenCV's BGR order becomes RGB) and resized with
        ``dsize=(100, 162)``, i.e. 100 wide by 162 high; ``alpha`` is
        ``ord(img[0]) - ord('A')``.

    Raises
    ------
    OSError
        If OpenCV cannot read or decode the file at ``path``.
    """
    image = cv2.imread(path, cv2.IMREAD_COLOR)
    if image is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with the offending path instead of a TypeError below.
        raise OSError(f"Could not read image file: {path!r}")

    image = image[..., ::-1]  # reverse the channel axis: BGR -> RGB
    image = cv2.resize(image, (100, 162))

    # No case normalisation: a lowercase first letter yields a label >= 32.
    alpha = ord(img[0]) - ord("A")

    return (image, alpha)
        

In [31]:
# Build the list of (image, label) samples, one per entry in "new_dataset".
dataset = [
    data(os.path.join("./new_dataset", file_name), file_name)
    for file_name in os.listdir("new_dataset")
]

In [32]:
# Number of (image, label) samples that were loaded.
len(dataset)

7921

In [33]:
# Spot-check one sample: index 1 of each tuple is the integer class label.
dataset[7500][1]

24

# <u>Dataset shuffling for better training of the model</u>

In [40]:
import random
# Randomise the order of the (image, label) pairs in place. No seed is set,
# so the resulting order differs from run to run.
random.shuffle(dataset)

In [41]:
# Split the (image, label) pairs into two parallel lists:
# X holds the images, Y holds the matching integer labels.
X = [feature for feature, _ in dataset]
Y = [label for _, label in dataset]

In [42]:
# How many samples carry label 10 (file names starting with 'K', since
# labels are ord(first character) - 65).
Y.count(10)

301

# <u>Unzipping feature and label data into separate numpy arrays</u>

In [43]:
# Turn the Python lists into numpy arrays: images are stored as float32,
# labels keep the dtype numpy infers for them.
X = np.asarray(X, dtype=np.float32)
Y = np.asarray(Y)

In [44]:
# Display the label array as a quick sanity check.
Y

array([ 2, 15, 12, ..., 25, 16, 20])

# <u>Feature and label data serialization for further usage</u>

In [45]:
import pickle

# Serialize the feature array X to the file 'feature'. The context manager
# closes the file even if pickling fails part-way through.
with open('feature', 'wb') as feature:
    pickle.dump(X, feature)

In [46]:
# Serialize the label array Y to the file 'label'. The context manager
# closes the file even if pickling fails part-way through.
with open('label', 'wb') as label:
    pickle.dump(Y, label)