The code below preprocesses the images. Relevant comments are given in the code.
The steps are as follows:
i. adding padding to the images
ii. grayscaling the image
iii. inverting the image
iv. applying gaussian blur
v. adaptive thresholding to convert image to black and white
vi. resizing all images to 28X28 pixels

In [None]:
#preprocessing images
from matplotlib import pyplot as plt
import cv2
import numpy as np
import re
from string import ascii_lowercase
import os
from pathlib import Path

# adding padding to the image
def padding(img):
    """Pad an image to a square by replicating its edge pixels.

    The shorter side is extended on both ends until the height equals the
    width. When the difference is odd, the extra pixel goes to the
    bottom/right so the result is always exactly square.

    Args:
        img: image array laid out as (rows, cols) or (rows, cols, channels).

    Returns:
        The input array itself when it is already square, otherwise a new
        square array with the same dtype and channel count.
    """
    # NumPy/OpenCV images are indexed (rows, cols[, channels]):
    # rows is the height, cols is the width.
    rows, cols = img.shape[:2]
    if rows == cols:
        return img

    diff = abs(rows - cols)
    before = diff // 2
    after = diff - before  # absorbs the leftover pixel of an odd difference

    # Pad only the SHORTER axis; padding the longer one would make the image
    # even less square.
    pad_width = [(0, 0)] * img.ndim
    short_axis = 0 if rows < cols else 1
    pad_width[short_axis] = (before, after)

    # mode="edge" repeats the outermost pixels, like cv2.BORDER_REPLICATE
    return np.pad(img, pad_width, mode="edge")

# removing noise manually using dilation, erosion and blurring of the images
def remove_noise(img):
    """Apply one dilate -> erode -> close -> median-blur pass to ``img``.

    Args:
        img: image array accepted by the OpenCV morphology functions.

    Returns:
        The image after the four steps, as returned by ``cv2.medianBlur``
        with an aperture of 3.
    """
    # One structuring element is shared by all three morphological steps.
    # NOTE(review): a 1x1 kernel covers a single pixel, so dilate/erode/close
    # appear to leave the image unchanged and only the median blur has an
    # effect; a larger kernel may have been intended -- confirm.
    unit_kernel = np.ones((1, 1), np.uint8)
    dilated = cv2.dilate(img, unit_kernel, iterations=1)
    eroded = cv2.erode(dilated, unit_kernel, iterations=1)
    closed = cv2.morphologyEx(eroded, cv2.MORPH_CLOSE, unit_kernel)
    return cv2.medianBlur(closed, 3)

# preprocessing the image using all the above function systematically
def preprocessing(img):
    """Turn a colour image into a denoised 28x28 single-channel image.

    Steps, in order: square padding, grayscale conversion, inversion,
    Gaussian blur, adaptive mean thresholding, repeated denoising, a second
    inversion and a resize to 28x28.

    Args:
        img: image array that ``padding`` and ``cv2.COLOR_BGR2GRAY`` accept.

    Returns:
        The 28x28 array produced by ``cv2.resize``.
    """
    squared = padding(img)
    grayscale = cv2.cvtColor(squared, cv2.COLOR_BGR2GRAY)
    inverted = cv2.bitwise_not(grayscale)

    # NOTE(review): the sigma argument is negative; OpenCV documents a
    # non-positive sigma as "derive it from the kernel size" -- confirm intent.
    blurred = cv2.GaussianBlur(inverted, (5,5), -10)

    # adaptive mean threshold over 3x3 neighbourhoods, inverted binary output
    cleaned = cv2.adaptiveThreshold(
        blurred,
        255,
        cv2.ADAPTIVE_THRESH_MEAN_C,
        cv2.THRESH_BINARY_INV,
        3,
        0,
    )

    # four passes of the built-in non-local-means denoiser ...
    for _ in range(4):
        cleaned = cv2.fastNlMeansDenoising(cleaned, None, 20, 7, 31)
    # ... followed by eight passes of the manual noise removal
    for _ in range(8):
        cleaned = remove_noise(cleaned)

    # invert back, then shrink to the 28x28 input size of the neural network
    restored = cv2.bitwise_not(cleaned)
    return cv2.resize(restored, (28,28), interpolation=cv2.INTER_AREA)

# parent directory of the stored images; everything below it is walked
# recursively by the p.glob("**/*") loop further down
path = r"C:\Users\amite\OneDrive\Desktop\Explo"
# pathlib wrapper around the same directory, used for the recursive glob
p = Path(path)

# function to make folders for future labelling
def folder(path=r"C:\Users\amite\OneDrive\Desktop\Explo\Preprocessed\\"):
    """Create the label folders ``a``..``z`` and ``a_``..``z_`` under ``path``.

    Missing parent directories are created and folders that already exist
    are left untouched, so the function can be called repeatedly (for
    example when the notebook cell is re-run).

    Args:
        path: directory that receives the 52 label folders. Defaults to the
            ``Preprocessed`` output directory.
    """
    for c in ascii_lowercase:
        for label in (c, c + '_'):
            # exist_ok avoids the FileExistsError that os.mkdir raises on a re-run
            os.makedirs(os.path.join(path, label), exist_ok=True)
# create the label folders before any image is written
folder()

# indexing the images while saving
index = 0
# The output directory lives inside the scanned tree, so everything below it
# must be skipped; otherwise images written by this loop (or by an earlier
# run) would be picked up and preprocessed again.
output_root = Path(r"C:\Users\amite\OneDrive\Desktop\Explo\Preprocessed")
image_suffixes = (".jpg", ".jpeg", ".png")
# iterating through all the cropped images, preprocessing them and saving them in the made labelled folders
for i in p.glob("**/*"):
    if output_root in i.parents:
        continue
    # Decide by the real file extension instead of searching the whole path
    # for "jpg"/"png"; the original name-length filter (< 9 characters) is kept.
    if i.suffix.lower() not in image_suffixes or len(i.name) >= 9:
        continue
    first = i.name[0]
    if first.lower() not in ascii_lowercase:
        # label folders only exist for names starting with a letter
        continue
    image = cv2.imread(str(i), cv2.IMREAD_UNCHANGED)
    if image is None:
        # cv2.imread signals an unreadable file by returning None
        print("Skipping unreadable image:", i)
        continue
    image = preprocessing(image)
    # upper-case letters are stored in the "<letter>_" folder; the folder
    # names themselves are lower-case
    label = first.lower() + "_" if first.isupper() else first
    dest_file = output_root / label / (str(index) + ".png")
    # write via the full path instead of changing the working directory
    if not cv2.imwrite(str(dest_file), image):
        print("Could not write:", dest_file)
        continue
    index += 1
    print(index, "file(s) processed")

The code below cleans the borders of the images by setting the border pixels (up to 2 pixels wide) to 0 (black).

In [None]:
# removing border pixels that were present in the image
import pandas as pd

# function to remove border pixels which can cause inconsistency in data
def cleanborder(img, width=2):
    """Set a ``width``-pixel frame around ``img`` to 0 (black), in place.

    Works for any image size and for both single-channel and multi-channel
    arrays.

    Args:
        img: NumPy image array of shape (rows, cols) or (rows, cols, channels).
            It is modified in place.
        width: thickness of the frame in pixels. Values <= 0 leave the image
            unchanged.

    Returns:
        The same array object that was passed in.
    """
    if width <= 0:
        # img[-0:] would select the whole array, so bail out explicitly
        return img
    # top and bottom rows
    img[:width] = 0
    img[-width:] = 0
    # left and right columns; note that img[:][i] would index ROW i again,
    # so the column axis has to be addressed as img[:, i]
    img[:, :width] = 0
    img[:, -width:] = 0
    return img

# making the required label folders
def folder(path=r"C:\Users\amite\OneDrive\Desktop\Explo\More_Preprocessed\\"):
    """Create the label folders ``a``..``z`` and ``a_``..``z_`` under ``path``.

    Missing parent directories are created and folders that already exist
    are left untouched, so re-running the cell does not raise.

    Args:
        path: directory that receives the 52 label folders. Defaults to the
            ``More_Preprocessed`` output directory.
    """
    for c in ascii_lowercase:
        for label in (c, c + '_'):
            # exist_ok avoids the FileExistsError that os.mkdir raises on a re-run
            os.makedirs(os.path.join(path, label), exist_ok=True)
# create the label folders before any image is written
folder()

parent_folder_path = r"C:\Users\amite\OneDrive\Desktop\Explo\Preprocessed\\"
p = Path(parent_folder_path)
dest_root = Path(r"C:\Users\amite\OneDrive\Desktop\Explo\More_Preprocessed")
# the only folder names that carry a label: a..z and a_..z_
valid_labels = {c + tail for c in ascii_lowercase for tail in ("", "_")}
image_suffixes = (".jpg", ".jpeg", ".png")

# iterating through all preprocessed images, removing borders, then saving them in new folder
x = 0
for i in p.glob("**/*"):
    # decide by the real file extension instead of searching the whole path
    # for "jpg"/"png"
    if i.suffix.lower() not in image_suffixes:
        continue
    # the label is the name of the folder the image sits in
    label = i.parent.name
    if label not in valid_labels:
        # e.g. a file lying directly in the parent folder has no label
        continue
    image = cv2.imread(str(i))
    if image is None:
        # cv2.imread signals an unreadable file by returning None
        print("Skipping unreadable image:", i)
        continue
    cleaned_img = cleanborder(image)
    dest_file = dest_root / label / (str(x) + ".png")
    print(dest_file.parent)
    # write via the full path instead of changing the working directory
    if not cv2.imwrite(str(dest_file), cleaned_img):
        print("Could not write:", dest_file)
        continue
    x += 1
    print(x, "file(s) processed")

Previously images were not labelled well so we have relabelled them.

In [None]:
# labelling the data
import shutil
# input folder; the loop below treats every entry in it as a label sub-folder
# and reuses that entry's name as the destination sub-folder name
source_folder = r"C:\Users\amite\OneDrive\Desktop\Exploration-2\testing\\"
# output folder that receives the relabelled copies
# (a raw string cannot end in a single backslash, hence the doubled one;
# both values therefore end in two backslash characters)
dest_folder = r"C:\Users\amite\OneDrive\Desktop\preprocesses\labelled_images\\"
def makefolder():
    """Make sure ``dest_folder`` holds the sub-folders a..z and a_..z_.

    Folders that already exist are kept as they are.
    """
    for letter in ascii_lowercase:
        for label in (letter, letter+'_'):
            target = os.path.join(dest_folder, label)
            os.makedirs(target, exist_ok=True)
# create the destination label folders first
makefolder()
for i in os.listdir(source_folder):
    src_dir = os.path.join(source_folder, i)
    if not os.path.isdir(src_dir):
        # stray files in the source folder (e.g. desktop.ini) are not labels
        continue
    dst_dir = os.path.join(dest_folder, i)
    if not os.path.isdir(dst_dir):
        # only the folders made by makefolder() are valid labels
        print("Skipping unknown label folder:", i)
        continue
    for count, j in enumerate(os.listdir(src_dir)):
        src = os.path.join(src_dir, j)
        if not os.path.isfile(src):
            # shutil.copy cannot copy nested directories
            continue
        # NOTE(review): the +800 offset presumably keeps the new names clear of
        # files already numbered below 800 in the destination -- confirm
        dst = os.path.join(dst_dir, str(count+800) + ".png")
        shutil.copy(src, dst)
        print("Processed image:", i, count)

Splitting the images into training and testing data in a 60:40 split.

In [None]:
# splitting testing and training data
# all the required folder paths
# root_dir: labelled images, one sub-folder per label (input of the split)
root_dir = r"C:\Users\amite\OneDrive\Desktop\preprocesses\labelled_images\\"
# split_dir: folder that receives the split copies (output of the split)
split_dir = r"C:\Users\amite\OneDrive\Desktop\preprocesses\split_data\\"
# ratio for splitting
# fraction of the images that goes to the second (held-out) part
test_ratio = 0.4
# using python module
# NOTE: the leading "!" is an IPython/Jupyter shell escape, so this line only
# works inside a notebook, not in a plain .py script
!pip install split-folders
import splitfolders
input_folder = root_dir
output =  split_dir

# fixed seed (42) so the split is reproducible; ratio is (0.6, 0.4)
# NOTE(review): per the split-folders docs a two-element ratio produces
# "train" and "val" sub-folders (no "test") -- confirm downstream code
# expects that naming
splitfolders.ratio(input_folder, output=output, seed=42, ratio=(1-test_ratio,test_ratio))