Collecting Images

In [None]:
def screen_capture(images_folder="new_images/", num_images=10, delay=10,
                   bbox=(150, 200, 1150, 800)):
    """Grab a series of screenshots of one screen region and save them as PNGs.

    Args:
        images_folder: Folder the PNG files are written to. The default,
            "new_images/", is the folder used for testing; pass e.g.
            "captured_images/0/" to collect training images for digit 0.
        num_images: How many screenshots to take. Files are named
            ``0.png`` .. ``<num_images - 1>.png``.
        delay: Seconds to wait before each grab, leaving time to redraw.
        bbox: Screen region to capture, passed straight to ``ImageGrab.grab``.
    """
    import os
    import time

    import pyscreenshot as ImageGrab

    # Saving into a folder that does not exist fails, so create it up front.
    if images_folder:
        os.makedirs(images_folder, exist_ok=True)

    for i in range(num_images):
        time.sleep(delay)
        im = ImageGrab.grab(bbox=bbox)  # coordinates capturing image on screen
        im.save(os.path.join(images_folder, str(i) + '.png'))
        # Report success only once the file has actually been written.
        print("saved..", i)
        print("Clear screen and redraw again...")

# Start a capture session; each screenshot is written to disk by screen_capture.
screen_capture()

Create labeled data

In [6]:
import numpy as np # pip install numpy

def create_label(image_name):
    """Return the one-hot label vector for a digit name.

    ``image_name`` is compared against the strings '0' through '9'. On a
    match, the result is a length-10 integer array with a 1 at that digit's
    position and 0 elsewhere. Any other value yields ``None``.
    """
    for position, digit in enumerate('0123456789'):
        if image_name == digit:
            one_hot = np.zeros(10, dtype=int)
            one_hot[position] = 1
            return one_hot
    return None

In [8]:
# Required dependencies (install with pip if missing)
import os
import cv2 #pip install opencv-python
from random import shuffle
from tqdm import tqdm
 
def create_data(root="captured_images", size=(28, 28)):
    """Load the labeled training images as resized grayscale arrays.

    Each sub-folder of ``root`` is expected to be named after the digit it
    contains; the folder name is turned into a one-hot label by
    ``create_label``.

    Args:
        root: Folder holding one sub-folder of images per digit.
        size: Target size passed to ``cv2.resize``.

    Returns:
        A shuffled list of ``[image_array, one_hot_label]`` pairs.
    """
    data = []
    for folder in tqdm(os.listdir(root)):
        folder_path = os.path.join(root, folder)
        # Stray files in the root (e.g. .DS_Store) would make os.listdir raise.
        if not os.path.isdir(folder_path):
            continue
        label = create_label(folder)
        # A folder that is not named after a digit has no label; keeping its
        # images would put None into the training targets.
        if label is None:
            continue
        for img in os.listdir(folder_path):
            path = os.path.join(folder_path, img)
            # Read image in grayscale mode; imread returns None on failure.
            img_data = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
            if img_data is None:
                continue
            try:
                img_data = cv2.resize(img_data, size)
            except cv2.error:
                continue
            # Give every sample its own label array, as the original did.
            data.append([np.array(img_data), label.copy()])
    shuffle(data)  # Randomly shuffle the data to prevent sequence bias
    return data

In [9]:
# Build the shuffled list of [image_array, one_hot_label] pairs used for training and testing.
data = create_data()


100%|█████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 144.94it/s]

Divide data into training and testing sets

In [None]:
# Use the first TRAIN_SIZE samples for training and the rest for testing.
# If the dataset is too small for that to leave any test samples, fall back
# to an 80/20 split so the validation set is never empty.
TRAIN_SIZE = 800
split_idx = TRAIN_SIZE if len(data) > TRAIN_SIZE else int(len(data) * 0.8)
train = data[:split_idx]
test = data[split_idx:]

# Stack the images and add a trailing channel axis: (n_samples, 28, 28, 1).
X_train = np.array([i[0] for i in train]).reshape(-1, 28,28, 1)
y_train = [i[1] for i in train]
X_test = np.array([i[0] for i in test]).reshape(-1, 28,28, 1)
y_test = [i[1] for i in test]

In [None]:
# install required dependencies
import warnings
warnings.filterwarnings('ignore')
 
import tensorflow as tf
import tflearn
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression

In [None]:
# Start from a fresh default graph so leftovers from earlier runs are cleared
tf.reset_default_graph()

# Input layer: 28x28 images with a single (grayscale) channel
convnet = input_data(shape=[28,28, 1], name='input')

# Five identical stages that differ only in filter count: a 5x5 ReLU
# convolution followed by max pooling with kernel size 5
for n_filters in (32, 64, 128, 64, 32):
    convnet = conv_2d(convnet, n_filters, 5, activation='relu')
    convnet = max_pool_2d(convnet, 5)

# Classifier head: dense ReLU layer, dropout (0.5), then a 10-way softmax
convnet = fully_connected(convnet, 1024, activation='relu')
convnet = dropout(convnet, 0.5)
convnet = fully_connected(convnet, 10, activation='softmax')

# Training setup: Adam optimizer, learning rate 0.001, categorical cross-entropy
convnet = regression(
    convnet,
    optimizer='adam',
    learning_rate=0.001,
    loss='categorical_crossentropy',
    name='targets',
)
model = tflearn.DNN(convnet, tensorboard_verbose=1)

# Train for 12 epochs, validating against the held-out test split
model.fit(
    {'input': X_train},
    {'targets': y_train},
    n_epoch=12,
    validation_set=({'input': X_test}, {'targets': y_test}),
    show_metric=True,
)

Predict and display output using matplotlib

In [None]:
# Same loading steps as create_data above, but for the unlabeled images in new_images/
def create_test_data():
    """Load the images in ``new_images`` as 28x28 grayscale arrays.

    Returns a shuffled list of ``[image_array, name]`` pairs, where ``name``
    is the file name up to its first dot. Files for which ``cv2.resize``
    raises ``cv2.error`` are skipped.
    """
    folder = "new_images"
    samples = []
    for filename in tqdm(os.listdir(folder)):
        name = filename.partition('.')[0]
        pixels = cv2.imread(os.path.join(folder, filename), cv2.IMREAD_GRAYSCALE)
        try:
            pixels = cv2.resize(pixels, (28,28))
        except cv2.error:
            continue
        samples.append([np.array(pixels), name])

    shuffle(samples)
    return samples

In [None]:
# Load the images to classify as a shuffled list of [image_array, name] pairs.
test_data = create_test_data()

In [None]:
import matplotlib.pyplot as plt  # Import the Matplotlib library for plotting

# Create a figure with a specified size
fig = plt.figure(figsize=(10,10))

# Loop through the first 10 samples in the test data.
# The loop variable is deliberately not called `data`: that name holds the
# training dataset, and overwriting it here would break a re-run of the
# train/test split cell.
for num, sample in enumerate(test_data[:10]):
    img_data = sample[0]  # Extract the image data
    ax = fig.add_subplot(5, 5, num + 1)  # Add a subplot in a 5x5 grid for each image
    model_input = img_data.reshape(28,28, 1)  # Reshape the image to (28, 28, 1) for model input

    # Predict the label using the trained model
    model_out = model.predict([model_input])
    str_label = "Prediction: " + str(np.argmax(model_out))  # Get the predicted class label

    # Display the image in grayscale
    ax.imshow(img_data, cmap='gray')
    # Title this subplot explicitly rather than relying on pyplot's current axes
    ax.set_title(str_label)
    ax.get_xaxis().set_visible(False)  # Hide x-axis
    ax.get_yaxis().set_visible(False)  # Hide y-axis

# Show the figure with all predictions
plt.show()

Tutorial Reference & Original Source: https://igtechteam.wordpress.com/2023/06/25/handwritten-digit-recognition-using-cnn-deep-learning-project/