In [1]:
# Import Statements
import cv2
import numpy as np
import tensorflow
from tensorflow import keras
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, Conv2D, BatchNormalization, MaxPooling2D, Dropout, Flatten, Dense, ReLU, LeakyReLU, Activation

In [2]:
# Function to reset whiteboard to white in between trials (when you press R)
def resetWhiteboard(display):
    """Paint the drawable whiteboard region of ``display`` white, in place.

    The region is taken from the module-level ``whiteboardArea`` dict and is
    padded by 10 px before the minimum and 11 px past the maximum on each axis
    (the slice ends at ``max + 12``, exclusive).

    Parameters:
        display: image array holding the whole application window.

    Returns:
        The same ``display`` array that was passed in.
    """
    # Pull the horizontal / vertical limits out of the shared configuration
    xLimits = whiteboardArea['x']
    yLimits = whiteboardArea['y']
    left, right = xLimits[0], xLimits[1]
    top, bottom = yLimits[0], yLimits[1]

    # Overwrite the padded rectangle with white (rows first, then columns)
    display[top - 10:bottom + 12, left - 10:right + 12] = (255, 255, 255)

    return display

In [3]:
# Function to create whiteboard (general GUI of application)
def createDisplay():
    """Build the static background image of the application window.

    The image is assembled from three black canvases: a title strip on top,
    and below it the drawing board (left) next to the status panel (right).
    Reads the module-level ``windowName``, ``actions`` and ``actionColors``.

    Returns:
        A uint8 image array containing title, board and panel joined together.
    """
    white = (255, 255, 255)
    purple = (255, 0, 179)
    font = cv2.FONT_HERSHEY_SIMPLEX

    # Blank canvases for the three parts of the window
    title = np.zeros((80, 950, 3), dtype=np.uint8)
    board = np.zeros((600, 650, 3), dtype=np.uint8)
    panel = np.zeros((600, 300, 3), dtype=np.uint8)

    # White drawing surface on the board, framed in purple
    left, top, right, bottom = 8, 5, 645, 590
    board[top:bottom, left:right] = white
    cv2.rectangle(board, (left, top), (right, bottom), purple, 3)

    # Panel frame, the two boxes (crop preview / predictions) and table lines
    cv2.rectangle(panel, (1, 4), (290, 590), purple, 2)
    for corner1, corner2 in (((22, 65), (268, 280)), ((22, 340), (268, 560))):
        cv2.rectangle(panel, corner1, corner2, white, 1)
    for start, end in (((145, 340), (145, 560)), ((22, 380), (268, 380))):
        cv2.line(panel, start, end, white, 1)

    # Static captions
    cv2.putText(title, "    " + windowName, (10, 50), font, 1.2, white, 2)
    panelCaptions = (
        ("Action: ", (23, 40), 0.6),
        ("Top 3 Predictions", (52, 320), 0.7),
        ("Prediction", (42, 362), 0.5),
        ("Accuracy %", (168, 362), 0.5),
    )
    for text, origin, scale in panelCaptions:
        cv2.putText(panel, text, origin, font, scale, white, 1)

    # The initial action label, drawn in its own colour
    initialAction = actions[0]
    cv2.putText(panel, initialAction, (95, 40), font, 0.6, actionColors[initialAction], 1)

    # Board and panel side by side, with the title strip stacked on top
    body = np.concatenate((board, panel), axis=1)
    return np.concatenate((title, body), axis=0)

In [4]:
# Function to display rest of information on top of previously created background image
def createPanel(display):
    """Redraw the dynamic status information on top of ``display``, in place.

    Reads the module-level ``statusAreas``, ``cropPreview``,
    ``cropPreviewHeight``, ``cropPreviewWidth``, ``topPredictions``,
    ``currentAction`` and ``actionColors``.

    Parameters:
        display: image array holding the whole application window.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX

    # Wipe every status region back to black before redrawing
    for (x1, y1), (x2, y2) in statusAreas.values():
        display[y1:y2, x1:x2] = (0, 0, 0)

    # Show the most recent crop, scaled into the preview region
    if cropPreview is not None:
        (x1, y1), (x2, y2) = statusAreas["preview"]
        scaledPreview = cv2.resize(cropPreview, (cropPreviewHeight, cropPreviewWidth))
        display[y1:y2, x1:x2] = scaledPreview

    # Fill the three prediction rows: (label position, accuracy position, colour)
    if topPredictions:
        rows = [
            ((725, 505), (830, 505), (0, 255, 0)),
            ((725, 562), (830, 562), (0, 179, 255)),
            ((725, 619), (830, 619), (0, 0, 255)),
        ]
        shown = list(topPredictions.items())[:len(rows)]
        for (layerCoord, accCoord, color), (layer, acc) in zip(rows, shown):
            cv2.putText(display, layer, layerCoord, font, 0.8, color, 2)
            cv2.putText(display, str(acc), accCoord, font, 0.8, color, 2)

        # Rows without a prediction get a "_" placeholder in both columns
        for layerCoord, accCoord, color in rows[len(topPredictions):]:
            cv2.putText(display, "_", layerCoord, font, 0.8, color, 2)
            cv2.putText(display, "_", accCoord, font, 0.8, color, 2)

    # Name of the active action, in that action's colour
    cv2.putText(display, currentAction, (745, 120), font, 0.6, actionColors[currentAction], 1)


In [5]:
# Main Function to handle events (Drawing, erasing, cropping)
def onMouseClick(event, x, y, flags, params):
    """Mouse callback for the application window: draw, erase and crop.

    Behaviour depends on the module-level ``currentAction``:

    * ``actions[1]`` (draw): while the left button is held inside the
      whiteboard, filled purple circles are stamped at the cursor; while the
      right button is held, white circles are stamped (erasing by painting
      over). Button releases are honoured anywhere in the window.
    * ``actions[2]`` (crop): a left-button drag selects a rectangle. On
      release the selection is cut out of ``display``, colour-inverted, passed
      to ``predict`` and shown with the resulting predictions via
      ``createPanel``.

    Parameters:
        event: OpenCV mouse event code.
        x, y: cursor position reported by OpenCV.
        flags, params: unused; required by the OpenCV callback signature.
    """
    global leftMouseDown, rightMouseDown
    global display, boundRectangleCoords, cropPreview, topPredictions

    # Drawable limits of the whiteboard
    x_min, x_max = whiteboardArea["x"]
    y_min, y_max = whiteboardArea["y"]

    # ---- Draw / erase ------------------------------------------------------
    if currentAction == actions[1]:
        # Releases are tracked even outside the whiteboard so a stroke cannot
        # get "stuck" when the cursor leaves the drawing area.
        if event == cv2.EVENT_RBUTTONUP:
            rightMouseDown = False
        elif event == cv2.EVENT_LBUTTONUP:
            leftMouseDown = False
        elif x_min <= x <= x_max and y_min <= y <= y_max:
            if event == cv2.EVENT_RBUTTONDOWN:
                rightMouseDown = True
            elif event in (cv2.EVENT_LBUTTONDOWN, cv2.EVENT_MOUSEMOVE):
                # Update the button state BEFORE picking the colour; otherwise
                # the very first left click would pick no colour at all and
                # cv2.circle would be called with color=None.
                if event == cv2.EVENT_LBUTTONDOWN:
                    leftMouseDown = True

                if leftMouseDown:
                    color = (255, 0, 179)      # purple: draw
                elif rightMouseDown:
                    color = (255, 255, 255)    # white: erase by painting over
                else:
                    return                     # plain mouse move, nothing held

                # A stroke is a chain of filled circles
                cv2.circle(display, (x, y), 8, color, -1)
                cv2.imshow(windowName, display)

    # ---- Crop --------------------------------------------------------------
    elif currentAction == actions[2]:
        if event == cv2.EVENT_LBUTTONDOWN:
            # Remember where the drag started
            boundRectangleCoords = (x, y)

        elif event == cv2.EVENT_LBUTTONUP:
            if boundRectangleCoords is None:
                # Release without a recorded press (e.g. the button went down
                # before crop mode was selected): just refresh the panel.
                createPanel(display)
                cv2.imshow(windowName, display)
                return

            startX, startY = boundRectangleCoords
            boundRectangleCoords = None

            # Clamp to the image (cursor coordinates can lie outside it, and a
            # negative index would wrap around) and order the corners so that
            # dragging up/left works like dragging down/right.
            height, width = display.shape[:2]
            left, right = sorted((min(max(startX, 0), width), min(max(x, 0), width)))
            top, bottom = sorted((min(max(startY, 0), height), min(max(y, 0), height)))

            if right <= left or bottom <= top:
                # Empty selection (click without drag): nothing to classify
                createPanel(display)
                cv2.imshow(windowName, display)
                return

            # Cut out the selection and invert its colours before predicting
            cropPreview = cv2.bitwise_not(display[top:bottom, left:right].copy())
            topPredictions = predict(model, cropPreview)

            # Show selection frame, preview and predictions on a copy so the
            # frame is never burnt into the actual drawing
            displayCopy = display.copy()
            cv2.rectangle(displayCopy, (left, top), (right, bottom), (255, 0, 179), 2)
            createPanel(displayCopy)
            cv2.imshow(windowName, displayCopy)

        elif event == cv2.EVENT_MOUSEMOVE and boundRectangleCoords is not None:
            # Live rubber-band rectangle while the drag is in progress
            displayCopy = display.copy()
            cv2.rectangle(displayCopy, boundRectangleCoords, (x, y), (255, 0, 179), 2)
            cv2.imshow(windowName, displayCopy)

In [6]:
# Function to build the CNN classifier (28x28x1 input, 36 output classes) and load its trained weights from a file.
def _buildClassifier(postDropoutBatchNorm=True):
    """Build and compile the CNN used for character classification.

    Parameters:
        postDropoutBatchNorm: when True, a BatchNormalization layer is placed
            between the Dropout and Flatten layers (7 weight-bearing layers in
            total); when False it is left out (6 weight-bearing layers).

    Returns:
        A compiled Keras ``Model`` taking 28x28x1 inputs and producing 36
        softmax outputs.
    """
    inputs = Input(shape=(28, 28, 1))

    # Two 5x5 convolution stages (32 filters, ReLU), each batch-normalized
    x = Conv2D(32, (5, 5), activation="relu")(inputs)
    x = BatchNormalization()(x)
    x = Conv2D(32, (5, 5), activation="relu")(x)
    x = BatchNormalization()(x)

    # 2x2 max pooling, then dropout of 25% of the activations
    x = MaxPooling2D((2, 2))(x)
    x = Dropout(0.25)(x)

    if postDropoutBatchNorm:
        x = BatchNormalization()(x)

    # Classifier head: 256-unit ReLU layer, then 36-way softmax output
    x = Flatten()(x)
    x = Dense(256, activation="relu")(x)
    outputs = Dense(36, activation="softmax")(x)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
    return model


def load_model(path):
    """Build the classifier and load its weights from ``path``.

    The architecture with the post-dropout BatchNormalization layer is tried
    first, so weight files that match it load exactly as before.

    NOTE(review): the notebook's recorded output shows
    "Layer count mismatch ... Model expected 7 layers, found 6 saved layers",
    i.e. the saved weights belong to a network with one weight-bearing layer
    fewer. As a fallback the variant without the post-dropout
    BatchNormalization is tried. That this is the layer missing from the file
    is an assumption -- confirm against the training code. If the fallback
    does not fit either, the original error is raised.

    Parameters:
        path: location of the saved weights file.

    Returns:
        The compiled Keras model with the weights loaded.

    Raises:
        ValueError: if the weights fit neither architecture variant.
    """
    model = _buildClassifier(postDropoutBatchNorm=True)
    try:
        model.load_weights(path)
    except ValueError as mismatch:
        # Retry on a fresh model so nothing from the failed attempt is kept
        fallback = _buildClassifier(postDropoutBatchNorm=False)
        try:
            fallback.load_weights(path)
        except ValueError:
            # Neither variant fits: report the mismatch of the primary design
            raise mismatch
        model = fallback

    return model


In [7]:
# Main prediction function
def predict(model, image):
    """Classify a cropped drawing and return its best guesses.

    Parameters:
        model: object whose ``predict`` method is called on a (1, 28, 28, 1)
            array.
        image: BGR image array of the cropped character.

    Returns:
        Dict mapping class label to its score in percent (rounded to one
        decimal) for at most the three highest-scoring classes, best first.
        Classes whose rounded score is not above 0 are left out.
    """
    # Class labels in output order: capital letters, then digits
    layers = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")

    # Grayscale -> 28x28 -> pixel values scaled by 1/255
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    scaled = cv2.resize(gray, (28, 28)) / 255.0

    # Add the batch and channel axes the model input expects
    batch = np.reshape(scaled, (1, scaled.shape[0], scaled.shape[1], 1))

    prediction = model.predict(batch)

    # Indices of the three largest scores, highest first
    ranked = np.argsort(prediction)[0][::-1][:3]

    topPredictions = {}
    for idx in ranked:
        percent = round(prediction[0][idx] * 100, 1)
        if percent > 0:
            topPredictions[layers[idx]] = percent

    return topPredictions

In [8]:
# Main running code (main function basically)
# Mouse buttons start off unclicked
leftMouseDown = rightMouseDown = False
boundRectangleCoords = lbdCoord = lbuCoord = None

# Drawable whiteboard limits in window coordinates (min, max per axis)
whiteboardArea = {"x": (20, 633), "y": (98, 657)}
windowName = "D: Draw | C: Crop | R: Reset | E: End"

# Creating dictionary to store results
topPredictions = dict()
# NOTE: despite the names, 238 is the preview region's width and 206 its height
# (see statusAreas["preview"]). createPanel passes them to cv2.resize as
# (cropPreviewHeight, cropPreviewWidth), and cv2.resize expects (width, height),
# so the result fits the region.
cropPreviewHeight, cropPreviewWidth, cropPreview = 238, 206, None

# All available actions
actions = ["NONE", "DRAW", "CROP"]

# Their respective word colors
actionColors = {actions[0]: (0, 0, 255), actions[1]: (0, 255, 0),actions[2]: (0, 255, 255)}

# Action is nothing yet
currentAction = actions[0]

# Status regions of the panel as ((x1, y1), (x2, y2)) in window coordinates
statusAreas = {"Action": ((736, 97), (828, 131)),"preview": ((676, 150), (914, 356)),"layers": ((678, 468), (790, 632)),"accs": ((801, 468), (913, 632))}

# Loading prediction model
model = load_model("../models/validationModel.h5")

# Setting up the display (It is an image)
display = createDisplay()
cv2.imshow(windowName, display)
cv2.setMouseCallback(windowName, onMouseClick)

# Loop made to constantly check for input and react appropriately.
# The keyboard is polled every 1 ms; when a known key is detected the action
# is changed and the panel redrawn. Upper- and lowercase keys are accepted.
try:
    while True:
        # Mask to the low byte: some platforms return extra high bits
        k = cv2.waitKey(1) & 0xFF
        key = chr(k).lower()

        # D for draw
        if key == 'd':
            currentAction = actions[1]
            createPanel(display)
            cv2.imshow(windowName, display)
        # C for crop
        elif key == 'c':
            currentAction = actions[2]
            createPanel(display)
            cv2.imshow(windowName, display)
        # R for reset
        elif key == 'r':
            resetWhiteboard(display)
            cv2.imshow(windowName, display)
        # E for end
        elif key == 'e':
            break

        # Leave the loop when the window was closed from its title bar;
        # otherwise the cell would spin forever with no window to press E in.
        try:
            if cv2.getWindowProperty(windowName, cv2.WND_PROP_VISIBLE) < 1:
                break
        except cv2.error:
            # Some GUI backends raise instead of reporting a closed window
            break
finally:
    # Also runs when the kernel is interrupted, so no window is left behind
    cv2.destroyAllWindows()

ValueError: Layer count mismatch when loading weights from file. Model expected 7 layers, found 6 saved layers.