In [1]:
import tensorflow as tf
import numpy as np
import cv2
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [2]:
def load_az_dataset(datasetPath):
    # initialize the list of data and labels
    data = []
    labels = []
    # loop over the rows of the A-Z handwritten digit dataset
    for row in open(datasetPath):
        # parse the label and image from the row
        row = row.split(",")
        label = int(row[0])
        image = np.array([int(x) for x in row[1:]], dtype="uint8")
        # images are represented as single channel (grayscale) images
        # that are 28x28=784 pixels -- we need to take this flattened
        # 784-d list of numbers and repshape them into a 28x28 matrix
        image = image.reshape((28, 28))
        # update the list of data and labels
        data.append(image)
        labels.append(label)
    # convert the data and labels to NumPy arrays
    data = np.array(data, dtype="float32")
    labels = np.array(labels, dtype="int")
    # return a 2-tuple of the A-Z data and labels
    return (data, labels)

In [3]:
def load_mnist_dataset():
    # load the MNIST dataset and stack the training data and testing
    # data together (we'll create our own training and testing splits
    # later in the project)
    ((trainData, trainLabels), (testData, testLabels)) = tf.keras.datasets.mnist.load_data()
    data = np.vstack([trainData, testData])
    labels = np.hstack([trainLabels, testLabels])
    # return a 2-tuple of the MNIST data and labels
    return (data, labels)

In [4]:
(azData, azLabels) = load_az_dataset('data/NIST_SD19/A_Z Handwritten Data/A_Z Handwritten Data.csv')
(digitsData, digitsLabels) = load_mnist_dataset()

In [5]:
azLabels += 10
data = np.vstack([azData, digitsData])
labels = np.hstack([azLabels, digitsLabels])

In [6]:
labelNames = "0123456789"
labelNames += "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
labelNames = [l for l in labelNames]

In [7]:
data = [cv2.resize(image, (36, 36)) for image in data]
data = np.array(data, dtype="float32")
data /= 255.0

In [8]:
# convert the labels from integers to vectors
le = LabelBinarizer()
labels = le.fit_transform(labels)
counts = labels.sum(axis=0)
# account for skew in the labeled data
classTotals = labels.sum(axis=0)
classWeight = {}
# loop over all classes and calculate the class weight
for i in range(0, len(classTotals)):
    classWeight[i] = classTotals.max() / classTotals[i]

In [9]:
AUTOTUNE = tf.data.AUTOTUNE

# Mapping characters to integers.
char_to_num = tf.keras.layers.experimental.preprocessing.StringLookup(vocabulary=labelNames, mask_token=None)

# Mapping integers back to original characters.
num_to_char = tf.keras.layers.experimental.preprocessing.StringLookup(
    vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True
)

def process_images_labels(image, label):
    return {"image": tf.convert_to_tensor(image), "label": tf.convert_to_tensor(label)}

def prepare_dataset(images, labels):
    dataset = tf.data.Dataset.from_tensor_slices((images, labels)).map(
        process_images_labels, num_parallel_calls=AUTOTUNE)
    return dataset.batch(128).cache().prefetch(AUTOTUNE)

In [10]:
X_train, X_test, y_train, y_test = train_test_split(data,
                                                    labels,
                                                    test_size=0.2,
                                                    random_state=42)

X_train, X_val, y_train, y_val = train_test_split(X_train,
                                                    y_train,
                                                    test_size=0.2,
                                                    random_state=42)

train_dataset = prepare_dataset(X_train, y_train)
val_dataset = prepare_dataset(X_val, y_val)
test_dataset = prepare_dataset(X_test, y_test)

In [11]:
def build_model():
    # Inputs to the model
    inp = tf.keras.Input(shape=(36, 36, 1), name="image")

    # First conv block.
    x = tf.keras.layers.Conv2D(
        32,
        (3, 3),
        activation="relu",
        kernel_initializer="he_normal",
        padding="same",
        name="Conv1",
    )(inp)
    # x = tf.keras.layers.MaxPooling2D((2, 2), name="pool1")(x)

    # Second conv block.
    x = tf.keras.layers.Conv2D(
        64,
        (3, 3),
        activation="relu",
        kernel_initializer="he_normal",
        padding="same",
        name="Conv2",
    )(x)
    # x = tf.keras.layers.MaxPooling2D((2, 2), name="pool2")(x)

    # The number of
    # filters in the last layer is 64. Reshape accordingly before
    # passing the output to the RNN part of the model.
    new_shape = (36, (36 * 64))
    x = tf.keras.layers.Reshape(target_shape=new_shape, name="reshape")(x)
    x = tf.keras.layers.Dense(64, activation="relu", name="dense1")(x)
    x = tf.keras.layers.Dropout(0.2)(x)

    # RNNs.
    x = tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(128, return_sequences=True, dropout=0.25)
    )(x)
    x = tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(64, return_sequences=True, dropout=0.25)
    )(x)
    
    # flatten
    x = tf.keras.layers.Flatten()(x)

    output = tf.keras.layers.Dense(
        len(labelNames), activation="softmax", name="dense2"
    )(x)

    # Define the model.
    model = tf.keras.models.Model(
        inputs=inp, outputs=output, name="handwriting_recognizer"
    )
    # Optimizer.
    opt = tf.keras.optimizers.Adam()
    # Compile the model and return.
    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
    return model


# Get the model.
model = build_model()
model.summary()

Model: "handwriting_recognizer"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 image (InputLayer)          [(None, 36, 36, 1)]       0         
                                                                 
 Conv1 (Conv2D)              (None, 36, 36, 32)        320       
                                                                 
 Conv2 (Conv2D)              (None, 36, 36, 64)        18496     
                                                                 
 reshape (Reshape)           (None, 36, 2304)          0         
                                                                 
 dense1 (Dense)              (None, 36, 64)            147520    
                                                                 
 dropout (Dropout)           (None, 36, 64)            0         
                                                                 
 bidirectional (Bidirectiona  (None, 36, 256

In [13]:
epochs = 50  # To get good results this should be at least 50.

model = build_model()
prediction_model = tf.keras.models.Model(
    model.get_layer(name="image").input, model.get_layer(name="dense2").output
)

# Train the model.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=epochs
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [14]:
model.save('model/Complex_Model.model')

In [15]:
import pickle

with open('hist/Complex_Model.model', 'wb') as file_pi:
        pickle.dump(history.history, file_pi)

In [10]:
# optional model load
model = tf.keras.models.load_model('model/Complex_Model.hist')

In [16]:
predictions = model.predict(X_test[:3000])
print(classification_report(y_test[:3000].argmax(axis=1), predictions.argmax(axis=1), target_names=labelNames))

              precision    recall  f1-score   support

           0       0.90      0.90      0.90        40
           1       0.98      1.00      0.99        64
           2       0.98      0.96      0.97        48
           3       1.00      1.00      1.00        43
           4       0.98      0.98      0.98        50
           5       1.00      0.96      0.98        45
           6       1.00      1.00      1.00        46
           7       1.00      1.00      1.00        46
           8       1.00      1.00      1.00        49
           9       1.00      0.97      0.99        40
           A       0.98      1.00      0.99        90
           B       1.00      0.98      0.99        62
           C       0.99      1.00      1.00       143
           D       0.97      0.99      0.98        72
           E       1.00      1.00      1.00        88
           F       1.00      1.00      1.00         4
           G       0.97      1.00      0.99        34
           H       1.00    

In [23]:
import gradio as gr

def classify(input):
    prediction = model.predict(np.pad(input.reshape(28, 28)), ((4,4),(4,4)), mode='constant', constant_values=0).tolist()[0]
    return {str(label): prediction[i] for i, label in enumerate(labelNames)}

label = gr.outputs.Label(num_top_classes=3)
interface = gr.Interface(fn=classify, inputs="sketchpad", outputs=label)
interface.launch()

Running on local URL:  http://127.0.0.1:7868/

To create a public link, set `share=True` in `launch()`.


(<Flask 'gradio.networking'>, 'http://127.0.0.1:7868/', None)

Traceback (most recent call last):
  File "C:\Users\monol\anaconda3\lib\site-packages\gradio\networking.py", line 237, in predict
    prediction, durations = app.interface.process(raw_input)
  File "C:\Users\monol\anaconda3\lib\site-packages\gradio\interface.py", line 411, in process
    predictions, durations = self.run_prediction(
  File "C:\Users\monol\anaconda3\lib\site-packages\gradio\interface.py", line 374, in run_prediction
    prediction = predict_fn(*processed_input)
  File "C:\Users\monol\AppData\Local\Temp/ipykernel_20860/3275017357.py", line 4, in classify
    prediction = model.predict(np.pad(input.reshape(28, 28)), ((4,4),(4,4)), mode='constant', constant_values=0).tolist()[0]
  File "<__array_function__ internals>", line 4, in pad
TypeError: _pad_dispatcher() missing 1 required positional argument: 'pad_width'
