In [None]:
import os
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

Download the **EMNIST** (Extended MNIST) dataset from Kaggle.

In [None]:
import kagglehub

path = kagglehub.dataset_download("crawford/emnist")

After reading the documentation of the EMNIST datasets on Kaggle, I decided to use the EMNIST Balanced dataset out of the options listed below. It has the same number of examples for each class, so the model will not be biased towards letters that are used more often. If the task were to recognise actual text with real words, I think a little bias would be more or less acceptable, but for a VIN I expect every letter / digit to be roughly equally likely, so I think this is the correct choice.

In [None]:
os.listdir(path)

Load the dataset into a DataFrame

In [None]:
train_data_file_path = os.path.join(path, 'emnist-balanced-train.csv')
train_data = pd.read_csv(train_data_file_path, header=None)

train_data.head()

In [None]:
test_data_file_path = os.path.join(path, 'emnist-balanced-test.csv')
test_data = pd.read_csv(test_data_file_path, header=None)

test_data.head()

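These datasets currently contain digits, upper case letters and some lower case letters whose shapes differ from their upper case forms. After doing a bit of research about VINs I realised that a VIN consists only of digits and upper case letters, so I will filter out the lower case ones. The upper case letters I, O and Q are filtered out as well (see `chars_to_remove` below), because a VIN does not use them either.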

In [None]:
# Label table: a class label is a position in this string.
class_mapping = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabdefghnqrt'
# Characters whose classes are dropped from the data.
chars_to_remove = 'abdefghnqrtIOQ'
# The same table with the dropped characters taken out; relative order is kept.
updated_class_mapping = ''.join(filter(lambda ch: ch not in chars_to_remove, class_mapping))

In [None]:
def remove_rows_by_label(df, input_string, source_mapping=None):
    """Drop the rows of unwanted classes and renumber the remaining labels.

    The class label is read from the first column of ``df`` and is interpreted
    as a position in ``source_mapping``. Rows whose character occurs in
    ``input_string`` are removed; every other label is replaced by the position
    of its character in the mapping that remains after the removal, so the
    returned labels are contiguous and start at 0.

    Args:
        df: DataFrame whose first column holds the integer class labels.
        input_string: Characters whose classes are removed.
        source_mapping: String mapping label position -> character. Defaults to
            the notebook-level ``class_mapping``.

    Returns:
        A new DataFrame with the filtered rows, a fresh 0..n-1 index and the
        renumbered labels in the first column. ``df`` itself is not modified,
        so calling the function again on the same input gives the same result.
    """
    if source_mapping is None:
        source_mapping = class_mapping

    labels = df.iloc[:, 0]
    print(f'unique count before: {len(labels.unique())}')
    print(f'chars to remove: {input_string}')

    # Derive the new numbering from the argument itself rather than from a
    # notebook global, so the parameter really controls what gets removed.
    kept_chars = [c for c in source_mapping if c not in input_string]
    new_index = {c: i for i, c in enumerate(kept_chars)}
    # -1 marks a class that has to be dropped.
    old_to_new_mapping = {old: new_index.get(c, -1) for old, c in enumerate(source_mapping)}
    print(old_to_new_mapping)

    remapped = labels.map(old_to_new_mapping)
    keep = remapped != -1

    # Boolean indexing returns new data, so the caller's frame stays untouched
    # when the label column is overwritten below.
    result = df[keep].reset_index(drop=True)
    result[result.columns[0]] = remapped[keep].to_numpy()

    kept_labels = result.iloc[:, 0]
    print(f'unique count after: {len(kept_labels.unique())}')
    print(f'max label after: {kept_labels.max()}')

    return result

In [None]:
train_data = remove_rows_by_label(train_data, chars_to_remove)

In [None]:
test_data = remove_rows_by_label(test_data, chars_to_remove)

The desired input images for classification are in squares, so let's augment the training and testing data with padding

In [None]:
def show_row(df, row_num, side):
    """Display one row of ``df`` as a ``side`` x ``side`` greyscale image.

    The first value of the row is taken as the class index and the remaining
    values as the pixels. The title shows the class index together with its
    character looked up in ``updated_class_mapping``.
    """
    record = df.values[row_num]
    class_index = record[0]
    pixels = record[1:].reshape([side, side])
    character = updated_class_mapping[class_index]

    plt.figure(figsize=(3,3))
    plt.imshow(pixels, cmap='Greys_r')
    plt.title(f'Class: {class_index} ({character})')
    plt.axis('off')
    plt.show()

In [None]:
def pad_and_transpose_data(df):
    """Add a one-pixel border of value 255 to every image and transpose it.

    Each row of ``df`` is a label followed by the pixels of a flattened square
    image (28 x 28 for the frames used in this notebook; the side length is
    inferred from the number of pixel columns). Every image is padded by one
    pixel on each side with the constant 255, transposed (rows and columns
    swapped), and flattened again.

    The whole frame is processed in one vectorised step instead of a Python
    loop over rows.

    Args:
        df: DataFrame with the label in the first column and the flattened
            square image in the remaining columns.

    Returns:
        A new DataFrame with the label in column 0 followed by the
        ``(side + 2) ** 2`` pixels of the padded, transposed image.

    Raises:
        ValueError: If the number of pixel columns is not a positive perfect
            square.
    """
    values = df.values
    n_rows, n_cols = values.shape

    n_pixels = n_cols - 1
    side = int(round(np.sqrt(n_pixels))) if n_pixels > 0 else 0
    if side == 0 or side * side != n_pixels:
        raise ValueError(
            f'expected a label column followed by a square image, got {n_pixels} pixel columns'
        )

    labels = values[:, :1]
    images = values[:, 1:].reshape(n_rows, side, side)

    # Pad only the two image axes; the leading axis enumerates the rows.
    padded = np.pad(images, pad_width=((0, 0), (1, 1), (1, 1)),
                    mode='constant', constant_values=255)
    transposed = padded.transpose(0, 2, 1)
    flat = transposed.reshape(n_rows, (side + 2) ** 2)

    return pd.DataFrame(np.concatenate([labels, flat], axis=1))

In [None]:
show_row(train_data, 0, 28)

In [None]:
padded_train_data = pad_and_transpose_data(train_data)

In [None]:
show_row(padded_train_data, 0, 30)

In [None]:
padded_test_data = pad_and_transpose_data(test_data)

In [None]:
train_data = padded_train_data
test_data = padded_test_data

In [None]:
train_ratio = round(train_data.shape[0] / (train_data.shape[0] + test_data.shape[0]) * 100, 2)
test_ratio = round(100 - train_ratio, 2)

print(f'train data: \t{train_ratio} % \n test data: \t{test_ratio} %')

The split looks okay; let's see the actual counts

In [None]:
train_data.shape

In [None]:
test_data.shape

Explore a few random examples

In [None]:
def visualise(df, side):
    """Plot ten randomly chosen rows of ``df`` side by side.

    Row numbers are drawn independently with ``np.random.randint``, so the same
    row can show up more than once. For every chosen row the first value is the
    class index and the remaining values are reshaped into a ``side`` x ``side``
    image; the subplot title shows the class index and its character from
    ``updated_class_mapping``.
    """
    num_rows_to_show = 10
    plt.figure(figsize=(15, 5))

    chosen_rows = np.random.randint(df.shape[0], size=num_rows_to_show)

    for position, row_num in enumerate(chosen_rows, start=1):
        record = df.values[row_num]
        data_class = record[0]
        # everything after the label column is the flattened image
        img = record[1:].reshape([side, side])
        label = updated_class_mapping[data_class]

        plt.subplot(1, num_rows_to_show, position)
        # 'Greys_r' is the reversed grey colour map
        plt.imshow(img, cmap='Greys_r')
        plt.title(f'Class: {data_class} ({label})')
        plt.axis('off')

    plt.tight_layout()
    plt.show()

In [None]:
visualise(train_data, 30)

In [None]:
visualise(test_data, 30)

In [None]:
num_classes = len(train_data[0].unique())
num_classes

In [None]:
# Convolutional classifier for 30 x 30 single-channel images: two strided
# convolutions, a max-pool, one more convolution, then a dense head with one
# softmax output per class.
model = tf.keras.models.Sequential([
    tf.keras.layers.Input(shape=(30,30,1)),

    tf.keras.layers.Conv2D(18, (5, 5), strides=2, activation='relu'),
    tf.keras.layers.Conv2D(32, (3, 3), strides=2, activation='relu'),

    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Conv2D(64, (2, 2), activation='relu'),

    tf.keras.layers.Flatten(),

    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(num_classes, activation='softmax'),
])

In [None]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
def preprocess_data(df, num_classes, side):
    """Split a labelled pixel frame into model inputs and one-hot targets.

    Args:
        df: DataFrame with the class label in the first column and the
            flattened ``side`` x ``side`` image in the remaining columns.
        num_classes: Width of the one-hot encoding.
        side: Edge length of the square images.

    Returns:
        ``(x, y)`` where ``x`` is a float32 array of shape
        ``(rows, side, side, 1)`` with the pixel values divided by 255 and
        ``y`` is the one-hot encoding of the labels.
    """
    raw = df.values

    pixels = raw[:, 1:].reshape(-1, side, side, 1).astype('float32')
    pixels = pixels / 255.0

    one_hot = tf.keras.utils.to_categorical(raw[:, 0], num_classes=num_classes)

    return pixels, one_hot

In [None]:
train_data_x, train_data_y = preprocess_data(train_data, num_classes, 30)
test_data_x, test_data_y = preprocess_data(test_data, num_classes, 30)

In [None]:
history = model.fit(train_data_x, train_data_y, epochs=5)

loss, accuracy = model.evaluate(test_data_x, test_data_y)
print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")

In [None]:
# Classify every image file found in the test-symbol directory and show the
# prediction next to the image the model actually received.
test_symbols_dir = "/test_symbols"
image_extensions = (".jpg", ".jpeg", ".png")

for filename in sorted(os.listdir(test_symbols_dir)):
    if not filename.lower().endswith(image_extensions):
        continue

    try:
        img = cv2.imread(os.path.join(test_symbols_dir, filename), cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread reports an unreadable file by returning None, not by raising.
            raise ValueError("cv2.imread could not read the file")

        # For uint8 pixels np.invert maps x -> 255 - x.
        # NOTE(review): presumably the test symbols are dark-on-light while the
        # training images are light-on-dark - confirm against the actual files.
        img = np.invert(img)

        # The network input is fixed at 30 x 30 x 1.
        if img.shape != (30, 30):
            img = cv2.resize(img, (30, 30))

        # Same scaling as the training data: float32 in [0, 1] with a channel axis.
        batch = img.astype('float32').reshape(1, 30, 30, 1) / 255.0
        prediction = model.predict(batch)

        predicted_class = np.argmax(prediction)
        predicted_label = updated_class_mapping[predicted_class]

        plt.figure(figsize=(3,3))
        plt.imshow(img, cmap='Greys_r')
        plt.title(f'Class: {predicted_class} ({predicted_label})')
        plt.show()
        print(f"{filename}: {predicted_label}")
    except Exception as exc:
        # Keep going with the next file, but say why this one failed.
        print(f"Error processing {filename}: {exc}")


In [ ]:
from train import load_or_train_model, MODEL_SAVE_PATH, get_class_mapping, define_model, get_emnist_data

def log(msg):
    """Print ``msg``; the on/off switch inside is hard-wired to on."""
    enabled = True
    if not enabled:
        return
    print(msg)

In [ ]:
def load_or_train_model(save_path=MODEL_SAVE_PATH):
    """Return the model stored at ``save_path``, training one first if needed.

    If ``save_path`` exists the model is loaded from it. Otherwise the data is
    fetched with ``get_emnist_data``, a model is built with ``define_model``,
    compiled with Adam and categorical cross-entropy, trained for one epoch,
    evaluated on the test split, saved to ``save_path`` and returned.

    Note: this definition replaces the ``load_or_train_model`` name that the
    notebook imports from ``train``.
    """
    # Fast path: a saved model is already there.
    if os.path.exists(save_path):
        log('Loading trained model...')
        return tf.keras.models.load_model(save_path)

    train_x, train_y, test_x, test_y, num_classes = get_emnist_data()
    log(f'num classes {num_classes}')
    classifier = define_model(num_classes)

    log('Compiling model...')
    classifier.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss=tf.keras.losses.CategoricalCrossentropy(),
        metrics=['accuracy'],
    )

    log('Training model...')
    classifier.fit(train_x, train_y, epochs=1)

    log('Evaluating model...')
    test_loss, test_accuracy = classifier.evaluate(test_x, test_y)
    log(f"Test Loss: {test_loss}")
    log(f"Test Accuracy: {test_accuracy}")

    log('Saving model...')
    classifier.save(save_path)
    log(f"Model saved to {save_path}")

    return classifier