In [47]:
import pandas as pd
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
import warnings
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN
from tensorflow.keras.utils import to_categorical
from keras.optimizers import Adam, SGD, Adagrad, RMSprop
from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard, Callback
warnings.filterwarnings('ignore', category=DeprecationWarning)

#### Preprocessing

In [40]:
train, test = [], []
z_train, z_test = [], []
chars =  ['a', 'bA', 'dA', 'lA', 'tA']

def normalize(seq):
    min_x, max_x = min(el[0] for el in seq), max(el[0] for el in seq)
    min_y, max_y = min(el[1] for el in seq), max(el[1] for el in seq)
    for el in seq:
        el[0] = (el[0] - min_x)/(max_x-min_x)
        el[1] = (el[1] - min_y)/(max_y-min_y)
    return seq
for ch in chars:
    for f in os.listdir("Group24/Handwriting_Data/" + str(ch)+"/train"):
        with open("Group24/Handwriting_Data/" + str(ch)+"/train/" + str(f), 'r') as file:
            contents = file.read()
            items = contents.split()
            seq = []
            for i in range(1,len(items),2):
                seq.append([float(items[i]),float(items[i+1])])
            train.append(normalize(seq))
        z_train.append(str(ch))
    for f in os.listdir("Group24/Handwriting_Data/" + str(ch)+"/dev"):
        with open("Group24/Handwriting_Data/" + str(ch)+"/dev/" + str(f), 'r') as file:
            contents = file.read()
            items = contents.split()
            seq = []
            for i in range(1,len(items),2):
                seq.append([float(items[i]),float(items[i+1])])
            test.append(normalize(seq))
        z_test.append(str(ch))

In [41]:
label_map = {'a':0, 'bA':1, 'dA':2, 'lA':3, 'tA':4}
z_train_onehot = tf.keras.utils.to_categorical([label_map[x] for x in z_train], 5)
z_test_onehot = tf.keras.utils.to_categorical([label_map[x] for x in z_test], 5)

In [42]:
max_len = max([max(len(x) for x in train), max(len(x) for x in test)])
train_padded = pad_sequences(train, maxlen=max_len, dtype='float32', padding='post')
test_padded = pad_sequences(test, maxlen=max_len, dtype='float32', padding='post')

Flattening

#### Model

In [53]:
model = Sequential()
model.add(SimpleRNN(32, input_shape=(max_len, 2)))
model.add(Dense(64, activation="relu"))
model.add(Dense(5, activation="sigmoid"))
loss_fn = tf.keras.losses.CategoricalCrossentropy()

adam_optimizer = Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.999, epsilon=1e-8)
model.compile(loss=loss_fn, optimizer=adam_optimizer, metrics=['accuracy']) 
# Train the model
my_callbacks = [
    EarlyStopping(monitor='loss', min_delta=1e-4, patience=10),
    TensorBoard(log_dir=f'./logdir/Q1/test')
]
model_fit = model.fit(train_padded, z_train_onehot, batch_size=1, epochs=10000, verbose=0, callbacks=my_callbacks, validation_split=0.0, shuffle=True, validation_batch_size=None)

hist_metric = 'accuracy'
print(f'epochs: {len(model_fit.history[hist_metric])}, acc: {model_fit.history[hist_metric][-1]}\n')
model.save(f'models/Q1/test.tf')
# model.fit(train_padded, z_train_onehot, epochs=1000, batch_size=1)


epochs: 20, acc: 0.21929824352264404

INFO:tensorflow:Assets written to: models/Q1/test.tf/assets
