In [16]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import KFold
from keras.layers import LeakyReLU, Conv2D, BatchNormalization, MaxPooling2D, Dropout, Flatten, Dense
from tensorflow.keras.optimizers import Adam
from keras.models import Sequential
import tensorflow as tf

In [17]:
data_test = pd.read_csv('Data/emnist-letters-test.csv', header = None)
data_train = pd.read_csv('Data/emnist-letters-train.csv', header = None)

X_train = data_train.drop(0, axis = 1)
y_train = data_train[0].values
X_test = data_test.drop(0, axis = 1)
y_test = data_test[0].values

In [18]:
def preprocess(x):
    return x.reshape(-1,28,28,1,order='F')/255.0

X_train_preprocessed = preprocess(X_train.values)
X_test_preprocessed = preprocess(X_test.values)

label_enc = OneHotEncoder(sparse_output=False)

kf = KFold(n_splits = 5, shuffle = True, random_state = 42)
cv_train_scores = []
cv_test_scores = []

In [20]:
for train_index, val_index in kf.split(X_train_preprocessed):
    X_train_fold, X_val = X_train_preprocessed[train_index], X_train_preprocessed[val_index]
    y_train_fold, y_val = y_train[train_index], y_train[val_index]

    y_train_fold_tensor = tf.convert_to_tensor(label_enc.fit_transform(y_train_fold.reshape(-1, 1)))
    y_val_tensor = tf.convert_to_tensor(label_enc.transform(y_val.reshape(-1, 1)))

    X_train_tensor = tf.convert_to_tensor(X_train_fold)
    X_val_tensor = tf.convert_to_tensor(X_val)

    model = Sequential()
    tf.keras.layers.Input(shape=(28, 28, 1)),        
    Conv2D(32, kernel_size=(3, 3)),
    model.add(LeakyReLU(negative_slope=0.1))
    model.add(MaxPooling2D(pool_size=(2, 2))) # for downsizing the info from the features a bit
    model.add(Dropout(0.25)) # to prevent having dead neurons
    model.add(Conv2D(64, kernel_size=(3, 3)))
    model.add(LeakyReLU(negative_slope=0.1))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(200))
    model.add(LeakyReLU(negative_slope=0.1))
    model.add(Dense(26, activation='softmax'))
    opt = Adam(learning_rate=0.002)
    model.compile(loss='categorical_crossentropy',optimizer=opt,metrics=['accuracy'])


    model.fit(X_train_tensor, y_train_fold_tensor, epochs=25, batch_size=32, validation_data = (X_val_tensor, y_val_tensor), verbose=1)

    # Evaluate the model on the training data
    train_score = model.evaluate(X_train_tensor, y_train_fold_tensor, verbose=0)[1]
    cv_train_scores.append(train_score)

    # Evaluate the model on the validation data
    val_score = model.evaluate(X_val_tensor, y_val_tensor, verbose=0)[1]
    cv_test_scores.append(val_score)

average_train_score = np.mean(cv_train_scores)
average_val_score = np.mean(cv_test_scores)
print(f'Average Training Accuracy: {average_train_score}')
print(f'Average Validation Accuracy: {average_val_score}')


X_test_tensor = tf.convert_to_tensor(X_test_preprocessed)
y_test_tensor = tf.convert_to_tensor(label_enc.transform(y_test.reshape(-1, 1)))

test_loss, test_accuracy = model.evaluate(X_test_tensor, y_test_tensor, verbose=0)
print(f'Test Accuracy: {test_accuracy}')


Epoch 1/25
[1m2220/2220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 5ms/step - accuracy: 0.7627 - loss: 0.7513 - val_accuracy: 0.7601 - val_loss: 0.7631
Epoch 2/25
[1m2220/2220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 5ms/step - accuracy: 0.8390 - loss: 0.4914 - val_accuracy: 0.7305 - val_loss: 0.8854
Epoch 3/25
[1m2220/2220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - accuracy: 0.8522 - loss: 0.4450 - val_accuracy: 0.7741 - val_loss: 0.7144
Epoch 4/25
[1m2220/2220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - accuracy: 0.8599 - loss: 0.4214 - val_accuracy: 0.6983 - val_loss: 1.0442
Epoch 5/25
[1m2220/2220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 5ms/step - accuracy: 0.8652 - loss: 0.4042 - val_accuracy: 0.7274 - val_loss: 0.9088
Epoch 6/25
[1m2220/2220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 5ms/step - accuracy: 0.8682 - loss: 0.3893 - val_accuracy: 0.7496 - val_loss: 0.8229
Epoch 7/25