In [None]:
import glob
import os
from random import randint

import keras
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from keras.layers import (
    BatchNormalization,
    Conv2D,
    Dense,
    Dropout,
    Flatten,
    LSTM,
    MaxPooling2D,
    Reshape,
)
from keras.models import Sequential
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping

%matplotlib inline

In [None]:
# Read one class name per line, trimming surrounding whitespace/newlines.
with open('./npy/classes.txt') as classes_file:
    class_names = [entry.strip() for entry in classes_file]

In [None]:
def load_data(root, vfold_ratio=0.1, max_items_per_class=40000, seed=None):
    """Load per-class ``.npy`` arrays from ``root`` and split them into train/test sets.

    Every ``*.npy`` file directly inside ``root`` is treated as one class: the
    file's base name (without extension) is the class name, and its position in
    the alphabetically sorted file list is the integer label.

    Args:
        root: Directory holding one ``.npy`` file per class. Each array is
            sliced as 2-D and stacked onto a 784-column buffer, so it must have
            shape ``(n_samples, 784)``.
        vfold_ratio: Fraction of the shuffled samples held out as the test set.
        max_items_per_class: Maximum number of leading rows taken from each file.
        seed: Optional seed for the shuffle. ``None`` (the default) draws the
            permutation from NumPy's global random state, exactly as before;
            any other value makes the split reproducible.

    Returns:
        Tuple ``(x_train, y_train, x_test, y_test, class_names)``. ``x_*`` are
        float64 arrays of shape ``(n, 784)``, ``y_*`` are float64 label vectors,
        and ``class_names[i]`` is the name of the class labelled ``i``.
    """
    # glob returns files in arbitrary filesystem order; sorting pins the
    # label <-> class mapping so it is the same on every machine and run.
    all_files = sorted(glob.glob(os.path.join(root, '*.npy')))

    # Zero-row float seeds keep the result shape/dtype identical to the
    # original implementation, including when no files are found.
    feature_chunks = [np.empty([0, 784])]
    label_chunks = [np.empty([0])]
    class_names = []

    for idx, file in enumerate(all_files):
        print(idx)
        print(file)
        data = np.load(file)
        data = data[0: max_items_per_class, :]

        feature_chunks.append(data)
        label_chunks.append(np.full(data.shape[0], idx))

        class_name, _ = os.path.splitext(os.path.basename(file))
        class_names.append(class_name)

    # Concatenate once: doing it inside the loop recopies the whole buffer
    # for every file (quadratic in the number of classes).
    x = np.concatenate(feature_chunks, axis=0)
    y = np.concatenate(label_chunks)
    del feature_chunks, label_chunks

    # Shuffle samples and labels with the same permutation.
    if seed is None:
        permutation = np.random.permutation(y.shape[0])
    else:
        permutation = np.random.default_rng(seed).permutation(y.shape[0])
    x = x[permutation, :]
    y = y[permutation]

    # The first vfold_size shuffled rows form the test set, the rest the training set.
    vfold_size = int(x.shape[0]/100*(vfold_ratio*100))

    x_test = x[0:vfold_size, :]
    y_test = y[0:vfold_size]

    x_train = x[vfold_size:x.shape[0], :]
    y_train = y[vfold_size:y.shape[0]]
    return x_train, y_train, x_test, y_test, class_names

In [None]:
# Side length of the square images; 28 * 28 matches the 784-column rows load_data returns.
image_size = 28

# Note: this rebinds class_names to the names derived from the .npy file names.
x_train, y_train, x_test, y_test, class_names = load_data(root='npy/data')
num_classes = len(class_names)

In [None]:
# Sample counts, in order: train features, train labels, test features, test labels.
for split in (x_train, y_train, x_test, y_test):
    print(len(split))

In [None]:
# random.randint includes BOTH endpoints, so the upper bound has to be the last
# valid index; randint(0, len(x_train)) can return len(x_train) and raise IndexError.
idx = randint(0, len(x_train) - 1)

# Show the raw pixel grid, the rendered image, and the class it is labelled with.
sample_image = x_train[idx].reshape(image_size, image_size)
print(sample_image)
plt.imshow(sample_image)
print(class_names[int(y_train[idx].item())])

In [None]:
# Turn each sample into an (image_size, image_size, 1) float32 image (single channel last).
image_shape = (image_size, image_size, 1)
x_train = x_train.reshape((x_train.shape[0],) + image_shape).astype('float32')
x_test = x_test.reshape((x_test.shape[0],) + image_shape).astype('float32')

In [None]:
# Divide both splits by 255.0 in place.
# NOTE: the arrays are modified in place, so re-running this cell divides them again.
for pixels in (x_train, x_test):
    pixels /= 255.0

In [None]:
# Convert the integer class labels of both splits to one-hot rows of width num_classes.
y_train, y_test = (
    keras.utils.to_categorical(labels, num_classes) for labels in (y_train, y_test)
)

In [None]:
# Display the training tensor's shape as a sanity check after the reshape cell above.
x_train.shape

In [None]:
def build_cnn_model(input_shape, n_classes):
    """Build the (uncompiled) convolutional classifier.

    The network is four stages of Conv-BN-Conv-BN-MaxPool with 32, 64, 128 and
    256 filters (Dropout(0.1) after the 2nd and 4th stage), followed by
    Flatten, Dense(512), Dropout(0.1) and a softmax output layer.

    Args:
        input_shape: Shape of one input sample, without the batch axis.
        n_classes: Number of units in the softmax output layer.

    Returns:
        The assembled ``Sequential`` model.
    """
    model = Sequential()

    # (filters, add Dropout after pooling) for each convolutional stage.
    conv_stages = [(32, False), (64, True), (128, False), (256, True)]
    for stage_idx, (filters, with_dropout) in enumerate(conv_stages):
        # Only the very first layer of the model declares the input shape.
        first_layer_kwargs = {'input_shape': input_shape} if stage_idx == 0 else {}
        model.add(Conv2D(filters, (3, 3), padding='same', activation='relu', **first_layer_kwargs))
        model.add(BatchNormalization())
        model.add(Conv2D(filters, (3, 3), padding='same', activation='relu'))
        model.add(BatchNormalization())
        model.add(MaxPooling2D(pool_size=(2, 2)))
        if with_dropout:
            model.add(Dropout(0.1))

    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.1))
    # Size the output from the data (num_classes is also what to_categorical used)
    # instead of hard-coding 345, so labels and logits cannot disagree.
    model.add(Dense(n_classes, activation='softmax'))
    return model


CNN_model = build_cnn_model(x_train.shape[1:], num_classes)

optimizer = Adam(learning_rate=0.001)
CNN_model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['top_k_categorical_accuracy'])

# summary() prints the table itself and returns None, so it is not wrapped in print().
CNN_model.summary()

In [None]:
# Stop when val_loss has not improved for 4 epochs; restore_best_weights=True
# asks Keras to put back the weights of the best epoch.
early_stopping = EarlyStopping(monitor="val_loss", patience=4, restore_best_weights=True)

history = CNN_model.fit(
    x=x_train,
    y=y_train,
    batch_size=256,
    epochs=40,
    validation_split=0.1,
    callbacks=[early_stopping],
    verbose=2,
)

In [None]:
# Extract training history. The CNN is compiled with 'top_k_categorical_accuracy'
# (Keras' default k is 5), so the curves are labelled as top-5 accuracy.
loss = history.history['loss']
val_loss = history.history['val_loss']
accuracy = history.history['top_k_categorical_accuracy']
val_accuracy = history.history['val_top_k_categorical_accuracy']

fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))

# Left panel: training and validation loss
loss_ax.plot(loss, label='Training Loss')
loss_ax.plot(val_loss, label='Validation Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend()

# Right panel: training and validation top-5 accuracy
acc_ax.plot(accuracy, label='Training Top-5 Accuracy')
acc_ax.plot(val_accuracy, label='Validation Top-5 Accuracy')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Top-5 Accuracy')
acc_ax.legend()

plt.show()

In [None]:
# evaluate() returns [loss, metric]; the CNN's compiled metric is
# 'top_k_categorical_accuracy' (Keras' default k is 5), so report it as top-5 accuracy.
score = CNN_model.evaluate(x_test, y_test, verbose=0)
print('Test top-5 accuracy: {:0.2f}%'.format(score[1] * 100))

In [None]:
# Per-class scores from the CNN for every test sample; the argmax, AUC and ROC cells below consume this.
predictions = CNN_model.predict(x_test)

In [None]:
# Index of the highest-scoring class for each test sample.
predicted_classes = predictions.argmax(axis=1)

In [None]:
# Recover integer class ids from the one-hot test labels.
true_classes = y_test.argmax(axis=1)

In [None]:
from sklearn.metrics import f1_score, roc_auc_score

In [None]:
# Macro-averaged F1 over the predicted vs. true class ids.
f1 = f1_score(y_true=true_classes, y_pred=predicted_classes, average='macro')
print("F1 Score (Macro): {:.4f}".format(f1))

In [None]:
from sklearn.preprocessing import label_binarize

In [None]:
# Number of classes = width of the one-hot label matrix.
n_classes = y_test.shape[1]
# Indicator matrix of the true class ids, one column per class index 0..n_classes-1.
class_ids = list(range(n_classes))
y_test_binarized = label_binarize(true_classes, classes=class_ids)

In [None]:
# Deliberately NOT named `auc`: a later cell imports sklearn.metrics.auc and calls it,
# so binding a float to that name here would break the ROC cell whenever this cell is
# re-run after that import.
roc_auc_ovr = roc_auc_score(y_test_binarized, predictions, multi_class='ovr')
print(f"AUC (One-vs-Rest): {roc_auc_ovr:.4f}")

In [None]:
from sklearn.metrics import roc_curve, auc, confusion_matrix, RocCurveDisplay
import itertools

In [None]:
# Number of classes = width of the one-hot label matrix.
n_classes = y_test.shape[1]
# Indicator matrix of the true class ids (argmax of the one-hot labels), one column per class.
y_test_binarized = label_binarize(y_test.argmax(axis=1), classes=list(range(n_classes)))

In [None]:
# Per-class ROC data, keyed by class index.
fpr, tpr, roc_auc = {}, {}, {}
for class_idx in range(n_classes):
    fpr[class_idx], tpr[class_idx], _ = roc_curve(
        y_test_binarized[:, class_idx], predictions[:, class_idx]
    )
    roc_auc[class_idx] = auc(fpr[class_idx], tpr[class_idx])

# Draw every class's curve on one figure, cycling through a fixed colour list.
plt.figure(figsize=(10, 8))
colors = itertools.cycle(['blue', 'red', 'green', 'yellow', 'cyan', 'magenta', 'black'])
for class_idx in range(n_classes):
    curve_label = 'ROC curve of class {0} (area = {1:0.2f})'.format(class_idx, roc_auc[class_idx])
    plt.plot(fpr[class_idx], tpr[class_idx], color=next(colors), lw=2, label=curve_label)

# Dashed diagonal from (0, 0) to (1, 1) as a reference line.
plt.plot([0, 1], [0, 1], 'k--', lw=2)
plt.title('ROC Curve for multi-class')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])

# Save as PNG and PDF at 300 dpi before showing the figure.
plt.savefig('ROC_Curve.png', format='png', dpi=300)
plt.savefig('ROC_Curve.pdf', format='pdf', dpi=300)
plt.show()

In [None]:
def build_rnn_model(input_shape, n_classes):
    """Build the (uncompiled) stacked-LSTM classifier.

    Each image is read as a sequence of rows: a Reshape layer folds the
    trailing channel axis away so every image row becomes one timestep.
    The LSTM stack and dense head keep the original layer sizes.

    Args:
        input_shape: Shape of one input sample, without the batch axis
            (rows, columns, channels).
        n_classes: Number of units in the softmax output layer.

    Returns:
        The assembled ``Sequential`` model.
    """
    model = Sequential()

    # LSTM layers take (timesteps, features) sequences, but the data is stored as
    # (rows, cols, channels) images: flatten cols*channels into the feature axis.
    model.add(Reshape((input_shape[0], -1), input_shape=input_shape))

    # (units, add Dropout after this layer) for the sequence-returning LSTM layers.
    sequence_layers = [
        (64, True),
        (128, False), (256, True),
        (512, False), (256, True),
        (128, False), (256, True),
    ]
    for units, with_dropout in sequence_layers:
        model.add(LSTM(units, return_sequences=True))
        if with_dropout:
            model.add(Dropout(0.1))

    # Last recurrent layer returns only its final state for the dense head.
    model.add(LSTM(128))
    model.add(Dropout(0.1))

    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.1))

    # Size the output from the data (num_classes) instead of hard-coding 345.
    model.add(Dense(n_classes, activation='softmax'))
    return model


RNN_model = build_rnn_model(x_train.shape[1:], num_classes)

optimizer = Adam(learning_rate=0.001)
# 'accuracy' stays first so evaluate()[1] is plain accuracy; top-k is tracked as well
# so the RNN records the same history keys as the CNN.
RNN_model.compile(loss='categorical_crossentropy', optimizer=optimizer,
                  metrics=['accuracy', 'top_k_categorical_accuracy'])

# summary() prints the table itself and returns None, so it is not wrapped in print().
RNN_model.summary()

In [None]:
# Stop when val_loss has not improved for 4 epochs; restore_best_weights=True
# asks Keras to put back the weights of the best epoch.
early_stopping = EarlyStopping(monitor="val_loss", patience=4, restore_best_weights=True)

history = RNN_model.fit(
    x=x_train,
    y=y_train,
    batch_size=256,
    epochs=40,
    validation_split=0.1,
    callbacks=[early_stopping],
    verbose=2,
)

In [None]:
# Extract training history. The RNN is compiled with the 'accuracy' metric, so read
# the 'accuracy' keys; 'top_k_categorical_accuracy' raises KeyError unless the model
# was also compiled with that metric.
loss = history.history['loss']
val_loss = history.history['val_loss']
accuracy = history.history['accuracy']
val_accuracy = history.history['val_accuracy']

fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))

# Left panel: training and validation loss
loss_ax.plot(loss, label='Training Loss')
loss_ax.plot(val_loss, label='Validation Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend()

# Right panel: training and validation accuracy
acc_ax.plot(accuracy, label='Training Accuracy')
acc_ax.plot(val_accuracy, label='Validation Accuracy')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend()

plt.show()

In [None]:
# evaluate() returns [loss, metrics...]; index 1 is the first compiled metric ('accuracy').
score = RNN_model.evaluate(x_test, y_test, verbose=0)
print('Test accuracy: {:0.2f}%'.format(score[1] * 100))

In [None]:
# Persist the trained CNN under the path 'CNN_model'; the TFLite converter cell below reads it back.
# NOTE(review): an extension-less path is only accepted as a SavedModel directory by older
# Keras releases; newer ones may require a file extension or model.export() — confirm for the installed version.
CNN_model.save('CNN_model')

In [None]:
# Build a TFLite converter from the 'CNN_model' path written by the save cell above.
converter = tf.lite.TFLiteConverter.from_saved_model('CNN_model')

In [None]:
# Run the conversion; the result is written to disk in binary mode by the next cell.
tflite_model = converter.convert()

In [None]:
# Write the converted model to disk in binary mode.
with open('CNN_model.tflite', mode='wb') as tflite_file:
    tflite_file.write(tflite_model)