# Introduction

TODO

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

from sklearn import datasets
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import SGDClassifier

import tensorflow.keras as keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Flatten, Dense, Dropout, Rescaling
from tensorflow.keras.optimizers import SGD, RMSprop, Adam, Adadelta, Adamax, Nadam, Ftrl

from os import cpu_count

# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and
# 3.8 removed the old names, so use whichever name this installation provides.
plot_style = 'seaborn-v0_8-dark' if 'seaborn-v0_8-dark' in plt.style.available else 'seaborn-dark'
plt.style.use(plot_style)

# Download, Load, Visualize, and Reshape the Data

In [None]:
def load_npz_array(path, key='arr_0'):
    """Return one array from an ``.npz`` archive and close the archive.

    Args:
        path: Location of the ``.npz`` file.
        key: Name of the array inside the archive.

    Returns:
        The array stored under ``key``.
    """
    # np.load on an .npz returns an archive object that keeps the file open;
    # the context manager closes it once the array has been read.
    with np.load(path) as archive:
        return archive[key]


X_train = load_npz_array('data/kmnist-train-imgs.npz')
y_train = load_npz_array('data/kmnist-train-labels.npz')
X_test = load_npz_array('data/kmnist-test-imgs.npz')
y_test = load_npz_array('data/kmnist-test-labels.npz')

print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

Just like the [MNIST](https://www.tensorflow.org/datasets/catalog/mnist) dataset, we have 60,000 images in the training set and 10,000 images in the test set. Each image is 28x28 pixels.

In [None]:
# Print the raw array of the first training image to inspect its pixel values.
print(X_train[0])

Again, just as in the MNIST dataset, the images are grayscale with a value of 0-255 indicating the darkness of the pixel.

In [None]:
# Distinct label values present in the training set (shown as the cell output).
set(y_train)

In [None]:
# Histogram of the training labels, to see how the classes are distributed.
label_axes = sns.histplot(y_train)
label_axes.set_title('Training Labels Histogram', fontsize=20)
label_axes.set_xlabel('Class', fontsize=15)
label_axes.set_ylabel('Count', fontsize=15)

Our labels are 0-9 and we have a very balanced training set.

## Samples of each class
Let's look at several samples of each class.

In [None]:
# Number of example images to collect for each class.
samples = 10

# Map every class label to the positions of its first `samples` occurrences
# in y_train. One vectorized comparison per class replaces a Python-level scan
# of every label, and np.unique yields the classes in sorted order so the
# dictionary (and anything iterating it) has a deterministic order.
label_sample_indexes = {
    label: np.flatnonzero(y_train == label)[:samples].tolist()
    for label in np.unique(y_train)
}
label_sample_indexes

In [None]:
# `labels` is a notebook-level name; later cells use len(labels) as the class count.
labels = label_sample_indexes.keys()

# One row of axes per class, one column per collected sample.
_, axes = plt.subplots(nrows=len(labels), ncols=samples, figsize=(10, 20))
for label, row_axes in zip(labels, axes):
    row_images = X_train[label_sample_indexes[label]]
    for axis, picture in zip(row_axes, row_images):
        axis.imshow(picture, cmap=plt.cm.gray_r, interpolation='nearest')
        axis.set_title(f"Training: {label}")
        axis.set_axis_off()

## Reshaping the Data

Here I will flatten the image itself into a 1D array.

In [None]:
# Collapse the two image axes into one so every image becomes a single row.
train_shape, test_shape = X_train.shape, X_test.shape
X_train_flattened = np.reshape(X_train, (train_shape[0], train_shape[1] * train_shape[2]))
X_test_flattened = np.reshape(X_test, (test_shape[0], test_shape[1] * test_shape[2]))

print(X_train_flattened.shape, X_test_flattened.shape)

In [None]:
# Scale each pixel column by its standard deviation without centering it
# (with_mean=False). The scaler is fitted on the training data only and the
# same transform is then applied to the test data.
scaler = StandardScaler(with_std=True, with_mean=False)
X_train_scaled = scaler.fit(X_train_flattened).transform(X_train_flattened)
X_test_scaled = scaler.transform(X_test_flattened)

# Spot-check one column (index 350) of the scaled training data.
print("Average:", X_train_scaled[:, 350].mean())
print("Std dev:", X_train_scaled[:, 350].std())

# Classification with Logistic Regression

As in the example, I'll first fit a logistic regression classifier to serve as a baseline for comparison with the deep learning models later.

In [None]:
import re

import sklearn

# scikit-learn 1.1 renamed the logistic-regression loss from "log" to
# "log_loss" and 1.3 removed the old spelling, so pick the name that the
# installed version accepts.
_sk_major, _sk_minor = map(int, re.match(r"(\d+)\.(\d+)", sklearn.__version__).groups())
logistic_loss = "log_loss" if (_sk_major, _sk_minor) >= (1, 1) else "log"

# Logistic regression trained with stochastic gradient descent on the scaled,
# flattened images.
sgd = SGDClassifier(loss=logistic_loss, n_jobs=cpu_count())
sgd.fit(X_train_scaled, y_train)

y_predicted = sgd.predict(X_test_scaled)

# Accuracy is expressed as a percentage (0-100).
acc = 100. * accuracy_score(y_test, y_predicted)
cm = confusion_matrix(y_test, y_predicted)

# Print the accuracy with a percent sign, matching the MLP results cell.
print(f"Accuracy: {acc:.2f}%")
print("Confusion Matrix:")
print(cm)

The logistic regression model achieved an accuracy of about 65%.

# Classification with Multi-Layer Perceptron

For this multiclass classification problem I will use [sparse_categorical_crossentropy](https://keras.io/api/losses/probabilistic_losses/#sparsecategoricalcrossentropy-class) as the loss function, since our classes are represented as integers (i.e., not one-hot encoded). I'll begin with the [SGD](https://keras.io/api/optimizers/sgd/) optimizer, as it is the one I have worked with the most in class so far.




In [None]:
# Training hyperparameters used when fitting the first network below.
batch_size = 256  # samples per gradient update
epochs = 10  # full passes over the training data

In [None]:
# Small fully connected network: rescale pixels by 1/255, flatten each 28x28
# image, pass it through two 8-unit ReLU layers, and finish with a softmax
# layer that has one unit per entry in `labels`.
model = Sequential([
    Input(shape=(28, 28)),
    Rescaling(scale=1./255),
    Flatten(),
    Dense(8, activation='relu'),
    Dense(8, activation='relu'),
    Dense(len(labels), activation='softmax'),
])

model.summary()

In [None]:
# Compile with an SGD optimizer and the integer-label cross-entropy loss,
# tracking accuracy, then train on the unflattened training images.
model.compile(
    optimizer=SGD(),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
model.fit(x=X_train, y=y_train, epochs=epochs, batch_size=batch_size, verbose=1)

In [None]:
def build_model(n_hidden=2, n_neurons=8, dropout=None, loss='sparse_categorical_crossentropy', optimizer=None):
    """Build, summarize, and compile a fully connected classifier for 28x28 inputs.

    The network rescales inputs by 1/255, flattens them, stacks ``n_hidden``
    ReLU layers of ``n_neurons`` units (each optionally followed by dropout),
    and ends with a softmax layer sized by the notebook-level ``labels``.

    Args:
        n_hidden: Number of hidden Dense layers.
        n_neurons: Units in each hidden layer.
        dropout: Dropout rate added after every hidden layer; a falsy value
            (``None`` or ``0``) adds no dropout layers.
        loss: Loss passed to ``model.compile``.
        optimizer: Optimizer passed to ``model.compile``. When ``None``, a new
            ``SGD()`` instance is created for this model.

    Returns:
        The compiled ``Sequential`` model.
    """
    if optimizer is None:
        # Create the default optimizer per call. A default of ``SGD()`` in the
        # signature is evaluated once at definition time, so every model built
        # with the default would share one stateful optimizer instance.
        optimizer = SGD()

    model = Sequential()
    model.add(Input(shape=(28, 28)))
    model.add(Rescaling(scale=1./255))
    model.add(Flatten())
    for _ in range(n_hidden):
        model.add(Dense(n_neurons, activation='relu'))
        if dropout:
            model.add(Dropout(dropout))
    # One output unit per class; `labels` is defined at notebook level.
    model.add(Dense(len(labels), activation='softmax'))
    model.summary()
    model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])
    return model


In [None]:
def fit_and_predict(batch_size=256, epochs=10):
    """Train the notebook-level ``model`` and score it on the test set.

    Fits ``model`` on ``X_train``/``y_train``, predicts class probabilities
    for ``X_test``, and takes the most probable class for each sample.

    Args:
        batch_size: Batch size passed to ``model.fit``.
        epochs: Number of epochs passed to ``model.fit``.

    Returns:
        A ``(accuracy, confusion_matrix)`` tuple, where accuracy is a
        percentage (0-100) measured against ``y_test``.
    """
    model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs)
    # Row-wise argmax turns the per-class probabilities into class indices.
    class_predictions = model.predict(X_test).argmax(axis=1)
    return (
        100. * accuracy_score(y_test, class_predictions),
        confusion_matrix(y_test, class_predictions),
    )

In [None]:
# Rebuild the network through build_model() with its default arguments; this
# replaces the notebook-level `model` that fit_and_predict() trains.
model = build_model()

In [None]:
# Train with the default batch size and epoch count, then keep the test-set
# accuracy (as a percentage) and confusion matrix.
acc, cm = fit_and_predict()

In [None]:
# Report the test accuracy (already scaled to a percentage) and the confusion matrix.
print(f"Accuracy: {acc:.2f}%")
print(f"Confusion Matrix:\n{cm}")