# Feedforward Neural Network on Fashion-MNIST

In [None]:
import utils.mnist_reader as mnist_reader
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier

# Single seed shared by NumPy's global RNG and by every `random_state=seed`
# argument passed to scikit-learn further down in the notebook.
seed = 0
np.random.seed(seed)

import warnings
from sklearn.exceptions import ConvergenceWarning

# Disable ConvergenceWarning so repeated fits do not flood the cell output.
# NOTE(review): this also hides fits that stop before converging — confirm
# that is acceptable for the reported scores.
warnings.filterwarnings("ignore", category=ConvergenceWarning)

Load Fashion-MNIST dataset

In [None]:
# Read the two Fashion-MNIST files: the training file and the 't10k' file
# that is kept aside as the final evaluation set.
X, y = mnist_reader.load_mnist('data/fashion', kind='train')
X_te, y_te = mnist_reader.load_mnist('data/fashion', kind='t10k')

# Hold out 25% of the training file as a validation split; the remaining
# 75% is what the models are fitted on.
X_tr, X_val, y_tr, y_val = train_test_split(
    X,
    y,
    test_size=0.25,
    shuffle=True,
    random_state=seed,
)

In [None]:
# Sanity-check the shape of every split. Each feature row holds 784 values,
# which can be arranged as a 28 * 28 image; labels are one value per row.
print('Training Set')
print(X_tr.shape)  # expected: 45000 rows x 784 columns
print(y_tr.shape)  # expected: 45000 labels

# Header fixed: this is the validation split (was misspelled "Valuation").
print('\nValidation Set')
print(X_val.shape)  # expected: 15000 rows x 784 columns
print(y_val.shape)  # expected: 15000 labels

print('\nTest Set')
print(X_te.shape)  # expected: 10000 rows x 784 columns
print(y_te.shape)  # expected: 10000 labels

Standardize the dataset (zero mean and unit variance per feature)

In [None]:
# Standardization: learn the per-feature mean and standard deviation from the
# training split ONLY. Fitting on the full `X` would let statistics of the
# validation rows leak into preprocessing and bias the validation scores.
scaler = StandardScaler()
scaler.fit(X_tr)

# Apply the same (training-derived) transform to every split.
# NOTE: the split variables are overwritten in place, so this cell must be
# run only once per kernel session (re-running would scale the data twice).
X_tr = scaler.transform(X_tr)
X_val = scaler.transform(X_val)
X_te = scaler.transform(X_te)

## Tune Hyperparameters

Find the best parameters with `GridSearchCV`

In [None]:
# Candidate values for each hyperparameter; the search tries every
# combination (2 * 2 * 2 * 2 = 16 candidates).
param_grid = dict(
    hidden_layer_sizes=[(50,), (50, 50)],
    activation=['relu', 'tanh'],
    solver=['sgd', 'adam'],
    alpha=[0.0001, 0.01],
)

# Number of leading training rows fed to the search; it can be at most the
# size of the training split (45000).
training_amount = 15000

# Untuned base estimator that the search clones for every candidate
mlp = MLPClassifier(random_state=seed)

# Exhaustive search with 5-fold cross-validation on the training subset
grid_search = GridSearchCV(estimator=mlp, param_grid=param_grid, cv=5)
X_search, y_search = X_tr[:training_amount], y_tr[:training_amount]
grid_search.fit(X_search, y_search)

In [None]:
# Best hyperparameter combination found by the search and its score
best_params = grid_search.best_params_
best_score = grid_search.best_score_

# Render the winning parameters as a one-row table on an otherwise empty axes
fig, ax = plt.subplots(figsize=(20, 2))
ax.axis('off')

# Columns are ordered alphabetically by parameter name; values are
# stringified so tuples such as hidden_layer_sizes are shown verbatim.
parameter_names = sorted(best_params)
parameter_values = [str(best_params[key]) for key in parameter_names]

table = ax.table(cellText=[parameter_values], colLabels=parameter_names, cellLoc='center', loc='center')

# Set the table style
table.auto_set_font_size(False)
table.set_fontsize(12)
table.scale(1.5, 1.5)

plt.title('Best Hyper Parameters')
plt.show()

print(f'Best Accuracy Score: {best_score}')

# Per-candidate results of the search. The fitted attribute is `cv_results_`
# (the previous `cvresults` raised AttributeError).
df = pd.DataFrame(grid_search.cv_results_)
print(df)

Train an `MLPClassifier` with the best parameters found

In [None]:
# Train the MLP classifier with the best parameters found by the grid search.
# The search only used the first `training_amount` rows; this final model is
# fitted on the whole training split.
# NOTE(review): an earlier hard-coded variant used activation='relu',
# alpha=0.0001, hidden_layer_sizes=(50,), solver='sgd' — presumably the
# combination selected by a previous run; confirm against `best_params`.
mlp_best = MLPClassifier(**best_params, random_state=seed)
mlp_best.fit(X_tr, y_tr)

## Evaluate the classifier

Calculate accuracy scores

In [None]:
# Predictions of the tuned model on the training, validation and test splits
y_tr_pred, y_val_pred, y_test_pred = (
    mlp_best.predict(features) for features in (X_tr, X_val, X_te)
)

# Accuracy of those predictions against the true labels of each split
acc_tr = accuracy_score(y_tr, y_tr_pred)
acc_val = accuracy_score(y_val, y_val_pred)
acc_te = accuracy_score(y_te, y_test_pred)

# One summary line per split, in training / validation / testing order
for split_name, split_acc in (
    ("Training", acc_tr),
    ("Validation", acc_val),
    ("Testing", acc_te),
):
    print(f"The MLP Classifier Has {split_name} Accuracy: {split_acc}")

Create a confusion matrix

In [None]:
# Confusion matrix of the tuned model's predictions on the test set
cm = confusion_matrix(y_true=y_te, y_pred=y_test_pred)

# Build the display and draw it in a single chained call
disp = ConfusionMatrixDisplay(confusion_matrix=cm).plot(
    include_values=True,
    cmap='viridis',
    ax=None,
    xticks_rotation='horizontal',
)

# Turn off grid lines so they do not cut through the matrix cells
plt.grid(False)
plt.show()

Create a classification report

In [None]:
# Human-readable description for each class id (0-9)
label2desc = {0: 'T-shirt/top (label 0)',
              1: 'Trouser (label 1)',
              2: 'Pullover (label 2)',
              3: 'Dress (label 3)',
              4: 'Coat (label 4)',
              5: 'Sandal (label 5)',
              6: 'Shirt (label 6)',
              7: 'Sneaker (label 7)',
              8: 'Bag (label 8)',
              9: 'Ankle boot (label 9)'}

# Per-class metrics on the test set as a nested dict. The descriptions are
# passed as a concrete list (in label order) rather than a dict view.
report = classification_report(
    y_te,
    y_test_pred,
    target_names=list(label2desc.values()),
    output_dict=True,
)

# Drop the last metric row of the report frame before transposing —
# presumably the raw "support" counts, which would swamp the 0-1 colour scale.
report_frame = pd.DataFrame(report).iloc[:-1, :].T

# Explicit axes, a title and plt.show() keep this figure consistent with the
# other plotting cells and suppress the stray Axes repr in the output.
fig, ax = plt.subplots()
sns.heatmap(report_frame, annot=True, ax=ax)
ax.set_title('Classification Report (Test Set)')
plt.show()

## Learning Curve

In [None]:
# Learning curve: refit the network on growing prefixes of the training split
# and record its error rate on the rows it was fitted on and on the test set.
mlp_train_errors = []
mlp_test_errors = []

# Training-set sizes to evaluate; the last entry is the whole training split
# (45000 rows for the split created above).
n_tr = [100, 1000, 5000, 10000, 20000, len(X_tr)]

for n in n_tr:

    # Get a sub portion of feature vectors
    sub_X_tr = X_tr[:n]
    sub_y_tr = y_tr[:n]

    # MLPClassifier
    # NOTE(review): hyperparameters are fixed here rather than taken from
    # `best_params`; confirm they still match the tuned model.
    mlp = MLPClassifier(activation='relu', alpha=0.0001, hidden_layer_sizes=(50,), solver='sgd', random_state=seed)

    mlp.fit(sub_X_tr, sub_y_tr)

    # Training error is measured on the n rows the model was fitted on.
    # Scoring against the full training split (as before) mixes in rows the
    # model never saw and is not a training error.
    mlp_train_pred = mlp.predict(sub_X_tr)
    mlp_test_pred = mlp.predict(X_te)

    mlp_train_error = 1 - accuracy_score(sub_y_tr, mlp_train_pred)
    mlp_test_error = 1 - accuracy_score(y_te, mlp_test_pred)

    mlp_train_errors.append(mlp_train_error)
    mlp_test_errors.append(mlp_test_error)

# Plot both error curves against the (log-scaled) number of training rows
fig, ax = plt.subplots()
ax.plot(n_tr, mlp_test_errors, 'o-', label='MLP Testing Error')
ax.plot(n_tr, mlp_train_errors, 'o--', label='MLP Training Error')

ax.set_xlabel('Number of Training Data Points')
ax.set_ylabel('Error Rate')
ax.set_xscale('log')
ax.set_title('Relation Between Amount Of Training Data and Error Rate')

ax.legend()
plt.show()