# MLP - Multilayer Perceptron

Importing libraries

In [None]:
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from PIL import Image
from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.neural_network import MLPClassifier
from ipynb.fs.defs.Additional_metrics import *

Defining functions that will perform .fit() and .predict() while calculating execution time 

In [None]:
def mlp_fit_with_time(mlp, X_trainCV, y_trainCV, time_fit_tmp):
    """Fit ``mlp`` on one training fold and record how long the fit took.

    Parameters
    ----------
    mlp : object exposing ``fit(X, y)``
        The estimator to train; it is fitted in place.
    X_trainCV, y_trainCV
        Training features and labels, forwarded unchanged to ``mlp.fit``.
    time_fit_tmp : list
        Mutated in place: the elapsed fit time in seconds is appended.

    Returns
    -------
    None
    """
    # perf_counter() is monotonic and high-resolution, so the measured interval
    # cannot be skewed (or go negative) when the system clock is adjusted.
    start = time.perf_counter()
    mlp.fit(X_trainCV, y_trainCV)
    time_fit_tmp.append(time.perf_counter() - start)

In [None]:
def mlp_predict_with_time(mlp, X_testCV, time_pred_tmp):
    """Predict with ``mlp`` on one evaluation fold and record the prediction time.

    Parameters
    ----------
    mlp : object exposing ``predict(X)``
        An already fitted estimator.
    X_testCV
        Features to predict on, forwarded unchanged to ``mlp.predict``.
    time_pred_tmp : list
        Mutated in place: the elapsed prediction time in seconds is appended.

    Returns
    -------
    Whatever ``mlp.predict(X_testCV)`` returns.
    """
    # perf_counter() is monotonic and high-resolution, which matters here because
    # a single predict call can be very short.
    start = time.perf_counter()
    y_pred = mlp.predict(X_testCV)
    time_pred_tmp.append(time.perf_counter() - start)
    return y_pred

Getting tuple values for the number of neurons in the hidden layers, where every layer has the same number of neurons as the previous layer, or fewer

In [None]:
import itertools

# Candidate layer widths, listed widest first. itertools.combinations keeps the
# input order, so every generated tuple is strictly decreasing in width.
x = [128, 96, 64, 32, 16]

# Search space of hidden-layer shapes: all 3-layer and 2-layer decreasing
# combinations, followed by a hand-picked set of constant-width architectures.
hd_list = [
    *itertools.combinations(x, 3),
    *itertools.combinations(x, 2),
    (128, 128, 128),
    (96, 96, 96),
    (64, 64, 64),
    (32, 32, 32),
    (128, 128),
    (96, 96),
    (64, 64),
]
print(hd_list)

## Trained on 30% samples of the original training dataset

In [None]:
# Load the train and test CSVs; the first column is used as the label and the
# remaining columns as the features.
train_data = pd.read_csv("../datasets/fashion-mnist_train.csv")
X_train = train_data.iloc[:,1:]
y_train = train_data.iloc[:,0]

test_data = pd.read_csv("../datasets/fashion-mnist_test.csv")
X_test = test_data.iloc[:,1:]
y_test = test_data.iloc[:,0]

# Standardise BOTH splits with statistics estimated on the training data only.
# Scaling the test set with its own mean/std would put it in a different feature
# space from the one the model was trained on and would leak test information.
# NOTE(review): assumes both CSVs use the same feature column names, since the
# subtraction/division aligns on column labels - confirm.
train_mean = X_train.mean()
# A constant column has std 0; dividing by 1 instead leaves it at 0 rather than NaN.
train_std = X_train.std().replace(0, 1)

X_train_std = (X_train - train_mean) / train_std
X_test_std = (X_test - train_mean) / train_std

In [None]:
# Keep a stratified 30% of the standardised training data (test_size=0.7 sends
# the other 70% to the *_test_30 outputs); random_state fixes the split.
X_train_30, X_test_30, y_train_30, y_test_30 = train_test_split(
    X_train_std,
    y_train,
    test_size=0.7,
    stratify=y_train,
    random_state=42,
)

In [None]:
# Grid search over hidden-layer shape, activation, solver and batch size on the
# 30% subset. Each configuration is scored by its mean accuracy over a stratified
# 5-fold cross-validation; the best one is kept in best_hd / best_a / best_s / best_bs.
kf = StratifiedKFold(n_splits = 5, shuffle = True, random_state = 42)
acc = []
i, best_acc, best_bs  = 0, 0, 0
best_a, best_s = '', ''
best_hd = ()

for hd in hd_list:
    for a in ["logistic", "tanh", "relu"]:
        # The "lbfgs" solver is deliberately left out: it was judged unsuitable
        # for a dataset of this size.
        for s in ["sgd", "adam"]:
            for bs in [64]:

                acc_tmp, time_fit_tmp, time_pred_tmp = [], [], []

                indexes = kf.split(X_train_30, y_train_30)

                for train_index, test_index in indexes:

                    X_trainCV1 = X_train_30.iloc[train_index,:]
                    y_trainCV1 = y_train_30.iloc[train_index]
                    X_testCV1 = X_train_30.iloc[test_index,:]
                    y_testCV1 = y_train_30.iloc[test_index]

                    # A fresh classifier per fold so no weights carry over between folds.
                    classifier1 = MLPClassifier(hidden_layer_sizes = hd, activation = a,
                                                    solver = s, batch_size = bs, learning_rate = "constant",
                                                    learning_rate_init = 0.001, max_iter = 100, shuffle = True,
                                                    random_state = 42, early_stopping = True, n_iter_no_change = 10,
                                                    validation_fraction = 0.1, verbose = False)

                    mlp_fit_with_time(classifier1, X_trainCV1.values, y_trainCV1.values, time_fit_tmp)
                    y_pred1 = mlp_predict_with_time(classifier1, X_testCV1.values, time_pred_tmp)

                    # Accuracy = correctly classified samples (diagonal) / all samples.
                    c1 = confusion_matrix(y_testCV1, y_pred1)
                    acc_tmp.append(np.trace(c1) / c1.sum())

                # Computed once and reused for logging, bookkeeping and comparison.
                mean_acc = np.mean(acc_tmp)

                print(f"{i+1}. Hd = {hd}, a = {a}, s = {s}, bs = {bs}, Average accuracy: {mean_acc:.6f} | Average fit time: {np.mean(time_fit_tmp):.6f}s | Average predict time: {np.mean(time_pred_tmp):.6f}s")
                acc.append(mean_acc)
                i += 1

                if mean_acc > best_acc:
                    best_acc = mean_acc
                    best_hd, best_a, best_s, best_bs = hd, a, s, bs

    # Blank line between the log groups of consecutive architectures.
    print('')

print('-------------------')
# np.argmax is 0-based while the log lines above are numbered from 1 (i+1);
# add 1 so the reported number points at the right log line.
print('Best accuracy is in iteration number: ', np.argmax(acc) + 1)

#### Training on 30% training samples, test on test dataset

In [None]:
# Retrain the best configuration found by the grid search on the whole 30%
# subset and evaluate it once on the held-out test dataset.
classifier1 = MLPClassifier(hidden_layer_sizes = best_hd, activation = best_a,
                        solver = best_s, batch_size = best_bs, learning_rate = "constant",
                            learning_rate_init = 0.001, max_iter = 100, shuffle = True,
                            random_state = 42, early_stopping = True, n_iter_no_change = 10,
                            validation_fraction = 0.1, verbose = False)

# perf_counter() is monotonic and high-resolution, so the measured intervals are
# not affected by system clock adjustments.
start1 = time.perf_counter()
classifier1.fit(X_train_30.values, y_train_30.values)
fit_time = time.perf_counter() - start1

start2 = time.perf_counter()
y_pred2 = classifier1.predict(X_test_std.values)
pred_time = time.perf_counter() - start2

# Fixing the label order keeps the matrix 10x10 even if a class is never predicted.
c2 = confusion_matrix(y_test, y_pred2, labels=[0,1,2,3,4,5,6,7,8,9])

# Accuracy = correctly classified samples (diagonal) / all samples.
print(f"Accuracy: {np.trace(c2)/c2.sum():.6f} | Average fit time: {fit_time:.6f}s | Average predict time: {pred_time:.6f}s")
print('-------------------')
print('MLP parameters: hd = ', best_hd, ', a = ', best_a, ', s = ', best_s, ', bs = ', best_bs, sep='')

.

## Trained on 100% samples of the original training dataset

In [None]:
# Reload the train and test CSVs; the first column is used as the label and the
# remaining columns as the features.
train_data = pd.read_csv("../datasets/fashion-mnist_train.csv")
X_train = train_data.iloc[:,1:]
y_train = train_data.iloc[:,0]

test_data = pd.read_csv("../datasets/fashion-mnist_test.csv")
X_test = test_data.iloc[:,1:]
y_test = test_data.iloc[:,0]

# Standardise BOTH splits with statistics estimated on the training data only.
# Scaling the test set with its own mean/std would put it in a different feature
# space from the one the model was trained on and would leak test information.
# NOTE(review): assumes both CSVs use the same feature column names, since the
# subtraction/division aligns on column labels - confirm.
train_mean = X_train.mean()
# A constant column has std 0; dividing by 1 instead leaves it at 0 rather than NaN.
train_std = X_train.std().replace(0, 1)

X_train_std = (X_train - train_mean) / train_std
X_test_std = (X_test - train_mean) / train_std

In [None]:
# Stratified 5-fold cross-validation of the previously selected configuration
# on the full standardised training set, logging every fold and the averages.
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

acc = []
acc_tmp = []
time_fit_tmp = []
time_pred_tmp = []

indexes = kf.split(X_train, y_train)

for train_index, test_index in indexes:

    # Positional row selection for the current fold.
    X_trainCV2, X_testCV2 = X_train_std.iloc[train_index,:], X_train_std.iloc[test_index,:]
    y_trainCV2, y_testCV2 = y_train.iloc[train_index], y_train.iloc[test_index]

    # A fresh classifier per fold so no weights carry over between folds.
    classifier2 = MLPClassifier(
        hidden_layer_sizes = best_hd,
        activation = best_a,
        solver = best_s,
        batch_size = best_bs,
        learning_rate = "constant",
        learning_rate_init = 0.001,
        max_iter = 100,
        shuffle = True,
        random_state = 42,
        early_stopping = True,
        n_iter_no_change = 10,
        validation_fraction = 0.1,
        verbose = False,
    )

    mlp_fit_with_time(classifier2, X_trainCV2.values, y_trainCV2.values, time_fit_tmp)
    y_pred3 = mlp_predict_with_time(classifier2, X_testCV2.values, time_pred_tmp)

    # Fold accuracy = correctly classified samples (diagonal) / all samples.
    c3 = confusion_matrix(y_testCV2, y_pred3)
    fold_accuracy = np.trace(c3) / c3.sum()
    acc_tmp.append(fold_accuracy)

    print(f"        Accuracy: {fold_accuracy:.6f} | Fit time: {time_fit_tmp[-1]:.6f}s | Predict time: {time_pred_tmp[-1]:.6f}s")

print("")
print('-------------------')
print(f"Average accuracy: {np.mean(acc_tmp):.6f} | Average fit time: {np.mean(time_fit_tmp):.6f}s | Average predict time: {np.mean(time_pred_tmp):.6f}s")

##### Training on 100% training samples, test on test dataset

In [None]:
# Retrain the selected configuration on the whole standardised training set and
# evaluate it once on the held-out test dataset.
classifier2 = MLPClassifier(hidden_layer_sizes = best_hd, activation = best_a,
                            solver = best_s, batch_size = best_bs, learning_rate = "constant",
                            learning_rate_init = 0.001, max_iter = 100, shuffle = True,
                            random_state = 42, early_stopping = True, n_iter_no_change = 10,
                            validation_fraction = 0.1, verbose = False)

# perf_counter() is monotonic and high-resolution, so the measured intervals are
# not affected by system clock adjustments.
start1 = time.perf_counter()
classifier2.fit(X_train_std.values, y_train.values)
fit_time = time.perf_counter() - start1

start2 = time.perf_counter()
y_pred4 = classifier2.predict(X_test_std.values)
pred_time = time.perf_counter() - start2

# Fixing the label order keeps the matrix 10x10 even if a class is never predicted.
c4 = confusion_matrix(y_test, y_pred4, labels=[0,1,2,3,4,5,6,7,8,9])

# Accuracy = correctly classified samples (diagonal) / all samples.
print(f"Accuracy: {np.trace(c4)/c4.sum():.6f} | Average fit time: {fit_time:.6f}s | Average predict time: {pred_time:.6f}s")
print('-------------------')
print('MLP parameters: hd = ', best_hd, ', a = ', best_a, ', s = ', best_s, ', bs = ', best_bs, sep='')

.

## Trained on 100% samples with PCA reduction

In [None]:
# Reload the train and test CSVs; the first column is used as the label and the
# remaining columns as the features.
train_data = pd.read_csv("../datasets/fashion-mnist_train.csv")
X_train = train_data.iloc[:,1:]
y_train = train_data.iloc[:,0]

test_data = pd.read_csv("../datasets/fashion-mnist_test.csv")
X_test = test_data.iloc[:,1:]
y_test = test_data.iloc[:,0]

# Standardise BOTH splits with statistics estimated on the training data only.
# Scaling the test set with its own mean/std would put it in a different feature
# space from the one the model (and the PCA projection) was fitted on and would
# leak test information.
# NOTE(review): assumes both CSVs use the same feature column names, since the
# subtraction/division aligns on column labels - confirm.
train_mean = X_train.mean()
# A constant column has std 0; dividing by 1 instead leaves it at 0 rather than NaN.
train_std = X_train.std().replace(0, 1)

X_train_std = (X_train - train_mean) / train_std
X_test_std = (X_test - train_mean) / train_std

In [None]:
# Grid search over hidden-layer shape, activation, solver and PCA setting on the
# full standardised training set. Each configuration is scored by its mean
# accuracy over a stratified 5-fold cross-validation. The batch size is not
# searched here: best_bs from the earlier grid search is reused.
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
acc = []
# Three names, three values (the original supplied four and raised ValueError).
i, best_acc, best_pca = 0, 0, 0
best_a, best_s = '', ''
best_hd = ()

for hd in hd_list:
    for a in ["logistic", "tanh", "relu"]:
        for s in ["sgd", "adam"]:
            for pc in [0.9]:

                acc_tmp, time_fit_tmp, time_pred_tmp = [], [], []

                # Folds must be built from the same data they index below. Splitting
                # the 30% subset here would restrict training to the first 30% of
                # the rows and stratify on labels belonging to different samples.
                indexes = kf.split(X_train_std, y_train)

                for train_index, test_index in indexes:

                    X_trainCV3 = X_train_std.iloc[train_index,:]
                    y_trainCV3 = y_train.iloc[train_index]
                    X_testCV3 = X_train_std.iloc[test_index,:]
                    y_testCV3 = y_train.iloc[test_index]

                    # PCA is fitted on the training fold only and then applied to
                    # both folds, so the evaluation fold does not influence it.
                    pca = PCA(n_components=pc)
                    pca.fit(X_trainCV3)
                    X_train_r = pca.transform(X_trainCV3)
                    X_test_r = pca.transform(X_testCV3)

                    # A fresh classifier per fold so no weights carry over between folds.
                    classifier3 = MLPClassifier(hidden_layer_sizes = hd, activation = a,
                                        solver = s, batch_size = best_bs, learning_rate = "constant",
                                        learning_rate_init = 0.001, max_iter = 100, shuffle = True,
                                        random_state = 42, early_stopping = True, n_iter_no_change = 10,
                                        validation_fraction = 0.1, verbose = False)

                    mlp_fit_with_time(classifier3, X_train_r, y_trainCV3, time_fit_tmp)
                    y_pred5 = mlp_predict_with_time(classifier3, X_test_r, time_pred_tmp)

                    # Accuracy = correctly classified samples (diagonal) / all samples.
                    c5 = confusion_matrix(y_testCV3, y_pred5, labels=[0,1,2,3,4,5,6,7,8,9])
                    acc_tmp.append(np.trace(c5) / c5.sum())

                # Computed once and reused for logging, bookkeeping and comparison.
                mean_acc = np.mean(acc_tmp)

                # Log the batch size actually used (best_bs) and the PCA setting,
                # not the stale loop variable left over from the earlier search.
                print(f"{i+1}. Hd = {hd}, a = {a}, s = {s}, bs = {best_bs}, pca = {pc}, Average accuracy: {mean_acc:.6f} | Average fit time: {np.mean(time_fit_tmp):.6f}s | Average predict time: {np.mean(time_pred_tmp):.6f}s")
                acc.append(mean_acc)
                i += 1

                if mean_acc > best_acc:
                    best_acc = mean_acc
                    best_hd, best_a, best_s, best_pca = hd, a, s, pc

    # Blank line between the log groups of consecutive architectures.
    print('')

print('-------------------')
# np.argmax is 0-based while the log lines above are numbered from 1 (i+1);
# add 1 so the reported number points at the right log line.
print('Best accuracy is in iteration number: ', np.argmax(acc) + 1)

##### Training on 100% training samples with PCA reduction, test on test dataset

In [None]:
# Fit PCA on the full standardised training set, project both splits, retrain
# the selected configuration and evaluate it once on the held-out test dataset.
pca = PCA(n_components=best_pca)
pca.fit(X_train_std)
X_train_r = pca.transform(X_train_std)
X_test_r = pca.transform(X_test_std)

classifier3 = MLPClassifier(hidden_layer_sizes = best_hd, activation = best_a,
                                    solver = best_s, batch_size = best_bs, learning_rate = "constant",
                                    learning_rate_init = 0.001, max_iter = 100, shuffle = True,
                                    random_state = 42, early_stopping = True, n_iter_no_change = 10,
                                    validation_fraction = 0.1, verbose = False)

# Only the classifier is timed; the PCA fit/transform above is not included.
# perf_counter() is monotonic and high-resolution, so the measured intervals are
# not affected by system clock adjustments.
start1 = time.perf_counter()
classifier3.fit(X_train_r, y_train)
fit_time = time.perf_counter() - start1

start2 = time.perf_counter()
y_pred6 = classifier3.predict(X_test_r)
pred_time = time.perf_counter() - start2

# Fixing the label order keeps the matrix 10x10 even if a class is never predicted.
c6 = confusion_matrix(y_test, y_pred6, labels=[0,1,2,3,4,5,6,7,8,9])

# Accuracy = correctly classified samples (diagonal) / all samples.
print(f"Accuracy: {np.trace(c6)/c6.sum():.6f} | Average fit time: {fit_time:.6f}s | Average predict time: {pred_time:.6f}s")
print('-------------------')
print('MLP parameters: hd = ', best_hd, ', a = ', best_a, ', s = ', best_s, ', bs = ', best_bs, ', pca = ', best_pca, sep='')

In [None]:
# Confusion matrix of the PCA + MLP model on the test dataset; it was built with
# labels=[0..9], so rows and columns follow that class order.
print(c6)

In [None]:
# Presumably reports the accuracy of each class from the test confusion matrix;
# accuracy_per_class is not defined in this notebook (it appears to come from the
# Additional_metrics star import).
# NOTE(review): y_test.unique() yields labels in order of first appearance, whereas
# c6 was built with labels ordered 0-9 - confirm the helper does not rely on the
# two orderings matching.
print(accuracy_per_class(c6, y_test.unique()))

In [None]:
# Presumably reports the sensitivity (recall) of each class from the test
# confusion matrix; sensitivity_per_class is not defined in this notebook (it
# appears to come from the Additional_metrics star import).
# NOTE(review): y_test.unique() yields labels in order of first appearance, whereas
# c6 was built with labels ordered 0-9 - confirm the helper does not rely on the
# two orderings matching.
print(sensitivity_per_class(c6, y_test.unique()))

## Examples of wrong classification

In [None]:
# Predictions of the PCA + MLP model, kept under a second name for the
# comparison below; the type is printed as a sanity check.
y_pred_np = y_pred6
print(type(y_pred_np))

# Ground-truth labels converted from a pandas Series to a NumPy array so they
# can be compared with the predictions position by position.
y_test_np = y_test.to_numpy()
print(type(y_test_np))

In [None]:
def differences(a,b):
    """Return the positions at which two equally long 1-D sequences disagree.

    Parameters
    ----------
    a, b : array-like
        One-dimensional sequences of labels (e.g. predictions and ground truth).

    Returns
    -------
    numpy.ndarray
        Integer array with the indices ``j`` for which ``a[j] != b[j]``, in
        ascending order. It is empty (but still integer-typed, so it can be
        used for indexing) when the inputs agree everywhere.

    Raises
    ------
    ValueError
        If ``a`` and ``b`` do not have the same shape.
    """
    a_arr = np.asarray(a)
    b_arr = np.asarray(b)
    # Fail loudly instead of comparing misaligned sequences.
    if a_arr.shape != b_arr.shape:
        raise ValueError(
            f"differences() needs inputs of equal shape, got {a_arr.shape} and {b_arr.shape}"
        )
    # Vectorised comparison: no shadowing of the builtin `list` and no quadratic
    # list concatenation as in the element-by-element version.
    return np.flatnonzero(a_arr != b_arr)

In [None]:
# Positions in the test set where the prediction differs from the true label.
diff = differences(y_pred_np, y_test_np)
# Show the first nine of them.
print(diff[:9])

In [None]:
from PIL import Image

In [None]:
plt.figure(figsize=(8,8))

# Show the first (up to) nine misclassified test images in a 3x3 grid. Slicing
# diff keeps the loop safe when there are fewer than nine errors.
for j, idx in enumerate(diff[:9]):
    plt.subplot(3,3,j+1)
    # Use the raw (unstandardised) pixel values so the image is displayable.
    arr = X_test.iloc[idx,:].to_numpy()

    arr = arr.reshape(28,28)
    arr = arr.astype(np.uint8)
    img = Image.fromarray(arr, "L")

    plt.imshow(img, cmap = 'gray')
    # The title must describe the same sample that is drawn: the original looked
    # up diff[j+100] here, so captions did not match the pictures.
    plt.title(f"Predicted {y_pred_np[idx]}, Real {y_test_np[idx]}")
    plt.axis("off")

plt.show()