## Preparation

### Load the Fashion-MNIST dataset

In [1]:
import numpy as np
import fashion_mnist.utils.mnist_reader as mnist_reader
# Read both official splits from the same directory. Each load_mnist call
# returns an (images, labels) pair: 'train' first, then 't10k' (the test set).
(X_train, y_train), (X_test, y_test) = (
    mnist_reader.load_mnist('fashion_mnist/data/fashion', kind=split)
    for split in ('train', 't10k')
)


In [2]:
# Sanity-check what was loaded: one shape per line for the train/test images
# and labels, followed by the container type and the distinct label values.
print(*(arr.shape for arr in (X_train, y_train, X_test, y_test)), sep='\n')
print(type(X_train), np.unique(y_train), sep='\n')

(60000, 784)
(60000,)
(10000, 784)
(10000,)
<class 'numpy.ndarray'>
[0 1 2 3 4 5 6 7 8 9]


### Import the MyDL package, which is built on NumPy

In [3]:
import MyDL
import MyDL.data
import MyDL.optimizer as optim
import MyDL.nn as nn

In [4]:
# Hold out the tail of the official training set for validation. The split
# index is named once so the image and label slices cannot drift apart.
NUM_TRAIN = 50000

# Every array is wrapped with requires_grad=False: these are data tensors,
# not trainable parameters.
X_train_mytensor = MyDL.MyTensor(X_train[:NUM_TRAIN], requires_grad=False)
y_train_mytensor = MyDL.MyTensor(y_train[:NUM_TRAIN], requires_grad=False)
X_val_mytensor = MyDL.MyTensor(X_train[NUM_TRAIN:], requires_grad=False)
y_val_mytensor = MyDL.MyTensor(y_train[NUM_TRAIN:], requires_grad=False)
X_test_mytensor = MyDL.MyTensor(X_test, requires_grad=False)
y_test_mytensor = MyDL.MyTensor(y_test, requires_grad=False)

# Pair each feature tensor with its labels for the training utilities.
train_data = MyDL.data.Dataset(X_train_mytensor, y_train_mytensor)
val_data = MyDL.data.Dataset(X_val_mytensor, y_val_mytensor)
test_data = MyDL.data.Dataset(X_test_mytensor, y_test_mytensor)

## Define the MLP model

In [5]:
class MLP3(nn.NeuralNetwork):
    """Three-layer perceptron mapping 784 input features to 10 outputs.

    The forward pass applies, in order:
    BN1 -> fc1 -> BN2 -> activation -> fc2 -> BN3 -> activation -> fc3 -> softmax.

    Args:
        hidden_size1: Output width of the first linear layer.
        hidden_size2: Output width of the second linear layer.
        activation: Hidden non-linearity, either 'relu' or 'tanh'.

    Raises:
        ValueError: If ``activation`` is neither 'relu' nor 'tanh'.
    """

    def __init__(self, hidden_size1=100, hidden_size2=10, activation='relu'):
        super().__init__()
        self.hidden_size1, self.hidden_size2 = hidden_size1, hidden_size2
        self.activ_func = activation

        # Linear stack 784 -> hidden_size1 -> hidden_size2 -> 10, registered
        # so that self.params lists fc1, fc2, fc3 parameters in that order.
        self.fc1 = nn.Linear(784, hidden_size1, initialize='random')
        self.fc2 = nn.Linear(hidden_size1, hidden_size2, initialize='random')
        self.fc3 = nn.Linear(hidden_size2, 10, initialize='random')
        for linear in (self.fc1, self.fc2, self.fc3):
            self.params += linear.params

        # A single activation module is shared by both hidden layers.
        if activation not in ('relu', 'tanh'):
            raise ValueError('Unknown activation function')
        self.activation = nn.ReLU() if activation == 'relu' else nn.Tanh()

        self.softmax = nn.Softmax()

        # Batch-norm parameters are appended after all linear parameters.
        self.BN1, self.BN2, self.BN3 = nn.BatchNorm1d(), nn.BatchNorm1d(), nn.BatchNorm1d()
        for norm in (self.BN1, self.BN2, self.BN3):
            self.params += norm.params

    def forward(self, x):
        """Pass ``x`` through every stage in sequence and return the softmax output."""
        pipeline = (
            self.BN1, self.fc1, self.BN2, self.activation,
            self.fc2, self.BN3, self.activation,
            self.fc3, self.softmax,
        )
        for stage in pipeline:
            x = stage(x)
        return x

## Training the model

### Searching for the best hyperparameters

In [8]:
# Grid search over activation, hidden widths, L2 strength and learning rate.
# The commented-out values are the wider grid; only one setting is active.
continue_if_exists = False
num_epochs = 1

# Reset every "best so far" variable together. Resetting only highest_val_acc
# would let a re-run of this cell keep a stale best_model_name/best_hyperparams
# from an earlier run, and on a fresh kernel the names would not exist at all
# if no configuration beats the initial accuracy.
highest_val_acc = 0
best_model_name = None
best_hyperparams = None

for activ_func in ['relu']:#, 'tanh']:
    for hidden_size1, hidden_size2 in [(100, 10)]:#, (10, 10)]:
        for lambda_L2 in [0.0]:#, 0.0001, 0.001, 0.01]:
            for lr in [0.1]:#, 0.01, 0.001]:
                # The name encodes the full configuration; the results file
                # read later is named after it.
                model_name = 'MLP3_({},{})_{}_L2-{}_lr-{}'.format(hidden_size1, hidden_size2, activ_func, lambda_L2, lr)
                print(f'model: {model_name}')
                model = MLP3(hidden_size1=hidden_size1, hidden_size2=hidden_size2, activation=activ_func)
                criterion = nn.CrossEntropyLoss()
                optimizer = optim.Adam(model.params, lr=lr, decay_rate=0.2)
                result = MyDL.train(
                    model, criterion, optimizer, train_data, val_data,
                    num_epochs=num_epochs, batch_size=256, lambda_L2=lambda_L2,
                    model_path='model_params', continue_if_exists=continue_if_exists,
                )
                # Save unless continue_if_exists is off and the run reports
                # continued_train (presumably: an existing model was loaded
                # and not trained further -- confirm against MyDL.train).
                # Equivalent to the original `not (not A and B)` by De Morgan.
                if continue_if_exists or not result['continued_train']:
                    MyDL.save_result(**result, path='results')
                # Rank configurations by the last recorded validation accuracy.
                final_val_acc = result['val_acc_epoch'][-1]
                if final_val_acc > highest_val_acc:
                    highest_val_acc = final_val_acc
                    best_model_name = model_name
                    best_hyperparams = (hidden_size1, hidden_size2, activ_func, lambda_L2, lr)

model: MLP3_(100,10)_relu_L2-0.0_lr-0.1
Model already exists. Loading model...
Model loaded successfully.
Model is not going to be trained further as continue_if_exists is set to False.



### Display the best model

In [10]:
import os
# Report the last recorded metrics stored for the winning configuration.
print(f'Best model: {best_model_name}')
metric_keys = ('train_loss_epoch', 'val_loss_epoch', 'train_acc_epoch', 'val_acc_epoch')
with np.load(os.path.join('results', f'{best_model_name}.npz')) as result:
    # Pull the arrays out while the archive is still open.
    train_loss, val_loss, train_acc, val_acc = (result[key] for key in metric_keys)
print(f'Train loss: {train_loss[-1]:.3}  Val loss: {val_loss[-1]:.3}  Train acc: {train_acc[-1]:.3}  Val acc: {val_acc[-1]:.3}')
print(f'Hyperparameters: {best_hyperparams}')

Best model: MLP3_(100,10)_relu_L2-0.0_lr-0.1
Train loss: 0.683  Val loss: 0.534  Train acc: 0.747  Val acc: 0.794
Hyperparameters: (100, 10, 'relu', 0.0, 0.1)
