## Preparation

In [1]:
import os
import numpy as np
import MyDL
from MyDL import nn as nn
import MyDL.optimizer as optim
import MyDL.data as data

In [6]:
import fashion_mnist.utils.mnist_reader as mnist_reader

# Both splits are read from the same directory; only `kind` differs.
fashion_dir = 'fashion_mnist/data/fashion'
X_train, y_train = mnist_reader.load_mnist(fashion_dir, kind='train')
X_test, y_test = mnist_reader.load_mnist(fashion_dir, kind='t10k')


def _as_constant_tensor(array):
    """Wrap `array` in a MyDL.MyTensor created with requires_grad=False."""
    return MyDL.MyTensor(array, requires_grad=False)


# Inputs and labels are wrapped with requires_grad=False.
X_train_mytensor = _as_constant_tensor(X_train)
X_test_mytensor = _as_constant_tensor(X_test)
y_train_mytensor = _as_constant_tensor(y_train)
y_test_mytensor = _as_constant_tensor(y_test)

# Pair each split's inputs with its labels.
train_data = data.Dataset(X_train_mytensor, y_train_mytensor)
test_data = data.Dataset(X_test_mytensor, y_test_mytensor)

## Define the model

In [7]:
class MLP3(nn.NeuralNetwork):
    """Fully connected classifier with three linear layers.

    Layer widths are 784 -> hidden_size1 -> hidden_size2 -> 10. The forward
    pass applies batch normalisation to the input and to the output of the
    first two linear layers, the chosen activation after those two layers,
    and a softmax on the output of the last layer.

    Args:
        hidden_size1: Output width of the first linear layer.
        hidden_size2: Output width of the second linear layer.
        activation: 'relu' or 'tanh'; selects the hidden-layer non-linearity.

    Raises:
        ValueError: If `activation` is neither 'relu' nor 'tanh'.

    NOTE(review): a single `nn.BatchNorm1d()` instance is reused at three
    points whose inputs have different widths, and it is not added to
    `self.params`. Whether that is safe depends on how MyDL implements
    BatchNorm1d (learnable parameters / running statistics) -- confirm.
    """

    def __init__(self, hidden_size1=100, hidden_size2=10, activation='relu'):
        super().__init__()
        self.hidden_size1 = hidden_size1
        self.hidden_size2 = hidden_size2
        self.activ_func = activation

        # (attribute name, input width, output width) of each linear layer,
        # in the order they are created and registered.
        layer_specs = (
            ('fc1', 784, hidden_size1),
            ('fc2', hidden_size1, hidden_size2),
            ('fc3', hidden_size2, 10),
        )
        for attr_name, n_in, n_out in layer_specs:
            layer = nn.Linear(n_in, n_out, initialize='random')
            setattr(self, attr_name, layer)
            # Collect each layer's parameters right after it is created.
            self.params += layer.params

        if activation == 'relu':
            hidden_activation = nn.ReLU()
        elif activation == 'tanh':
            hidden_activation = nn.Tanh()
        else:
            raise ValueError('Unknown activation function')
        self.activation = hidden_activation

        self.softmax = nn.Softmax()
        self.BN = nn.BatchNorm1d()

    def forward(self, x):
        """Run the network on `x` and return the softmax output."""
        normalised_input = self.BN(x)
        hidden1 = self.activation(self.BN(self.fc1(normalised_input)))
        hidden2 = self.activation(self.BN(self.fc2(hidden1)))
        return self.softmax(self.fc3(hidden2))

## Training the best-performing model on the whole training data

In [8]:
# Name shared by the saved metrics file and, later, the saved parameters file.
best_model_name = 'MLP3_(100,10)_tanh_L2-0.0_lr-0.01'
print(f'Best model: {best_model_name}')

# Read the four metric arrays stored for this model.
results_file = os.path.join('results', f'{best_model_name}.npz')
with np.load(results_file) as result:
    train_loss, val_loss, train_acc, val_acc = (
        result[key] for key in ('train_loss', 'val_loss', 'train_acc', 'val_acc')
    )

# Report the last entry of each metric array.
print(f'Train loss: {train_loss[-1]:.3}  Val loss: {val_loss[-1]:.3}  Train acc: {train_acc[-1]:.3}  Val acc: {val_acc[-1]:.3}')

Best model: MLP3_(100,10)_tanh_L2-0.0_lr-0.01
Train loss: 0.39  Val loss: 0.42  Train acc: 0.862  Val acc: 0.849


In [None]:
# Run settings for the final training pass.
continue_if_exists = True
num_epochs = 30
lambda_L2 = 0.0

# Same architecture as encoded in `best_model_name`: (100, 10) hidden units, tanh.
model = MLP3(hidden_size1=100, hidden_size2=10, activation='tanh')
model.train()
criterion = nn.CrossEntropyLoss()
# lr decays much faster than the first train
optimizer = optim.Adam(model.params, lr=0.01, decay_rate=0.3)

# NOTE(review): test_data is passed where the earlier experiments presumably
# passed a validation split, so the "val_*" metrics below would be measured on
# the test set -- confirm against MyDL.train.
train_options = dict(
    num_epochs=num_epochs,
    batch_size=256,
    lambda_L2=lambda_L2,
    path='final_model_params',
    continue_if_exists=continue_if_exists,
)
history = MyDL.train(model, criterion, optimizer, train_data, test_data, **train_options)
train_loss, val_loss, train_acc, val_acc, continued_train = history

MyDL.save_loss(
    train_loss,
    val_loss,
    train_acc,
    val_acc,
    best_model_name,
    continued_train,
    path='final_results',
)

## Plotting the loss curve

In [None]:
import os
import matplotlib.pyplot as plt

# Plotting the loss curve
# savefig does not create missing directories, so make sure the target exists.
os.makedirs('final_results', exist_ok=True)

fig, ax = plt.subplots()
ax.plot(train_loss, label='Train Loss')
ax.plot(val_loss, label='Validation Loss')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.set_title('Loss Curve')
ax.legend()

# Save BEFORE showing: once plt.show() has displayed the figure it is released,
# so a later plt.savefig() writes a new, empty figure to disk.
fig.savefig('final_results/loss_curve.png')
plt.show()

## Visualizing model parameters

In [None]:
# Build a fresh network with the same architecture, then load the parameters
# stored under `final_model_params` for the best model.
model = MLP3(hidden_size1=100, hidden_size2=10, activation='tanh')
final_params_file = os.path.join('final_model_params', f'{best_model_name}.npz')
model.load(final_params_file)

In [None]:
import os
import matplotlib.pyplot as plt

_FIGURE_DIR = 'final_results'
_IMAGE_SIDE = 28  # layer-1 inputs are 28x28 images flattened to 784 values
# savefig does not create missing directories, so make sure the target exists.
os.makedirs(_FIGURE_DIR, exist_ok=True)


def _save_and_show(fig, param_name):
    """Write `fig` to <_FIGURE_DIR>/<param_name>.png, display it, then close it."""
    # Save BEFORE showing: once plt.show() has displayed the figure it is
    # released, so saving afterwards writes an empty image.
    fig.savefig(os.path.join(_FIGURE_DIR, f'{param_name}.png'))
    plt.show()
    # Figures created in a loop stay in memory until closed explicitly.
    plt.close(fig)


def _plot_image_grid(values, param_name, n_cols=10):
    """Draw each unit's incoming weights as a 28x28 image in a grid.

    Assumes `values` is laid out (inputs, units), so that the transpose has
    one row of 784 weights per unit -- TODO confirm against nn.Linear.
    """
    images = values.T.reshape(-1, _IMAGE_SIDE, _IMAGE_SIDE)
    n_images = images.shape[0]
    n_rows = -(-n_images // n_cols)  # ceiling division
    # squeeze=False keeps `axs` two-dimensional even when there is one row.
    fig, axs = plt.subplots(n_rows, n_cols, figsize=(n_cols, n_rows), squeeze=False)
    for ax in axs.flat:
        ax.axis('off')  # also hides unused cells in a partially filled last row
    for i in range(n_images):
        axs[i // n_cols, i % n_cols].imshow(images[i], cmap='viridis')
    fig.suptitle(param_name)
    _save_and_show(fig, param_name)


def _plot_histogram(values, param_name):
    """Draw one histogram over every entry of `values`."""
    fig, ax = plt.subplots(figsize=(10, 10))
    # Flatten first: given a 2-D array, hist() treats every column as a
    # separate dataset instead of drawing a single distribution.
    ax.hist(values.reshape(-1))
    ax.set_title(param_name)
    ax.set_xlabel('Value')
    ax.set_ylabel('Frequency')
    _save_and_show(fig, param_name)


# Visualizing the first layer
for param_name, param in zip(['Layer1-weights', 'Layer1-biases'], model.params[:2]):
    values = param.data
    n_values = values.reshape(-1).shape[0]
    if n_values % (_IMAGE_SIDE * _IMAGE_SIDE) == 0:
        _plot_image_grid(values, param_name)
    else:
        # The bias vector cannot be reshaped into 28x28 images (its element
        # count is not a multiple of 784), so show its value distribution.
        _plot_histogram(values, param_name)

# Visualizing the remaining layers
for param_name, param in zip(['Layer2-weights', 'Layer2-biases', 'Layer3-weights', 'Layer3-biases'], model.params[2:]):
    _plot_histogram(param.data, param_name)

