# Mohammed Mynuddin
# ID : 950446781
# Subject: Introduction to Neural Network

This notebook contains experiments for:

* Loss functions
* Learning rate decay
* Weight initialization
* Optimizers

# `lincoln` imports

In [55]:
import numpy as np

In [56]:
# Enable IPython's autoreload extension in mode 2 — presumably so edits to the
# local `lincoln` package are picked up without restarting the kernel.
# NOTE(review): the recorded output shows that re-running this cell prints an
# "already loaded" message and suggests `%reload_ext autoreload` instead.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [57]:
import lincoln
from lincoln.layers import Dense
from lincoln.losses import SoftmaxCrossEntropy, MeanSquaredError
from lincoln.optimizers import Optimizer, SGD, SGDMomentum
from lincoln.activations import Sigmoid, Tanh, Linear, ReLU
from lincoln.network import NeuralNetwork
from lincoln.train import Trainer
from lincoln.utils import mnist
from lincoln.utils.np_utils import softmax

In [58]:
# Prepare the MNIST files, then load the train/test images and integer labels.
# NOTE(review): the recorded output shows init() downloading and saving the four
# MNIST archives on this run — confirm whether it re-downloads on every execution.
mnist.init()
X_train, y_train, X_test, y_test = mnist.load()

Downloading train-images-idx3-ubyte.gz...
Downloading t10k-images-idx3-ubyte.gz...
Downloading train-labels-idx1-ubyte.gz...
Downloading t10k-labels-idx1-ubyte.gz...
Download complete.
Save complete.


In [59]:
# Sanity check: number of training labels (one per training example).
num_labels = len(y_train)
num_labels

60000

In [60]:
# one-hot encode
def one_hot_encode(labels, num_classes=10):
    """Return a one-hot matrix for a sequence of integer class labels.

    Args:
        labels: 1-D sequence of integer class indices, one per example.
        num_classes: Number of columns in the encoding (10 for the MNIST digits).

    Returns:
        A float array of shape (len(labels), num_classes) that is all zeros
        except for a 1 in each row at that row's label index.
    """
    labels = np.asarray(labels)
    encoded = np.zeros((len(labels), num_classes))
    # A single vectorised assignment replaces a Python-level loop over every sample.
    encoded[np.arange(len(labels)), labels] = 1
    return encoded


train_labels = one_hot_encode(y_train)
test_labels = one_hot_encode(y_test)

# The original cell left `num_labels` bound to the test-set size; keep that
# binding in case another cell reads it.
num_labels = len(y_test)

# MNIST Demos

# Scale data to mean 0, variance 1

In [61]:
# Centre both splits using the mean of the training set only.
train_mean = np.mean(X_train)
X_train = X_train - train_mean
X_test = X_test - train_mean

In [62]:
# Display the value range of both splits after centring.
X_train.min(), X_train.max(), X_test.min(), X_test.max()

(-33.318421449829934,
 221.68157855017006,
 -33.318421449829934,
 221.68157855017006)

In [63]:
# Scale both splits by the standard deviation of the training set only.
train_std = np.std(X_train)
X_train = X_train / train_std
X_test = X_test / train_std

In [64]:
# Display the value range of both splits after scaling.
X_train.min(), X_train.max(), X_test.min(), X_test.max()

(-0.424073894391566, 2.821543345689335, -0.424073894391566, 2.821543345689335)

In [65]:
def calc_accuracy_model(model, test_set, labels=None):
    """Print the model's classification accuracy on ``test_set`` as a percentage.

    Args:
        model: Object exposing ``forward(X, inference=True)`` that returns one
            row of class scores per example.
        test_set: Inputs to score; ``test_set.shape[0]`` is the example count.
        labels: Integer class index for each row of ``test_set``. Defaults to
            the notebook-level ``y_test``, which this function always used
            before the parameter existed.

    Returns:
        None. The accuracy is printed, so cells that end with this call
        display nothing beyond the printed line.
    """
    if labels is None:
        # Backward-compatible default: score against the notebook's test labels.
        labels = y_test
    predictions = np.argmax(model.forward(test_set, inference=True), axis=1)
    accuracy = np.equal(predictions, labels).sum() * 100.0 / test_set.shape[0]
    print(f'''The model validation accuracy is: {accuracy:.2f}%''')

## Question 1 
# Compare the accuracy of the following neural network architectures

# a). 100 RELU hidden units, 10 Sigmoid output units, MeanSquaredError loss

In [66]:
# Q1(a): 100 ReLU hidden units -> 10 Sigmoid outputs, MeanSquaredError(normalize=False).
hidden_layer = Dense(neurons=100, activation=ReLU())
output_layer = Dense(neurons=10, activation=Sigmoid())
model = NeuralNetwork(
    layers=[hidden_layer, output_layer],
    loss=MeanSquaredError(normalize=False),
    seed=20190119,
)

# Train with SGD(0.1), evaluating on the test split every 10 epochs.
trainer = Trainer(model, SGD(0.1))
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=70,
    eval_every=10,
    seed=20190119,
    batch_size=70,
);
print()
calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.796
Validation loss after 20 epochs is 0.738
Validation loss after 30 epochs is 0.721
Validation loss after 40 epochs is 0.711
Validation loss after 50 epochs is 0.702
Validation loss after 60 epochs is 0.698

Loss increased after epoch 70, final loss was 0.698, 
using the model from epoch 60

The model validation accuracy is: 38.39%


In [54]:
# Q1(a) variant: same ReLU -> Sigmoid network, but MeanSquaredError(normalize=True).
hidden_layer = Dense(neurons=100, activation=ReLU())
output_layer = Dense(neurons=10, activation=Sigmoid())
model = NeuralNetwork(
    layers=[hidden_layer, output_layer],
    loss=MeanSquaredError(normalize=True),
    seed=20190119,
)

# Train with SGD(0.1), evaluating on the test split every 10 epochs.
trainer = Trainer(model, SGD(0.1))
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=70,
    eval_every=10,
    seed=20190119,
    batch_size=70,
);
print()
calc_accuracy_model(model, X_test)


Loss increased after epoch 10, final loss was 1000000000.000, 
using the model from epoch 0

The model validation accuracy is: 9.80%


The validation accuracy is very low with MeanSquaredError. The reason is that this is a classification problem, for which we should be using softmax cross-entropy loss!

# b). 100 Tanh hidden units, 10 Sigmoid output units, MeanSquaredError loss

In [67]:
# Q1(b): 100 Tanh hidden units -> 10 Sigmoid outputs, MeanSquaredError(normalize=False).
hidden_layer = Dense(neurons=100, activation=Tanh())
output_layer = Dense(neurons=10, activation=Sigmoid())
model = NeuralNetwork(
    layers=[hidden_layer, output_layer],
    loss=MeanSquaredError(normalize=False),
    seed=20190119,
)

# Train with SGD(0.1), evaluating on the test split every 10 epochs.
trainer = Trainer(model, SGD(0.1))
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=70,
    eval_every=10,
    seed=20190119,
    batch_size=70,
);
print()
calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.602
Validation loss after 20 epochs is 0.555
Validation loss after 30 epochs is 0.483
Validation loss after 40 epochs is 0.461
Validation loss after 50 epochs is 0.439
Validation loss after 60 epochs is 0.398
Validation loss after 70 epochs is 0.383

The model validation accuracy is: 71.54%


Note: even if we normalize the outputs of a classification model with mean squared error loss, it still doesn't help:

In [25]:
# Q1(b) variant: same Tanh -> Sigmoid network, but MeanSquaredError(normalize=True).
hidden_layer = Dense(neurons=100, activation=Tanh())
output_layer = Dense(neurons=10, activation=Sigmoid())
model = NeuralNetwork(
    layers=[hidden_layer, output_layer],
    loss=MeanSquaredError(normalize=True),
    seed=20190119,
)

# Train with SGD(0.1), evaluating on the test split every 10 epochs.
trainer = Trainer(model, SGD(0.1))
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=70,
    eval_every=10,
    seed=20190119,
    batch_size=70,
);

calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.876

Loss increased after epoch 20, final loss was 0.876, 
using the model from epoch 10
The model validation accuracy is: 47.72%


The reason is that we should be using softmax cross entropy loss!

# c). 100 Sigmoid hidden units, 10 Sigmoid output units, MeanSquaredError loss

In [26]:
# Q1(c): 100 Sigmoid hidden units -> 10 Sigmoid outputs, MeanSquaredError(normalize=False).
hidden_layer = Dense(neurons=100, activation=Sigmoid())
output_layer = Dense(neurons=10, activation=Sigmoid())
model = NeuralNetwork(
    layers=[hidden_layer, output_layer],
    loss=MeanSquaredError(normalize=False),
    seed=20190119,
)

# Train with SGD(0.1), evaluating on the test split every 10 epochs.
trainer = Trainer(model, SGD(0.1))
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=70,
    eval_every=10,
    seed=20190119,
    batch_size=70,
);
print()
calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.654
Validation loss after 20 epochs is 0.429
Validation loss after 30 epochs is 0.376
Validation loss after 40 epochs is 0.356
Validation loss after 50 epochs is 0.345
Validation loss after 60 epochs is 0.338
Validation loss after 70 epochs is 0.332

The model validation accuracy is: 73.09%


In [27]:
# Q1(c) variant: same Sigmoid -> Sigmoid network, but MeanSquaredError(normalize=True).
hidden_layer = Dense(neurons=100, activation=Sigmoid())
output_layer = Dense(neurons=10, activation=Sigmoid())
model = NeuralNetwork(
    layers=[hidden_layer, output_layer],
    loss=MeanSquaredError(normalize=True),
    seed=20190119,
)

# Train with SGD(0.1), evaluating on the test split every 10 epochs.
trainer = Trainer(model, SGD(0.1))
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=70,
    eval_every=10,
    seed=20190119,
    batch_size=70,
);
print()
calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.582

Loss increased after epoch 20, final loss was 0.582, 
using the model from epoch 10

The model validation accuracy is: 62.23%


# d). 100 Sigmoid hidden units, 10 Linear output units, Softmax loss

In [28]:
# Q1(d): 100 Sigmoid hidden units -> 10 Linear outputs, SoftmaxCrossEntropy loss.
hidden_layer = Dense(neurons=100, activation=Sigmoid())
output_layer = Dense(neurons=10, activation=Linear())
model = NeuralNetwork(
    layers=[hidden_layer, output_layer],
    loss=SoftmaxCrossEntropy(),
    seed=20190119,
)

# Train with SGD(0.1); this cell evaluates after every epoch (eval_every=1).
trainer = Trainer(model, SGD(0.1))
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=70,
    eval_every=1,
    seed=20190119,
    batch_size=70,
);
print()
calc_accuracy_model(model, X_test)

Validation loss after 1 epochs is 1.336
Validation loss after 2 epochs is 1.015
Validation loss after 3 epochs is 0.878
Validation loss after 4 epochs is 0.790
Validation loss after 5 epochs is 0.741
Validation loss after 6 epochs is 0.698
Validation loss after 7 epochs is 0.665
Validation loss after 8 epochs is 0.645
Validation loss after 9 epochs is 0.623
Validation loss after 10 epochs is 0.615
Validation loss after 11 epochs is 0.595
Validation loss after 12 epochs is 0.582
Validation loss after 13 epochs is 0.574
Validation loss after 14 epochs is 0.563
Validation loss after 15 epochs is 0.556
Validation loss after 16 epochs is 0.552
Validation loss after 17 epochs is 0.539
Validation loss after 18 epochs is 0.534
Validation loss after 19 epochs is 0.527
Validation loss after 20 epochs is 0.518
Validation loss after 21 epochs is 0.517
Validation loss after 22 epochs is 0.513
Validation loss after 23 epochs is 0.505
Validation loss after 24 epochs is 0.503
Validation loss after 25 

# e). 100 Tanh hidden units, 10 Linear output units, MeanSquaredError loss

In [29]:
# Q1(e): 100 Tanh hidden units -> 10 Linear outputs, MeanSquaredError(normalize=False).
hidden_layer = Dense(neurons=100, activation=Tanh())
output_layer = Dense(neurons=10, activation=Linear())
model = NeuralNetwork(
    layers=[hidden_layer, output_layer],
    loss=MeanSquaredError(normalize=False),
    seed=20190119,
)

# Train with SGD(0.1), evaluating on the test split every 10 epochs.
trainer = Trainer(model, SGD(0.1))
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=70,
    eval_every=10,
    seed=20190119,
    batch_size=70,
);
print()
calc_accuracy_model(model, X_test)


Loss increased after epoch 10, final loss was 1000000000.000, 
using the model from epoch 0

The model validation accuracy is: 9.80%


In [30]:
# Q1(e) variant: same Tanh -> Linear network, but MeanSquaredError(normalize=True).
hidden_layer = Dense(neurons=100, activation=Tanh())
output_layer = Dense(neurons=10, activation=Linear())
model = NeuralNetwork(
    layers=[hidden_layer, output_layer],
    loss=MeanSquaredError(normalize=True),
    seed=20190119,
)

# Train with SGD(0.1), evaluating on the test split every 10 epochs.
trainer = Trainer(model, SGD(0.1))
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=70,
    eval_every=10,
    seed=20190119,
    batch_size=70,
);
print()
calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 2.006
Validation loss after 20 epochs is 1.034

Loss increased after epoch 30, final loss was 1.034, 
using the model from epoch 20

The model validation accuracy is: 60.99%


# Question 2 
# Use the best model from the previous question to answer this question. Compare the following SGD momentum algorithms

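# Model (d) — 100 Sigmoid hidden units, 10 Linear output units, softmax cross-entropy loss — is the best model from Question 1 based on validation accuracy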

## a. Momentum = 0.7

In [47]:
# Q2(a): Sigmoid hidden layer -> Linear outputs with SoftmaxCrossEntropy,
# trained with SGDMomentum(0.1, momentum=0.7).
model = NeuralNetwork(
    layers=[
        Dense(neurons=100, activation=Sigmoid()),
        Dense(neurons=10, activation=Linear()),
    ],
    loss=SoftmaxCrossEntropy(),
    seed=20190119,
)

# Build the optimizer once and pass that instance to the Trainer. Previously
# `optim` was created but never used, and an identical second optimizer was
# constructed inline for the Trainer.
optim = SGDMomentum(0.1, momentum=0.7)

trainer = Trainer(model, optim)
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=70,
    eval_every=1,
    seed=20190119,
    batch_size=70,
);

calc_accuracy_model(model, X_test)

Validation loss after 1 epochs is 0.827
Validation loss after 2 epochs is 0.656
Validation loss after 3 epochs is 0.583
Validation loss after 4 epochs is 0.544
Validation loss after 5 epochs is 0.526
Validation loss after 6 epochs is 0.491
Validation loss after 7 epochs is 0.470
Validation loss after 8 epochs is 0.461
Validation loss after 9 epochs is 0.448
Validation loss after 10 epochs is 0.444
Validation loss after 11 epochs is 0.430
Validation loss after 12 epochs is 0.424

Loss increased after epoch 13, final loss was 0.424, 
using the model from epoch 12
The model validation accuracy is: 92.38%


## b. Momentum = 0.8


In [33]:
# Q2(b): Sigmoid hidden layer -> Linear outputs with SoftmaxCrossEntropy,
# trained with SGDMomentum(0.1, momentum=0.8).
model = NeuralNetwork(
    layers=[
        Dense(neurons=100, activation=Sigmoid()),
        Dense(neurons=10, activation=Linear()),
    ],
    loss=SoftmaxCrossEntropy(),
    seed=20190119,
)

# Build the optimizer once and pass that instance to the Trainer. Previously
# `optim` was created but never used, and an identical second optimizer was
# constructed inline for the Trainer.
optim = SGDMomentum(0.1, momentum=0.8)

trainer = Trainer(model, optim)
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=70,
    eval_every=1,
    seed=20190119,
    batch_size=70,
);

calc_accuracy_model(model, X_test)

Validation loss after 1 epochs is 0.745
Validation loss after 2 epochs is 0.589
Validation loss after 3 epochs is 0.537
Validation loss after 4 epochs is 0.504
Validation loss after 5 epochs is 0.489
Validation loss after 6 epochs is 0.462
Validation loss after 7 epochs is 0.447
Validation loss after 8 epochs is 0.431
Validation loss after 9 epochs is 0.429
Validation loss after 10 epochs is 0.420
Validation loss after 11 epochs is 0.411
Validation loss after 12 epochs is 0.407

Loss increased after epoch 13, final loss was 0.407, 
using the model from epoch 12
The model validation accuracy is: 92.68%


## c. Momentum = 0.9

In [35]:
# Q2(c): Sigmoid hidden layer -> Linear outputs with SoftmaxCrossEntropy,
# trained with SGDMomentum(0.1, momentum=0.9).
model = NeuralNetwork(
    layers=[
        Dense(neurons=100, activation=Sigmoid()),
        Dense(neurons=10, activation=Linear()),
    ],
    loss=SoftmaxCrossEntropy(),
    seed=20190119,
)

# Build the optimizer once and pass that instance to the Trainer. Previously
# `optim` was created but never used, and an identical second optimizer was
# constructed inline for the Trainer.
optim = SGDMomentum(0.1, momentum=0.9)

trainer = Trainer(model, optim)
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=70,
    eval_every=1,
    seed=20190119,
    batch_size=70,
);

calc_accuracy_model(model, X_test)

Validation loss after 1 epochs is 0.603
Validation loss after 2 epochs is 0.490
Validation loss after 3 epochs is 0.449
Validation loss after 4 epochs is 0.424
Validation loss after 5 epochs is 0.411
Validation loss after 6 epochs is 0.392
Validation loss after 7 epochs is 0.376
Validation loss after 8 epochs is 0.368

Loss increased after epoch 9, final loss was 0.368, 
using the model from epoch 8
The model validation accuracy is: 93.53%


# Question 3   
# Use the best model from the previous question to answer this question. Compare the following learning rate decay algorithms

# a). Linear decay from 0.2 to 0.02

In [37]:
# Q3(a): Sigmoid -> Linear network with SoftmaxCrossEntropy; momentum 0.9 and a
# linear decay schedule configured from 0.2 down to final_lr=0.02.
hidden_layer = Dense(neurons=100, activation=Sigmoid())
output_layer = Dense(neurons=10, activation=Linear())
model = NeuralNetwork(
    layers=[hidden_layer, output_layer],
    loss=SoftmaxCrossEntropy(),
    seed=20190119,
)

optimizer = SGDMomentum(
    0.2,
    momentum=0.9,
    final_lr=0.02,
    decay_type='linear',
)

trainer = Trainer(model, optimizer)
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=70,
    eval_every=10,
    seed=20190119,
    batch_size=70,
);

calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.324

Loss increased after epoch 20, final loss was 0.324, 
using the model from epoch 10
The model validation accuracy is: 95.17%


# b). Exponential decay from 0.25 to 0.02

In [39]:
# Q3(b): Sigmoid -> Linear network with SoftmaxCrossEntropy; momentum 0.9 and an
# exponential decay schedule configured from 0.25 down to final_lr=0.02.
hidden_layer = Dense(neurons=100, activation=Sigmoid())
output_layer = Dense(neurons=10, activation=Linear())
model = NeuralNetwork(
    layers=[hidden_layer, output_layer],
    loss=SoftmaxCrossEntropy(),
    seed=20190119,
)

optimizer = SGDMomentum(0.25, momentum=0.9, final_lr=0.02, decay_type='exponential')

trainer = Trainer(model, optimizer)
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=70,
    eval_every=10,
    seed=20190119,
    batch_size=70,
);

calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.322
Validation loss after 20 epochs is 0.320

Loss increased after epoch 30, final loss was 0.320, 
using the model from epoch 20
The model validation accuracy is: 95.40%


# Question 4 
# Use the best model from the previous question to answer this question. Compare the following weight initialization algorithms

# a). Random

In [48]:
# Q4(a), linear decay: both layers built with weight_init="random".
hidden_layer = Dense(neurons=100, activation=Sigmoid(), weight_init="random")
output_layer = Dense(neurons=10, activation=Linear(), weight_init="random")
model = NeuralNetwork(
    layers=[hidden_layer, output_layer],
    loss=SoftmaxCrossEntropy(),
    seed=20190119,
)

optimizer = SGDMomentum(
    0.2,
    momentum=0.9,
    final_lr=0.02,
    decay_type='linear',
)

# This cell passes early_stopping=True explicitly.
trainer = Trainer(model, optimizer)
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=70,
    eval_every=10,
    seed=20190119,
    batch_size=70,
    early_stopping=True,
);

calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.324

Loss increased after epoch 20, final loss was 0.324, 
using the model from epoch 10
The model validation accuracy is: 95.17%


In [49]:
# Q4(a), exponential decay: both layers built with weight_init="random".
hidden_layer = Dense(neurons=100, activation=Sigmoid(), weight_init="random")
output_layer = Dense(neurons=10, activation=Linear(), weight_init="random")
model = NeuralNetwork(
    layers=[hidden_layer, output_layer],
    loss=SoftmaxCrossEntropy(),
    seed=20190119,
)

optimizer = SGDMomentum(0.25, momentum=0.9, final_lr=0.02, decay_type='exponential')

trainer = Trainer(model, optimizer)
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=70,
    eval_every=10,
    seed=20190119,
    batch_size=70,
);

calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.322
Validation loss after 20 epochs is 0.320

Loss increased after epoch 30, final loss was 0.320, 
using the model from epoch 20
The model validation accuracy is: 95.40%


# b). Glorot

In [51]:
# Q4(b), linear decay: both layers built with weight_init="glorot".
hidden_layer = Dense(neurons=100, activation=Sigmoid(), weight_init="glorot")
output_layer = Dense(neurons=10, activation=Linear(), weight_init="glorot")
model = NeuralNetwork(
    layers=[hidden_layer, output_layer],
    loss=SoftmaxCrossEntropy(),
    seed=20190119,
)

optimizer = SGDMomentum(
    0.2,
    momentum=0.9,
    final_lr=0.02,
    decay_type='linear',
)

# This cell passes early_stopping=True explicitly.
trainer = Trainer(model, optimizer)
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=70,
    eval_every=10,
    seed=20190119,
    batch_size=70,
    early_stopping=True,
);

calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.152

Loss increased after epoch 20, final loss was 0.152, 
using the model from epoch 10
The model validation accuracy is: 97.73%


In [52]:
# Q4(b), exponential decay: both layers built with weight_init="glorot".
hidden_layer = Dense(neurons=100, activation=Sigmoid(), weight_init="glorot")
output_layer = Dense(neurons=10, activation=Linear(), weight_init="glorot")
model = NeuralNetwork(
    layers=[hidden_layer, output_layer],
    loss=SoftmaxCrossEntropy(),
    seed=20190119,
)

optimizer = SGDMomentum(0.25, momentum=0.9, final_lr=0.02, decay_type='exponential')

trainer = Trainer(model, optimizer)
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=70,
    eval_every=10,
    seed=20190119,
    batch_size=70,
);

calc_accuracy_model(model, X_test)


Validation loss after 10 epochs is 0.163
Validation loss after 20 epochs is 0.162

Loss increased after epoch 30, final loss was 0.162, 
using the model from epoch 20
The model validation accuracy is: 97.62%
