This notebook contains experiments for:

* Loss functions
* Learning rate decay
* Weight initialization
* Optimizers
* Dropout

# `lincoln` imports

In [1]:
import numpy as np

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import lincoln
from lincoln.layers import Dense
from lincoln.losses import SoftmaxCrossEntropy, MeanSquaredError
from lincoln.optimizers import Optimizer, SGD, SGDMomentum
from lincoln.activations import Sigmoid, Tanh, Linear, ReLU
from lincoln.network import NeuralNetwork
from lincoln.train import Trainer
from lincoln.utils import mnist
from lincoln.utils.np_utils import softmax

In [4]:
# Prepare the MNIST data through the project's `lincoln.utils.mnist` helper.
# The recorded output shows it downloading the four idx .gz archives and saving them.
mnist.init()

Downloading train-images-idx3-ubyte.gz...
Downloading t10k-images-idx3-ubyte.gz...
Downloading train-labels-idx1-ubyte.gz...
Downloading t10k-labels-idx1-ubyte.gz...
Download complete.
Save complete.


In [5]:
# Load the train/test images and labels (presumably the data saved by `mnist.init()`).
# NOTE(review): later cells index `y_train[i]` as a single class id and compare `y_test`
# against argmax predictions, so labels look like 1-D integer arrays — confirm.
X_train, y_train, X_test, y_test = mnist.load()

In [6]:
# Number of training label entries (one per example); echoed as a quick sanity check.
num_labels = len(y_train)
num_labels

60000

In [7]:
# one-hot encode
def one_hot_encode(labels, num_classes=10):
    """Convert integer class ids into a one-hot float matrix.

    Parameters
    ----------
    labels : array-like of int
        Class id for every example; flattened to 1-D before encoding.
    num_classes : int, default 10
        Number of columns in the result (10 digits for MNIST).

    Returns
    -------
    np.ndarray of shape (len(labels), num_classes)
        Zeros everywhere except a 1 in column `labels[i]` of row `i`.
    """
    class_ids = np.asarray(labels, dtype=int).reshape(-1)
    encoded = np.zeros((class_ids.shape[0], num_classes))
    # Integer-array indexing sets one cell per row without a Python-level loop.
    encoded[np.arange(class_ids.shape[0]), class_ids] = 1
    return encoded


train_labels = one_hot_encode(y_train)
test_labels = one_hot_encode(y_test)

# MNIST Demos

# Scale data to mean 0, variance 1

In [8]:
# Center both splits on the *training* mean so the test set gets the same shift.
train_mean = np.mean(X_train)
X_train, X_test = X_train - train_mean, X_test - train_mean

In [9]:
# Inspect the value range of both splits after centering.
np.min(X_train), np.max(X_train), np.min(X_test), np.max(X_test)

(-33.318421449829934,
 221.68157855017006,
 -33.318421449829934,
 221.68157855017006)

In [10]:
# Divide both splits by the standard deviation of the (already centered) training set.
train_std = np.std(X_train)
X_train, X_test = X_train / train_std, X_test / train_std

In [11]:
# Inspect the value range of both splits after scaling.
np.min(X_train), np.max(X_train), np.min(X_test), np.max(X_test)

(-0.424073894391566, 2.821543345689335, -0.424073894391566, 2.821543345689335)

In [12]:
def calc_accuracy_model(model, test_set, labels=None):
    """Print the classification accuracy of `model` on `test_set`.

    Parameters
    ----------
    model : object
        Must expose `forward(batch, inference=True)` returning one row of class
        scores per example.
    test_set : np.ndarray
        Input examples, one per row.
    labels : array-like of int, optional
        True class id for every row of `test_set`. Defaults to the notebook-level
        `y_test`, which is only correct when `test_set` is `X_test`.

    Returns
    -------
    None
        The accuracy is printed, not returned, so that calling this as the last
        line of a cell produces no extra `Out[...]` value.

    Raises
    ------
    ValueError
        If the number of labels differs from the number of rows in `test_set`.
    """
    if labels is None:
        labels = y_test  # notebook global; kept as the default for existing callers
    labels = np.asarray(labels)
    if labels.shape[0] != test_set.shape[0]:
        raise ValueError(
            f"Got {labels.shape[0]} labels for {test_set.shape[0]} examples"
        )

    predicted = np.argmax(model.forward(test_set, inference=True), axis=1)
    accuracy = np.equal(predicted, labels).sum() * 100.0 / test_set.shape[0]
    print(f'''The model validation accuracy is: {accuracy:.2f}%''')

## Question 1

## a. 100 RELU hidden units, 10 Sigmoid output units, MeanSquaredError loss

In [56]:
# Q1a: ReLU hidden layer, Sigmoid outputs, MSE loss with normalize=False.
hidden_layer = Dense(neurons=100, activation=ReLU())
output_layer = Dense(neurons=10, activation=Sigmoid())
model = NeuralNetwork(
    layers=[hidden_layer, output_layer],
    loss=MeanSquaredError(normalize=False),
    seed=20190119,
)

trainer = Trainer(model, SGD(0.1))
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=50,
    eval_every=10,
    seed=20190119,
    batch_size=60,
);
print()
calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.809
Validation loss after 20 epochs is 0.735
Validation loss after 30 epochs is 0.722
Validation loss after 40 epochs is 0.706

Loss increased after epoch 50, final loss was 0.706, 
using the model from epoch 40

The model validation accuracy is: 38.37%


In [57]:
# Q1a variant: same ReLU/Sigmoid network, but MSE loss with normalize=True.
hidden_layer = Dense(neurons=100, activation=ReLU())
output_layer = Dense(neurons=10, activation=Sigmoid())
model = NeuralNetwork(
    layers=[hidden_layer, output_layer],
    loss=MeanSquaredError(normalize=True),
    seed=20190119,
)

trainer = Trainer(model, SGD(0.1))
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=50,
    eval_every=10,
    seed=20190119,
    batch_size=60,
);
print()
calc_accuracy_model(model, X_test)


Loss increased after epoch 10, final loss was 1000000000.000, 
using the model from epoch 0

The model validation accuracy is: 9.80%


## b. 100 Tanh hidden units, 10 Sigmoid output units, MeanSquaredError loss

In [20]:
# Q1b: Tanh hidden layer, Sigmoid outputs, MSE loss with normalize=False.
hidden_layer = Dense(neurons=100, activation=Tanh())
output_layer = Dense(neurons=10, activation=Sigmoid())
model = NeuralNetwork(
    layers=[hidden_layer, output_layer],
    loss=MeanSquaredError(normalize=False),
    seed=20190119,
)

trainer = Trainer(model, SGD(0.1))
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=50,
    eval_every=10,
    seed=20190119,
    batch_size=60,
);
print()
calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.589
Validation loss after 20 epochs is 0.545
Validation loss after 30 epochs is 0.471
Validation loss after 40 epochs is 0.440
Validation loss after 50 epochs is 0.394

The model validation accuracy is: 71.13%


Note: even if we normalize the outputs of a classification model with mean squared error loss, it still doesn't help:

In [21]:
# Q1b variant: same Tanh/Sigmoid network, but MSE loss with normalize=True.
hidden_layer = Dense(neurons=100, activation=Tanh())
output_layer = Dense(neurons=10, activation=Sigmoid())
model = NeuralNetwork(
    layers=[hidden_layer, output_layer],
    loss=MeanSquaredError(normalize=True),
    seed=20190119,
)

trainer = Trainer(model, SGD(0.1))
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=50,
    eval_every=10,
    seed=20190119,
    batch_size=60,
);

calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.876

Loss increased after epoch 20, final loss was 0.876, 
using the model from epoch 10
The model validation accuracy is: 47.64%


The reason is that we should be using softmax cross entropy loss!

## e. 100 Tanh hidden units, 10 Linear output units, MeanSquaredError loss

In [29]:
# Q1e: Tanh hidden layer, Linear outputs, MSE loss with normalize=False.
hidden_layer = Dense(neurons=100, activation=Tanh())
output_layer = Dense(neurons=10, activation=Linear())
model = NeuralNetwork(
    layers=[hidden_layer, output_layer],
    loss=MeanSquaredError(normalize=False),
    seed=20190119,
)

trainer = Trainer(model, SGD(0.1))
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=50,
    eval_every=10,
    seed=20190119,
    batch_size=60,
);
print()
calc_accuracy_model(model, X_test)

  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  loss = np.sum(np.power(self.prediction - self.target, 2)) / self.prediction.shape[0]
  return np.matmul(output_grad, self.param.transpose(1, 0))
  return output_grad * (1 - self.output * self.output)



Loss increased after epoch 10, final loss was 1000000000.000, 
using the model from epoch 0

The model validation accuracy is: 9.80%


In [30]:
# Q1e variant: same Tanh/Linear network, but MSE loss with normalize=True.
hidden_layer = Dense(neurons=100, activation=Tanh())
output_layer = Dense(neurons=10, activation=Linear())
model = NeuralNetwork(
    layers=[hidden_layer, output_layer],
    loss=MeanSquaredError(normalize=True),
    seed=20190119,
)

trainer = Trainer(model, SGD(0.1))
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=50,
    eval_every=10,
    seed=20190119,
    batch_size=60,
);
print()
calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 8792.413
Validation loss after 20 epochs is 3923.053
Validation loss after 30 epochs is 2516.119
Validation loss after 40 epochs is 1852.495
Validation loss after 50 epochs is 1469.342

The model validation accuracy is: 26.81%


## 100 Tanh hidden units, 10 Linear output units, Softmax loss

In [55]:
# Tanh hidden layer, Linear outputs, softmax cross-entropy loss.
# The hidden activation was previously an empty tuple `()`; the section heading
# specifies Tanh hidden units, so an actual Tanh() instance is passed here.
model = NeuralNetwork(
    layers=[Dense(neurons=100,
                  activation=Tanh()),
            Dense(neurons=10,
                  activation=Linear())],
    loss=SoftmaxCrossEntropy(),
    seed=20190119)

trainer = Trainer(model, SGD(0.1))
trainer.fit(X_train, train_labels, X_test, test_labels,
            epochs=130,
            eval_every=1,
            seed=20190119,
            batch_size=60);
print()
calc_accuracy_model(model, X_test)

Validation loss after 1 epochs is 1.499
Validation loss after 2 epochs is 1.075
Validation loss after 3 epochs is 0.886
Validation loss after 4 epochs is 0.788
Validation loss after 5 epochs is 0.725
Validation loss after 6 epochs is 0.677
Validation loss after 7 epochs is 0.650
Validation loss after 8 epochs is 0.635
Validation loss after 9 epochs is 0.622
Validation loss after 10 epochs is 0.612
Validation loss after 11 epochs is 0.597
Validation loss after 12 epochs is 0.585
Validation loss after 13 epochs is 0.579
Validation loss after 14 epochs is 0.575
Validation loss after 15 epochs is 0.567
Validation loss after 16 epochs is 0.562
Validation loss after 17 epochs is 0.560
Validation loss after 18 epochs is 0.556
Validation loss after 19 epochs is 0.555
Validation loss after 20 epochs is 0.551
Validation loss after 21 epochs is 0.549

Loss increased after epoch 22, final loss was 0.549, 
using the model from epoch 21

The model validation accuracy is: 90.16%


#### Trying sigmoid activation

## c. 100 Sigmoid hidden units, 10 Sigmoid output units, MeanSquaredError loss

In [23]:
# Q1c: Sigmoid hidden layer, Sigmoid outputs, MSE loss with normalize=False.
hidden_layer = Dense(neurons=100, activation=Sigmoid())
output_layer = Dense(neurons=10, activation=Sigmoid())
model = NeuralNetwork(
    layers=[hidden_layer, output_layer],
    loss=MeanSquaredError(normalize=False),
    seed=20190119,
)

trainer = Trainer(model, SGD(0.1))
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=50,
    eval_every=10,
    seed=20190119,
    batch_size=60,
);
print()
calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.641
Validation loss after 20 epochs is 0.408
Validation loss after 30 epochs is 0.366
Validation loss after 40 epochs is 0.350
Validation loss after 50 epochs is 0.341

The model validation accuracy is: 72.91%


In [24]:
# Q1c variant: same Sigmoid/Sigmoid network, but MSE loss with normalize=True.
hidden_layer = Dense(neurons=100, activation=Sigmoid())
output_layer = Dense(neurons=10, activation=Sigmoid())
model = NeuralNetwork(
    layers=[hidden_layer, output_layer],
    loss=MeanSquaredError(normalize=True),
    seed=20190119,
)

trainer = Trainer(model, SGD(0.1))
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=50,
    eval_every=10,
    seed=20190119,
    batch_size=60,
);
print()
calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.589

Loss increased after epoch 20, final loss was 0.589, 
using the model from epoch 10

The model validation accuracy is: 61.90%


## d. 100 Sigmoid hidden units, 10 Linear output units, Softmax loss

In [22]:
# Q1d: Sigmoid hidden layer, Linear outputs, softmax cross-entropy loss.
hidden_layer = Dense(neurons=100, activation=Sigmoid())
output_layer = Dense(neurons=10, activation=Linear())
model = NeuralNetwork(
    layers=[hidden_layer, output_layer],
    loss=SoftmaxCrossEntropy(),
    seed=20190119,
)

trainer = Trainer(model, SGD(0.1))
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=130,
    eval_every=1,
    seed=20190119,
    batch_size=60,
);
print()
calc_accuracy_model(model, X_test)

Validation loss after 1 epochs is 1.241
Validation loss after 2 epochs is 0.948
Validation loss after 3 epochs is 0.825
Validation loss after 4 epochs is 0.750
Validation loss after 5 epochs is 0.706
Validation loss after 6 epochs is 0.667
Validation loss after 7 epochs is 0.636
Validation loss after 8 epochs is 0.616
Validation loss after 9 epochs is 0.597
Validation loss after 10 epochs is 0.584
Validation loss after 11 epochs is 0.569
Validation loss after 12 epochs is 0.557
Validation loss after 13 epochs is 0.552
Validation loss after 14 epochs is 0.540
Validation loss after 15 epochs is 0.532
Validation loss after 16 epochs is 0.524
Validation loss after 17 epochs is 0.518
Validation loss after 18 epochs is 0.512
Validation loss after 19 epochs is 0.507
Validation loss after 20 epochs is 0.498
Validation loss after 21 epochs is 0.496
Validation loss after 22 epochs is 0.495
Validation loss after 23 epochs is 0.488
Validation loss after 24 epochs is 0.484
Validation loss after 25 

## SGD Momentum

## Question 2 

## a. Momentum = 0.7

In [34]:
# Q2a: Sigmoid hidden layer, softmax cross-entropy, SGD with momentum 0.7.
model = NeuralNetwork(
    layers=[Dense(neurons=100,
                  activation=Sigmoid()),
            Dense(neurons=10,
                  activation=Linear())],
    loss=SoftmaxCrossEntropy(),
    seed=20190119)

# Build the optimizer once and give that same object to the Trainer, instead of
# leaving `optim` unused and constructing a second identical optimizer inline.
optim = SGDMomentum(0.1, momentum=0.7)

trainer = Trainer(model, optim)
trainer.fit(X_train, train_labels, X_test, test_labels,
            epochs=50,
            eval_every=1,
            seed=20190119,
            batch_size=60);

calc_accuracy_model(model, X_test)

Validation loss after 1 epochs is 0.788
Validation loss after 2 epochs is 0.618
Validation loss after 3 epochs is 0.554
Validation loss after 4 epochs is 0.528
Validation loss after 5 epochs is 0.510
Validation loss after 6 epochs is 0.488
Validation loss after 7 epochs is 0.462
Validation loss after 8 epochs is 0.450

Loss increased after epoch 9, final loss was 0.450, 
using the model from epoch 8
The model validation accuracy is: 92.09%


In [35]:
# Q2a: Tanh hidden layer, softmax cross-entropy, SGD with momentum 0.7.
model = NeuralNetwork(
    layers=[Dense(neurons=100,
                  activation=Tanh()),
            Dense(neurons=10,
                  activation=Linear())],
    loss=SoftmaxCrossEntropy(),
    seed=20190119)

# A dead `optim = SGD(0.1)` assignment used to precede this line and was overwritten
# immediately; it is dropped, and the Trainer now receives `optim` itself rather
# than a second identical optimizer built inline.
optim = SGDMomentum(0.1, momentum=0.7)

trainer = Trainer(model, optim)
trainer.fit(X_train, train_labels, X_test, test_labels,
            epochs=50,
            eval_every=10,
            seed=20190119,
            batch_size=60);

calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.471
Validation loss after 20 epochs is 0.458

Loss increased after epoch 30, final loss was 0.458, 
using the model from epoch 20
The model validation accuracy is: 92.55%


## b. Momentum = 0.8


In [36]:
# Q2b: Sigmoid hidden layer, softmax cross-entropy, SGD with momentum 0.8.
model = NeuralNetwork(
    layers=[Dense(neurons=100,
                  activation=Sigmoid()),
            Dense(neurons=10,
                  activation=Linear())],
    loss=SoftmaxCrossEntropy(),
    seed=20190119)

# Build the optimizer once and give that same object to the Trainer, instead of
# leaving `optim` unused and constructing a second identical optimizer inline.
optim = SGDMomentum(0.1, momentum=0.8)

trainer = Trainer(model, optim)
trainer.fit(X_train, train_labels, X_test, test_labels,
            epochs=50,
            eval_every=1,
            seed=20190119,
            batch_size=60);

calc_accuracy_model(model, X_test)

Validation loss after 1 epochs is 0.699
Validation loss after 2 epochs is 0.554
Validation loss after 3 epochs is 0.499
Validation loss after 4 epochs is 0.476
Validation loss after 5 epochs is 0.455
Validation loss after 6 epochs is 0.437
Validation loss after 7 epochs is 0.408
Validation loss after 8 epochs is 0.407
Validation loss after 9 epochs is 0.400
Validation loss after 10 epochs is 0.393
Validation loss after 11 epochs is 0.389
Validation loss after 12 epochs is 0.374

Loss increased after epoch 13, final loss was 0.374, 
using the model from epoch 12
The model validation accuracy is: 93.47%


In [37]:
# Q2b: Tanh hidden layer, softmax cross-entropy, SGD with momentum 0.8.
model = NeuralNetwork(
    layers=[Dense(neurons=100,
                  activation=Tanh()),
            Dense(neurons=10,
                  activation=Linear())],
    loss=SoftmaxCrossEntropy(),
    seed=20190119)

# A dead `optim = SGD(0.1)` assignment used to precede this line and was overwritten
# immediately; it is dropped, and the Trainer now receives `optim` itself rather
# than a second identical optimizer built inline.
optim = SGDMomentum(0.1, momentum=0.8)

trainer = Trainer(model, optim)
trainer.fit(X_train, train_labels, X_test, test_labels,
            epochs=50,
            eval_every=10,
            seed=20190119,
            batch_size=60);

calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.411

Loss increased after epoch 20, final loss was 0.411, 
using the model from epoch 10
The model validation accuracy is: 93.77%


## c. Momentum = 0.9

In [38]:
# Q2c: Sigmoid hidden layer, softmax cross-entropy, SGD with momentum 0.9.
model = NeuralNetwork(
    layers=[Dense(neurons=100,
                  activation=Sigmoid()),
            Dense(neurons=10,
                  activation=Linear())],
    loss=SoftmaxCrossEntropy(),
    seed=20190119)

# Build the optimizer once and give that same object to the Trainer, instead of
# leaving `optim` unused and constructing a second identical optimizer inline.
optim = SGDMomentum(0.1, momentum=0.9)

trainer = Trainer(model, optim)
trainer.fit(X_train, train_labels, X_test, test_labels,
            epochs=50,
            eval_every=1,
            seed=20190119,
            batch_size=60);

calc_accuracy_model(model, X_test)

Validation loss after 1 epochs is 0.580
Validation loss after 2 epochs is 0.462
Validation loss after 3 epochs is 0.415
Validation loss after 4 epochs is 0.402
Validation loss after 5 epochs is 0.388
Validation loss after 6 epochs is 0.371
Validation loss after 7 epochs is 0.350
Validation loss after 8 epochs is 0.341

Loss increased after epoch 9, final loss was 0.341, 
using the model from epoch 8
The model validation accuracy is: 93.75%


In [39]:
# Q2c: Tanh hidden layer, softmax cross-entropy, SGD with momentum 0.9.
model = NeuralNetwork(
    layers=[Dense(neurons=100,
                  activation=Tanh()),
            Dense(neurons=10,
                  activation=Linear())],
    loss=SoftmaxCrossEntropy(),
    seed=20190119)

# A dead `optim = SGD(0.1)` assignment used to precede this line and was overwritten
# immediately; it is dropped, and the Trainer now receives `optim` itself rather
# than a second identical optimizer built inline.
optim = SGDMomentum(0.1, momentum=0.9)

trainer = Trainer(model, optim)
trainer.fit(X_train, train_labels, X_test, test_labels,
            epochs=50,
            eval_every=10,
            seed=20190119,
            batch_size=60);

calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.368
Validation loss after 20 epochs is 0.353
Validation loss after 30 epochs is 0.330
Validation loss after 40 epochs is 0.320

Loss increased after epoch 50, final loss was 0.320, 
using the model from epoch 40
The model validation accuracy is: 95.62%


## Different learning rate decay

## Question 3 

## a. Linear decay from 0.2 to 0.02

In [40]:
# Q3a: Sigmoid hidden layer; momentum SGD with the learning rate decayed
# linearly from 0.2 to 0.02.
hidden_layer = Dense(neurons=100, activation=Sigmoid())
output_layer = Dense(neurons=10, activation=Linear())
model = NeuralNetwork(
    layers=[hidden_layer, output_layer],
    loss=SoftmaxCrossEntropy(),
    seed=20190119,
)

optimizer = SGDMomentum(
    0.2,
    momentum=0.9,
    final_lr=0.02,
    decay_type='linear',
)

trainer = Trainer(model, optimizer)
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=50,
    eval_every=10,
    seed=20190119,
    batch_size=60,
);

calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.303

Loss increased after epoch 20, final loss was 0.303, 
using the model from epoch 10
The model validation accuracy is: 95.29%


In [41]:
# Q3a: Tanh hidden layer; momentum SGD with the learning rate decayed
# linearly from 0.2 to 0.02.
hidden_layer = Dense(neurons=100, activation=Tanh())
output_layer = Dense(neurons=10, activation=Linear())
model = NeuralNetwork(
    layers=[hidden_layer, output_layer],
    loss=SoftmaxCrossEntropy(),
    seed=20190119,
)

optimizer = SGDMomentum(
    0.2,
    momentum=0.9,
    final_lr=0.02,
    decay_type='linear',
)

trainer = Trainer(model, optimizer)
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=50,
    eval_every=10,
    seed=20190119,
    batch_size=60,
);

calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.480
Validation loss after 20 epochs is 0.402
Validation loss after 30 epochs is 0.307
Validation loss after 40 epochs is 0.301

Loss increased after epoch 50, final loss was 0.301, 
using the model from epoch 40
The model validation accuracy is: 95.98%


## b. Exponential decay from 0.25 to 0.02

In [42]:
# Q3b: Sigmoid hidden layer; momentum SGD with the learning rate decayed
# exponentially from 0.25 to 0.02.
# NOTE(review): this hidden layer has 89 neurons, while the other single-hidden-layer
# cells in this notebook use 100 — confirm that the difference is intentional.
hidden_layer = Dense(neurons=89, activation=Sigmoid())
output_layer = Dense(neurons=10, activation=Linear())
model = NeuralNetwork(
    layers=[hidden_layer, output_layer],
    loss=SoftmaxCrossEntropy(),
    seed=20190119,
)

optimizer = SGDMomentum(0.25, momentum=0.9, final_lr=0.02, decay_type='exponential')

trainer = Trainer(model, optimizer)
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=50,
    eval_every=10,
    seed=20190119,
    batch_size=60,
);

calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.301

Loss increased after epoch 20, final loss was 0.301, 
using the model from epoch 10
The model validation accuracy is: 95.30%


In [43]:
# Q3b: Tanh hidden layer; momentum SGD with the learning rate decayed
# exponentially from 0.25 to 0.02.
hidden_layer = Dense(neurons=100, activation=Tanh())
output_layer = Dense(neurons=10, activation=Linear())
model = NeuralNetwork(
    layers=[hidden_layer, output_layer],
    loss=SoftmaxCrossEntropy(),
    seed=20190119,
)

optimizer = SGDMomentum(0.25, momentum=0.9, final_lr=0.02, decay_type='exponential')

trainer = Trainer(model, optimizer)
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=50,
    eval_every=10,
    seed=20190119,
    batch_size=60,
);

calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.429
Validation loss after 20 epochs is 0.370
Validation loss after 30 epochs is 0.301

Loss increased after epoch 40, final loss was 0.301, 
using the model from epoch 30
The model validation accuracy is: 95.47%


## Changing weight init

## Question 4 

## a. Random

In [44]:
# Q4a: Sigmoid hidden layer with weight_init="random" on both layers.
# NOTE(review): the recorded loss and accuracy for this run equal those of the same
# configuration without `weight_init` (Question 3a), which suggests "random" may
# behave like the default — confirm in lincoln.layers.Dense.
hidden_layer = Dense(neurons=100, activation=Sigmoid(), weight_init="random")
output_layer = Dense(neurons=10, activation=Linear(), weight_init="random")
model = NeuralNetwork(
    layers=[hidden_layer, output_layer],
    loss=SoftmaxCrossEntropy(),
    seed=20190119,
)

optimizer = SGDMomentum(
    0.2,
    momentum=0.9,
    final_lr=0.02,
    decay_type='linear',
)

trainer = Trainer(model, optimizer)
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=50,
    eval_every=10,
    seed=20190119,
    batch_size=60,
    early_stopping=True,
);

calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.303

Loss increased after epoch 20, final loss was 0.303, 
using the model from epoch 10
The model validation accuracy is: 95.29%


In [45]:
# Q4a: Tanh hidden layer with weight_init="random" on both layers.
hidden_layer = Dense(neurons=100, activation=Tanh(), weight_init="random")
output_layer = Dense(neurons=10, activation=Linear(), weight_init="random")
model = NeuralNetwork(
    layers=[hidden_layer, output_layer],
    loss=SoftmaxCrossEntropy(),
    seed=20190119,
)

optimizer = SGDMomentum(
    0.2,
    momentum=0.9,
    final_lr=0.02,
    decay_type='linear',
)

trainer = Trainer(model, optimizer)
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=50,
    eval_every=10,
    seed=20190119,
    batch_size=60,
    early_stopping=True,
);

calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.480
Validation loss after 20 epochs is 0.402
Validation loss after 30 epochs is 0.307
Validation loss after 40 epochs is 0.301

Loss increased after epoch 50, final loss was 0.301, 
using the model from epoch 40
The model validation accuracy is: 95.98%


## b. Glorot

In [47]:
# Q4b: Sigmoid hidden layer with weight_init="glorot" on both layers.
hidden_layer = Dense(neurons=100, activation=Sigmoid(), weight_init="glorot")
output_layer = Dense(neurons=10, activation=Linear(), weight_init="glorot")
model = NeuralNetwork(
    layers=[hidden_layer, output_layer],
    loss=SoftmaxCrossEntropy(),
    seed=20190119,
)

optimizer = SGDMomentum(
    0.2,
    momentum=0.9,
    final_lr=0.02,
    decay_type='linear',
)

trainer = Trainer(model, optimizer)
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=50,
    eval_every=10,
    seed=20190119,
    batch_size=60,
    early_stopping=True,
);

calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.157

Loss increased after epoch 20, final loss was 0.157, 
using the model from epoch 10
The model validation accuracy is: 97.75%


In [48]:
# Q4b: Tanh hidden layer with weight_init="glorot" on both layers.
hidden_layer = Dense(neurons=100, activation=Tanh(), weight_init="glorot")
output_layer = Dense(neurons=10, activation=Linear(), weight_init="glorot")
model = NeuralNetwork(
    layers=[hidden_layer, output_layer],
    loss=SoftmaxCrossEntropy(),
    seed=20190119,
)

optimizer = SGDMomentum(
    0.2,
    momentum=0.9,
    final_lr=0.02,
    decay_type='linear',
)

trainer = Trainer(model, optimizer)
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=50,
    eval_every=10,
    seed=20190119,
    batch_size=60,
    early_stopping=True,
);

calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.460
Validation loss after 20 epochs is 0.312
Validation loss after 30 epochs is 0.267
Validation loss after 40 epochs is 0.264

Loss increased after epoch 50, final loss was 0.264, 
using the model from epoch 40
The model validation accuracy is: 96.45%


## Dropout

In [49]:
# Dropout experiment: Glorot-initialised Sigmoid hidden layer with dropout=0.8.
# NOTE(review): 0.8 is presumably the keep probability — confirm in lincoln.layers.Dense.
hidden_layer = Dense(
    neurons=100,
    activation=Sigmoid(),
    weight_init="glorot",
    dropout=0.8,
)
output_layer = Dense(neurons=10, activation=Linear(), weight_init="glorot")
model = NeuralNetwork(
    layers=[hidden_layer, output_layer],
    loss=SoftmaxCrossEntropy(),
    seed=20190119,
)

trainer = Trainer(
    model,
    SGDMomentum(0.25, momentum=0.9, final_lr=0.02, decay_type='exponential'),
)
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=50,
    eval_every=10,
    seed=20190119,
    batch_size=60,
    early_stopping=True,
);

calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.152
Validation loss after 20 epochs is 0.142

Loss increased after epoch 30, final loss was 0.142, 
using the model from epoch 20
The model validation accuracy is: 97.71%


In [50]:
# Dropout experiment: Glorot-initialised Tanh hidden layer with dropout=0.8.
# NOTE(review): 0.8 is presumably the keep probability — confirm in lincoln.layers.Dense.
hidden_layer = Dense(
    neurons=100,
    activation=Tanh(),
    weight_init="glorot",
    dropout=0.8,
)
output_layer = Dense(neurons=10, activation=Linear(), weight_init="glorot")
model = NeuralNetwork(
    layers=[hidden_layer, output_layer],
    loss=SoftmaxCrossEntropy(),
    seed=20190119,
)

trainer = Trainer(
    model,
    SGDMomentum(0.25, momentum=0.9, final_lr=0.02, decay_type='exponential'),
)
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=50,
    eval_every=10,
    seed=20190119,
    batch_size=60,
    early_stopping=True,
);

calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.318
Validation loss after 20 epochs is 0.244
Validation loss after 30 epochs is 0.214
Validation loss after 40 epochs is 0.210
Validation loss after 50 epochs is 0.210
The model validation accuracy is: 96.64%


## Deep Learning, with and without Dropout

In [51]:
# Deep model with dropout: 200 Sigmoid -> 46 Tanh -> 10 Linear, Glorot init throughout.
first_hidden = Dense(neurons=200, activation=Sigmoid(), weight_init="glorot", dropout=0.8)
second_hidden = Dense(neurons=46, activation=Tanh(), weight_init="glorot", dropout=0.8)
output_layer = Dense(neurons=10, activation=Linear(), weight_init="glorot")
model = NeuralNetwork(
    layers=[first_hidden, second_hidden, output_layer],
    loss=SoftmaxCrossEntropy(),
    seed=20190119,
)

trainer = Trainer(
    model,
    SGDMomentum(0.25, momentum=0.9, final_lr=0.02, decay_type='exponential'),
)
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=100,
    eval_every=10,
    seed=20190119,
    batch_size=60,
    early_stopping=True,
);

calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.163
Validation loss after 20 epochs is 0.150

Loss increased after epoch 30, final loss was 0.150, 
using the model from epoch 20
The model validation accuracy is: 98.10%


In [52]:
# Deep model with dropout: 200 Tanh -> 46 Tanh -> 10 Linear, Glorot init throughout.
first_hidden = Dense(neurons=200, activation=Tanh(), weight_init="glorot", dropout=0.8)
second_hidden = Dense(neurons=46, activation=Tanh(), weight_init="glorot", dropout=0.8)
output_layer = Dense(neurons=10, activation=Linear(), weight_init="glorot")
model = NeuralNetwork(
    layers=[first_hidden, second_hidden, output_layer],
    loss=SoftmaxCrossEntropy(),
    seed=20190119,
)

trainer = Trainer(
    model,
    SGDMomentum(0.25, momentum=0.9, final_lr=0.02, decay_type='exponential'),
)
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=100,
    eval_every=10,
    seed=20190119,
    batch_size=60,
    early_stopping=True,
);

calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.390
Validation loss after 20 epochs is 0.322
Validation loss after 30 epochs is 0.263
Validation loss after 40 epochs is 0.231
Validation loss after 50 epochs is 0.221
Validation loss after 60 epochs is 0.208
Validation loss after 70 epochs is 0.198
Validation loss after 80 epochs is 0.188

Loss increased after epoch 90, final loss was 0.188, 
using the model from epoch 80
The model validation accuracy is: 97.01%


In [53]:
# Deep model without dropout: 200 Sigmoid -> 46 Tanh -> 10 Linear, Glorot init throughout.
first_hidden = Dense(neurons=200, activation=Sigmoid(), weight_init="glorot")
second_hidden = Dense(neurons=46, activation=Tanh(), weight_init="glorot")
output_layer = Dense(neurons=10, activation=Linear(), weight_init="glorot")
model = NeuralNetwork(
    layers=[first_hidden, second_hidden, output_layer],
    loss=SoftmaxCrossEntropy(),
    seed=20190119,
)

trainer = Trainer(
    model,
    SGDMomentum(0.25, momentum=0.9, final_lr=0.02, decay_type='exponential'),
)
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=100,
    eval_every=10,
    seed=20190119,
    batch_size=60,
    early_stopping=True,
);

calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.206

Loss increased after epoch 20, final loss was 0.206, 
using the model from epoch 10
The model validation accuracy is: 97.67%


In [54]:
# Deep model without dropout: 200 Tanh -> 46 Tanh -> 10 Linear, Glorot init throughout.
first_hidden = Dense(neurons=200, activation=Tanh(), weight_init="glorot")
second_hidden = Dense(neurons=46, activation=Tanh(), weight_init="glorot")
output_layer = Dense(neurons=10, activation=Linear(), weight_init="glorot")
model = NeuralNetwork(
    layers=[first_hidden, second_hidden, output_layer],
    loss=SoftmaxCrossEntropy(),
    seed=20190119,
)

trainer = Trainer(
    model,
    SGDMomentum(0.25, momentum=0.9, final_lr=0.02, decay_type='exponential'),
)
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=100,
    eval_every=10,
    seed=20190119,
    batch_size=60,
    early_stopping=True,
);

calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.564
Validation loss after 20 epochs is 0.438
Validation loss after 30 epochs is 0.360
Validation loss after 40 epochs is 0.313
Validation loss after 50 epochs is 0.307
Validation loss after 60 epochs is 0.289

Loss increased after epoch 70, final loss was 0.289, 
using the model from epoch 60
The model validation accuracy is: 95.80%
