In [1]:
# The Softmax Cross Entropy Loss Function
import numpy as np
from lincoln import *

In [2]:
# Small 3x1 column vector used to probe the `normalize` helper pulled in by the star import.
a = np.array([[0], [1], [2]])
# Exploratory call. Judging only by the displayed result, the helper appears to
# append `1 - a` as a second column — TODO confirm against the helper's source.
normalize(a)

array([[ 0,  1],
       [ 1,  0],
       [ 2, -1]])

In [3]:
# Exploratory call on the same 3x1 vector; the displayed result is [[0]], which
# looks like only the first row is kept — TODO confirm what `unnormalize` is meant to do.
unnormalize(a)

array([[0]])

In [4]:
# Keep only a leading subset of each split so the experiments below run faster
# with acceptable accuracy (the full splits hold 60000 and 10000 samples).
N_TRAIN, N_TEST = 15000, 2500

X_train, y_train, X_test, y_test = load()
X_train, y_train = X_train[:N_TRAIN], y_train[:N_TRAIN]
X_test, y_test = X_test[:N_TEST], y_test[:N_TEST]

In [5]:
def _one_hot(labels, num_classes=10):
    """Return a one-hot encoding of integer class labels.

    Parameters
    ----------
    labels : sequence of int
        One class index per sample, each in ``[0, num_classes)``.
    num_classes : int, default 10
        Width of the encoding (number of columns).

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(labels), num_classes)`` where row ``i`` is
        all zeros except for a 1 in column ``labels[i]``.
    """
    label_idx = np.asarray(labels).reshape(-1)
    encoded = np.zeros((label_idx.shape[0], num_classes))
    # Pair every row index with its label so all the 1s are written in one step.
    encoded[np.arange(label_idx.shape[0]), label_idx] = 1
    return encoded


# one-hot encode
train_labels = _one_hot(y_train)
test_labels = _one_hot(y_test)

# The original cell left `num_labels` holding the test-set size; keep that
# binding in case another cell reads it.
num_labels = len(y_test)

In [6]:
# Shift both splits by the mean of the training split. The statistic is read
# from X_train before either array is rebound, so the test split is centred
# with the training value rather than its own.
train_mean = np.mean(X_train)
X_train, X_test = X_train - train_mean, X_test - train_mean

In [7]:
# Display (train min, train max, test min, test max) to eyeball the value range.
(
    np.min(X_train),
    np.max(X_train),
    np.min(X_test),
    np.max(X_test),
)

(-33.79115382653061, 221.2088461734694, -33.79115382653061, 221.2088461734694)

In [8]:
# Divide both splits by the standard deviation of the training split. The
# statistic is read from X_train before either array is rebound, so the test
# split is scaled with the training value rather than its own.
train_std = np.std(X_train)
X_train, X_test = X_train / train_std, X_test / train_std

In [9]:
# Display (train min, train max, test min, test max) again to see the effect of the rescaling.
(
    np.min(X_train),
    np.max(X_train),
    np.min(X_test),
    np.max(X_test),
)

(-0.4269135622397206,
 2.794727194095053,
 -0.4269135622397206,
 2.794727194095053)

In [10]:
def calc_accuracy_model(model, test_set, labels=None):
    """Print the model's classification accuracy on ``test_set`` as a percentage.

    Parameters
    ----------
    model
        Object exposing ``forward(batch, inference=True)``. Its output is
        arg-maxed along axis 1 to obtain one predicted class index per row.
    test_set
        Input batch; ``test_set.shape[0]`` is used as the sample count.
    labels : optional
        Integer class labels to compare the predictions against. When omitted,
        the notebook-level ``y_test`` is used, which is only correct if
        ``test_set`` is the matching test split.

    Returns
    -------
    None
        The accuracy is printed, not returned.
    """
    predictions = np.argmax(model.forward(test_set, inference=True), axis=1)
    if labels is None:
        # Backward-compatible default: earlier callers rely on the global labels.
        labels = y_test
    accuracy = np.equal(predictions, labels).sum() * 100.0 / test_set.shape[0]
    print(f'''The model validation accuracy is: {accuracy:.2f}%''')

In [11]:
# Baseline: Tanh hidden layer, Sigmoid output layer, mean squared error loss
# constructed with normalize=False, trained with SGD(0.1).
layer_stack = [
    Dense(neurons=89, activation=Tanh()),
    Dense(neurons=10, activation=Sigmoid()),
]
model = NeuralNetwork(
    layers=layer_stack,
    loss=MeanSquaredError(normalize=False),
    seed=20190119,
)

optimizer = SGD(0.1)
trainer = Trainer(model, optimizer)
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=50,
    eval_every=10,
    seed=20190119,
    batch_size=60,
)
calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.822
Validation loss after 20 epochs is 0.741
Validation loss after 30 epochs is 0.713
Validation loss after 40 epochs is 0.696
Validation loss after 50 epochs is 0.680
The model validation accuracy is: 50.44%


In [12]:
# Same Tanh -> Sigmoid architecture and SGD(0.1) settings, but the mean squared
# error loss is constructed with normalize=True.
# NOTE(review): what `normalize` does inside MeanSquaredError is not visible here — confirm.
layer_stack = [
    Dense(neurons=89, activation=Tanh()),
    Dense(neurons=10, activation=Sigmoid()),
]
model = NeuralNetwork(
    layers=layer_stack,
    loss=MeanSquaredError(normalize=True),
    seed=20190119,
)

optimizer = SGD(0.1)
trainer = Trainer(model, optimizer)
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=50,
    eval_every=10,
    seed=20190119,
    batch_size=60,
)
calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.943
Loss increased after epoch 20, final loss was 0.943, using model from epoch 10
The model validation accuracy is: 43.48%


In [13]:
# Softmax cross entropy loss: Sigmoid hidden layer, Linear output layer,
# trained with SGD(0.1) for 130 epochs and evaluated after every epoch.
layer_stack = [
    Dense(neurons=89, activation=Sigmoid()),
    Dense(neurons=10, activation=Linear()),
]
model = NeuralNetwork(
    layers=layer_stack,
    loss=SoftmaxCrossEntropy(),
    seed=20190119,
)

optimizer = SGD(0.1)
trainer = Trainer(model, optimizer)
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=130,
    eval_every=1,
    seed=20190119,
    batch_size=60,
)
calc_accuracy_model(model, X_test)

Validation loss after 1 epochs is 2.974
Validation loss after 2 epochs is 2.262
Validation loss after 3 epochs is 1.935
Validation loss after 4 epochs is 1.757
Validation loss after 5 epochs is 1.637
Validation loss after 6 epochs is 1.550
Validation loss after 7 epochs is 1.469
Validation loss after 8 epochs is 1.417
Validation loss after 9 epochs is 1.369
Validation loss after 10 epochs is 1.355
Validation loss after 11 epochs is 1.308
Validation loss after 12 epochs is 1.284
Validation loss after 13 epochs is 1.253
Validation loss after 14 epochs is 1.236
Validation loss after 15 epochs is 1.214
Validation loss after 16 epochs is 1.198
Validation loss after 17 epochs is 1.179
Validation loss after 18 epochs is 1.167
Validation loss after 19 epochs is 1.152
Validation loss after 20 epochs is 1.143
Validation loss after 21 epochs is 1.135
Validation loss after 22 epochs is 1.132
Validation loss after 23 epochs is 1.123
Validation loss after 24 epochs is 1.114
Validation loss after 25 

In [14]:
# SGD with momentum: same Sigmoid -> Linear network and softmax cross entropy
# loss, but optimised with SGDMomentum(0.1, momentum=0.9).
layer_stack = [
    Dense(neurons=89, activation=Sigmoid()),
    Dense(neurons=10, activation=Linear()),
]
model = NeuralNetwork(
    layers=layer_stack,
    loss=SoftmaxCrossEntropy(),
    seed=20190119,
)

optimizer = SGDMomentum(0.1, momentum=0.9)
trainer = Trainer(model, optimizer)
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=50,
    eval_every=1,
    seed=20190119,
    batch_size=60,
)
calc_accuracy_model(model, X_test)

Validation loss after 1 epochs is 1.208
Validation loss after 2 epochs is 1.020
Validation loss after 3 epochs is 0.912
Validation loss after 4 epochs is 0.887
Validation loss after 5 epochs is 0.860
Loss increased after epoch 6, final loss was 0.860, using model from epoch 5
The model validation accuracy is: 85.08%


In [15]:
# SGD with momentum again, this time with a Tanh hidden layer and evaluation
# every 10 epochs.
layer_stack = [
    Dense(neurons=89, activation=Tanh()),
    Dense(neurons=10, activation=Linear()),
]
model = NeuralNetwork(
    layers=layer_stack,
    loss=SoftmaxCrossEntropy(),
    seed=20190119,
)

optimizer = SGDMomentum(0.1, momentum=0.9)
trainer = Trainer(model, optimizer)
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=50,
    eval_every=10,
    seed=20190119,
    batch_size=60,
)
calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.994
Loss increased after epoch 20, final loss was 0.994, using model from epoch 10
The model validation accuracy is: 87.32%


In [16]:
# Learning-rate decay (linear): the optimizer is built with 0.15 as its first
# argument, final_lr=0.05 and decay_type='linear'. No weight-decay term is
# configured in this cell.
# NOTE(review): presumably the rate is annealed from 0.15 down to final_lr over training — confirm in SGDMomentum.
layer_stack = [
    Dense(neurons=89, activation=Tanh()),
    Dense(neurons=10, activation=Linear()),
]
model = NeuralNetwork(
    layers=layer_stack,
    loss=SoftmaxCrossEntropy(),
    seed=20190119,
)

optimizer = SGDMomentum(
    0.15,
    momentum=0.9,
    final_lr=0.05,
    decay_type='linear',
)
trainer = Trainer(model, optimizer)
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=50,
    eval_every=10,
    seed=20190119,
    batch_size=60,
)
calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.877
Validation loss after 20 epochs is 0.806
Loss increased after epoch 30, final loss was 0.806, using model from epoch 20
The model validation accuracy is: 90.04%


In [17]:
# Learning-rate decay (exponential): the optimizer is built with 0.2 as its
# first argument, final_lr=0.05 and decay_type='exponential'.
layer_stack = [
    Dense(neurons=89, activation=Tanh()),
    Dense(neurons=10, activation=Linear()),
]
model = NeuralNetwork(
    layers=layer_stack,
    loss=SoftmaxCrossEntropy(),
    seed=20190119,
)

optimizer = SGDMomentum(
    0.2,
    momentum=0.9,
    final_lr=0.05,
    decay_type='exponential',
)
trainer = Trainer(model, optimizer)
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=50,
    eval_every=10,
    seed=20190119,
    batch_size=60,
)
calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 1.163
Validation loss after 20 epochs is 0.969
Validation loss after 30 epochs is 0.863
Loss increased after epoch 40, final loss was 0.863, using model from epoch 30
The model validation accuracy is: 88.36%


In [18]:
# Weight initialization: both Dense layers are created with
# weight_init="glorot"; the optimizer uses the linear decay settings
# (0.15, final_lr=0.05) and fit is called with early_stopping=True.
layer_stack = [
    Dense(neurons=89, activation=Tanh(), weight_init="glorot"),
    Dense(neurons=10, activation=Linear(), weight_init="glorot"),
]
model = NeuralNetwork(
    layers=layer_stack,
    loss=SoftmaxCrossEntropy(),
    seed=20190119,
)

optimizer = SGDMomentum(
    0.15,
    momentum=0.9,
    final_lr=0.05,
    decay_type='linear',
)
trainer = Trainer(model, optimizer)
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=50,
    eval_every=10,
    seed=20190119,
    batch_size=60,
    early_stopping=True,
)
calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.705
Validation loss after 20 epochs is 0.654
Validation loss after 30 epochs is 0.605
Loss increased after epoch 40, final loss was 0.605, using model from epoch 30
The model validation accuracy is: 92.68%


In [19]:
# Glorot-initialised Tanh -> Linear network, trained with the exponential
# decay settings (0.2, final_lr=0.05) and early_stopping=True.
layer_stack = [
    Dense(neurons=89, activation=Tanh(), weight_init="glorot"),
    Dense(neurons=10, activation=Linear(), weight_init="glorot"),
]
model = NeuralNetwork(
    layers=layer_stack,
    loss=SoftmaxCrossEntropy(),
    seed=20190119,
)

optimizer = SGDMomentum(
    0.2,
    momentum=0.9,
    final_lr=0.05,
    decay_type='exponential',
)
trainer = Trainer(model, optimizer)
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=50,
    eval_every=10,
    seed=20190119,
    batch_size=60,
    early_stopping=True,
)
calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.871
Validation loss after 20 epochs is 0.844
Validation loss after 30 epochs is 0.793
Loss increased after epoch 40, final loss was 0.793, using model from epoch 30
The model validation accuracy is: 90.76%


In [20]:
# Dropout: the hidden layer is created with dropout=0.8; everything else
# matches the Glorot + exponential-decay configuration.
# NOTE(review): whether 0.8 is the keep or the drop probability depends on Dense — confirm.
layer_stack = [
    Dense(neurons=89, activation=Tanh(), weight_init="glorot", dropout=0.8),
    Dense(neurons=10, activation=Linear(), weight_init="glorot"),
]
model = NeuralNetwork(
    layers=layer_stack,
    loss=SoftmaxCrossEntropy(),
    seed=20190119,
)

optimizer = SGDMomentum(
    0.2,
    momentum=0.9,
    final_lr=0.05,
    decay_type='exponential',
)
trainer = Trainer(model, optimizer)
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=50,
    eval_every=10,
    seed=20190119,
    batch_size=60,
    early_stopping=True,
)
calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.866
Validation loss after 20 epochs is 0.766
Loss increased after epoch 30, final loss was 0.766, using model from epoch 20
The model validation accuracy is: 90.52%


In [21]:
# Deep learning, with dropout: two Tanh hidden layers (178 and 46 neurons),
# each created with dropout=0.8, feeding a 10-neuron Linear output layer.
# Trained for up to 100 epochs.
layer_stack = [
    Dense(neurons=178, activation=Tanh(), weight_init="glorot", dropout=0.8),
    Dense(neurons=46, activation=Tanh(), weight_init="glorot", dropout=0.8),
    Dense(neurons=10, activation=Linear(), weight_init="glorot"),
]
model = NeuralNetwork(
    layers=layer_stack,
    loss=SoftmaxCrossEntropy(),
    seed=20190119,
)

optimizer = SGDMomentum(
    0.2,
    momentum=0.9,
    final_lr=0.05,
    decay_type='exponential',
)
trainer = Trainer(model, optimizer)
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=100,
    eval_every=10,
    seed=20190119,
    batch_size=60,
    early_stopping=True,
)
calc_accuracy_model(model, X_test)


Validation loss after 10 epochs is 0.755
Loss increased after epoch 20, final loss was 0.755, using model from epoch 10
The model validation accuracy is: 90.88%


In [None]:
# Deep learning, without dropout: two Tanh hidden layers (178 and 46 neurons)
# and a 10-neuron Linear output layer, none of them given a dropout argument.
# Trained for up to 100 epochs.
layer_stack = [
    Dense(neurons=178, activation=Tanh(), weight_init="glorot"),
    Dense(neurons=46, activation=Tanh(), weight_init="glorot"),
    Dense(neurons=10, activation=Linear(), weight_init="glorot"),
]
model = NeuralNetwork(
    layers=layer_stack,
    loss=SoftmaxCrossEntropy(),
    seed=20190119,
)

optimizer = SGDMomentum(
    0.2,
    momentum=0.9,
    final_lr=0.05,
    decay_type='exponential',
)
trainer = Trainer(model, optimizer)
trainer.fit(
    X_train, train_labels, X_test, test_labels,
    epochs=100,
    eval_every=10,
    seed=20190119,
    batch_size=60,
    early_stopping=True,
)
calc_accuracy_model(model, X_test)