In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder
import pandas as pd

class NeuralNetwork:

    @staticmethod
    def sigma(x):
        return 1 / (np.exp(-x) + 1)

    @staticmethod
    def sigma_deriv(x):
        sig = NeuralNetwork.sigma(x)
        deriv = sig * (1 - sig)
        return deriv

    @staticmethod
    def softmax(x):
        # softmax function ---------
        t = np.exp(x - np.max(x))
        return t / np.sum(t, axis=1, keepdims=True)

    @staticmethod
    def get_activation_function(name):
        if name == 'sigma':
            return NeuralNetwork.sigma
        elif name == 'linear':
            return lambda x: x
        elif name == 'softmax':
            return NeuralNetwork.softmax
        elif name == 'tanh':
            return np.tanh
        elif name == 'relu':
            return lambda x: np.maximum(0, x)

    @staticmethod
    def get_activation_derivative(name):
        if name == 'sigma':
            return NeuralNetwork.sigma_deriv
        elif name == 'linear':
            return lambda x: np.ones_like(x)
        elif name == 'softmax':
            # jacobian of softmax - unused -------
            def softmax_deriv(x):
                value = NeuralNetwork.softmax(x)
                SM = value.reshape((-1, 1))
                jac = np.diagflat(value) - np.dot(SM, SM.T)
                return jac

            return softmax_deriv
        elif name == 'tanh':
            return lambda x: 1 - np.tanh(x)**2
        elif name == 'relu':
            return lambda x: x > 0


    @staticmethod
    def get_loss_function(name):
        if name == 'mse':
            return lambda x_pred, x: np.linalg.norm(x_pred - x)
        elif name == 'mae':
            return lambda x_pred, x: np.sum(np.abs(x_pred - x))
        elif name == 'crossentropy':
            return lambda x_pred, x: -np.sum(x*np.log(x_pred))

    @staticmethod
    def get_loss_derivative(name):
        # currently unused
        if name == 'mse':
            return lambda x_pred, x: (x_pred - x)
        if name == 'mae':
            return lambda x_pred, x: np.sign(x_pred - x)
        elif name == 'crossentropy':
            return lambda x_pred, x: x*(-1/x_pred)

    class Layer:
        def __init__(self, input_width, layer_width, activation_function):
            # self.weights = np.random.uniform(0, 1, (input_width + 1, layer_width))
            self.weights = np.random.normal(0, 1, (input_width + 1, layer_width))
            self.activation_function = activation_function

        def predict(self, x):
            activation_function = NeuralNetwork.get_activation_function(self.activation_function)
            return activation_function(np.dot(x, self.weights))

        def feedforward_step(self, x):
            activation_function = NeuralNetwork.get_activation_function(self.activation_function)
            activation = np.dot(x, self.weights)
            response = activation_function(activation)
            return response, activation

    def __init__(self, input_width, output_width, activation_function='sigma', loss_function='mse', bias_exists=True, seed=None, verbose=True):
        ###
        # create a new nn object. activation_function specifies activation used on hidden layers
        # loss_function affects loss printed to console
        ###
        self.input_width = input_width
        self.output_width = output_width
        self.layers = []
        self.activation_function = activation_function
        self.loss_function = NeuralNetwork.get_loss_function(loss_function)
        self.loss_derivative = NeuralNetwork.get_loss_derivative(loss_function)
        self.bias_exists = bias_exists
        self.seed = seed
        self.verbose = verbose

    def add_layer(self, layer_width):
        ###
        # add a hidden layer with specified number of neurons
        ###
        if len(self.layers) == 0:
            self.layers.append(NeuralNetwork.Layer(self.input_width, layer_width, self.activation_function))
        else:
            self.layers.append(
                NeuralNetwork.Layer(self.layers[-1].weights.shape[1], layer_width, self.activation_function))


    def predict(self, x):
        ###
        # predict responses on new data
        ###
        values = np.copy(x)
        for layer in self.layers:
            values = np.hstack((values, np.ones((values.shape[0], 1)) if self.bias_exists else np.zeros((values.shape[0], 1))))
            values = layer.predict(values)
        return values

    def create_output_layer(self, activation_function='linear'):
        ###
        # create output layer with specified activation function. Use after adding all hidden layers and before training
        ###
        if len(self.layers) == 0:
            self.layers.append(NeuralNetwork.Layer(self.input_width, self.output_width, activation_function))
        else:
            self.layers.append(
                NeuralNetwork.Layer(self.layers[-1].weights.shape[1], self.output_width, activation_function))

    def feedforward(self, x):
        response = np.copy(x)
        response = response.reshape(1, -1)
        response = np.hstack((response, np.ones((response.shape[0], 1))))
        response_s = [response]
        activation_s = []
        for i in range(len(self.layers) - 1):
            response, activation = self.layers[i].feedforward_step(response_s[i])
            activation_s.append(activation)
            response = response.reshape(1, -1)
            response = np.hstack((response, np.ones((response.shape[0], 1))))
            response_s.append(response)
        response, activation = self.layers[-1].feedforward_step(response_s[-1])
        activation_s.append(activation)
        response = response.reshape(1, -1)
        response_s.append(response)
        return (response_s, activation_s)

    def backpropagation(self, x, y, r_s, a_s):
        e_s = [None] * len(self.layers)

        ###
        # derivative of loss function in respect to final layer weights
        # assuming loss is crossentropy and output layer is softmax
        # formula is the same as in the case of linear outputs and mse
        ###
        e_s[-1] = a_s[-1] - y

        for i in reversed(range(1, len(e_s))):
            unbiased_weights = self.layers[i].weights[0:(self.layers[i].weights.shape[0] - 1), :]
            e_s[i-1] = NeuralNetwork.get_activation_derivative(self.layers[i-1].activation_function)(a_s[i-1])*(e_s[i].dot(unbiased_weights.T))
        gradient = [r_s[j].T.dot(e_s[j]) for j in range(0, len(self.layers))]
        return gradient

    def train(self, x, y, batch_size=10, epochs=100, lr=0.01, method='basic', method_param=0.0):
        ###
        # train network. method can be 'basic', 'momentum' or 'rmsprop'.
        # method_param specifies lambda in momentum or beta in rmsprop
        ###
        if self.seed is not None:
            np.random.seed(self.seed)
        errors = []
        eps = 1e-8
        momentum = [np.zeros(layer.weights.shape) for layer in self.layers]
        for e in range(epochs):
            permutation = np.random.permutation(y.shape[0])
            x = x[permutation, :]
            y = y[permutation, :]
            i = 0
            while i < y.shape[0]:
                deltas = [np.zeros(layer.weights.shape) for layer in self.layers]
                x_batch = x[i:i + batch_size, :]
                y_batch = y[i:i + batch_size, :]
                i = i + batch_size
                for j in range(0, y_batch.shape[0]):
                    r_s, a_s = self.feedforward(x_batch[j, :])
                    gradient = self.backpropagation(x_batch[j, :], y_batch[j, :], r_s, a_s)
                    for k in range(0, len(deltas)):
                        deltas[k] = deltas[k] - gradient[k]

                if method == 'momentum':
                    momentum = [delta + method_param * mom for mom, delta in zip(momentum, deltas)]
                elif method == 'rmsprop':
                    momentum = [method_param * mom + (1 - method_param)*np.square(delta) for mom, delta in zip(momentum, deltas)]

                for j in range(0, len(deltas)):
                    if method == 'momentum':
                        self.layers[j].weights = self.layers[j].weights + lr*momentum[j]
                    elif method == 'rmsprop':
                        self.layers[j].weights = self.layers[j].weights + lr * (deltas[j] / (np.sqrt(momentum[j]) + eps))
                    else:
                        self.layers[j].weights = self.layers[j].weights + lr*deltas[j]

                error = self.loss_function(self.predict(x_batch), y_batch)
                if self.verbose:
                    print("loss on batch = {}".format(error))
                errors.append(error)
        return errors


# TESTS ------------------------------------------------------------------------
# set path to dataset folders
classification = "projekt1/classification/"
regression = "projekt1/regression/"

In [2]:
from keras.datasets import mnist
(train_X, train_y), (test_X, test_y) = mnist.load_data()

In [3]:
train_X = train_X.reshape(-1, 784)
test_X = test_X.reshape(-1, 784)
X_mean = np.mean(train_X)
X_std = np.std(train_X)
train_X_std = (train_X - X_mean) / X_std
test_X_std = (test_X - X_mean) / X_std
train_X_scaled = train_X / 255
test_X_scaled = test_X / 255
train_X_centered = (train_X - 127.5) / 127.5
test_X_centered = (test_X - 127.5) / 127.5
mnist_encoder = OneHotEncoder(sparse=False)
train_y_onehot = mnist_encoder.fit_transform(train_y.reshape(-1,1))
test_y_onehot = mnist_encoder.transform(test_y.reshape(-1,1))
print(train_X_std.shape)
print(train_y_onehot.shape)

(60000, 784)
(60000, 10)


In [16]:
nn = NeuralNetwork(784, 10, loss_function='crossentropy', seed=123, verbose=True)

# Multiple layers
nn.add_layer(196)
nn.add_layer(49)
nn.add_layer(20)

nn.create_output_layer('softmax')
nn.errors = nn.train(train_X_std, train_y_onehot, epochs=1, batch_size=50, lr=.01, method='rmsprop', method_param=0.2)
pred = nn.predict(test_X_std)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

atch = 82.94672235393061
loss on batch = 80.61156839035098
loss on batch = 82.0136445924746
loss on batch = 84.11083308240171
loss on batch = 78.52638555693302
loss on batch = 85.8700172513968
loss on batch = 82.66693950619054
loss on batch = 79.51682553587332
loss on batch = 80.52111690601319
loss on batch = 84.7038035075073
loss on batch = 79.83482862846145
loss on batch = 84.84730633634523
loss on batch = 84.92410856661769
loss on batch = 81.5982121772735
loss on batch = 84.2766112865328
loss on batch = 84.6043872768486
loss on batch = 83.60725875096698
loss on batch = 82.26106627187615
loss on batch = 78.77493483928869
loss on batch = 82.55426584972369
loss on batch = 81.97973197296005
loss on batch = 86.7323973815145
loss on batch = 85.20265892508439
loss on batch = 85.53229417612758
loss on batch = 87.21514827176651
loss on batch = 83.42346241308542
loss on batch = 83.53202258975443
loss on batch = 81.67262929289494
loss on batch = 84.06150952188172
loss on batch = 77.93397048246

In [18]:
nn = NeuralNetwork(784, 10, loss_function='crossentropy', seed=123, verbose=True)

# Multiple layers
nn.add_layer(250)
nn.add_layer(200)
nn.add_layer(150)
nn.add_layer(100)
nn.add_layer(50)

nn.create_output_layer('softmax')
nn.errors = nn.train(train_X_std, train_y_onehot, epochs=1, batch_size=50, lr=.01, method='rmsprop', method_param=0.2)
pred = nn.predict(test_X_std)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

on batch = 84.06219733443274
loss on batch = 96.96634442356296
loss on batch = 84.16391428158781
loss on batch = 90.7073357904165
loss on batch = 79.01846076060227
loss on batch = 96.25606742005124
loss on batch = 86.79872517278105
loss on batch = 89.12842788418027
loss on batch = 86.57321463506597
loss on batch = 95.5901503626826
loss on batch = 84.1234436267309
loss on batch = 92.27592853302158
loss on batch = 84.64246809252637
loss on batch = 88.3407262563791
loss on batch = 89.21267663064728
loss on batch = 92.70430073977504
loss on batch = 85.42069917346754
loss on batch = 89.08860025774813
loss on batch = 78.82168340817614
loss on batch = 95.5623800156826
loss on batch = 81.36197341367541
loss on batch = 91.67074388666384
loss on batch = 85.46362843208925
loss on batch = 91.83574101768804
loss on batch = 90.86351512314667
loss on batch = 86.74469437970669
loss on batch = 82.81455925461096
loss on batch = 90.32497904119086
loss on batch = 81.6854768629442
loss on batch = 88.114979

In [24]:
nn = NeuralNetwork(784, 10, loss_function='crossentropy', seed=123, verbose=True)

# Multiple layers
nn.add_layer(196)
nn.add_layer(49)
nn.add_layer(20)

nn.create_output_layer('softmax')
nn.errors = nn.train(train_X_std, train_y_onehot, epochs=2, batch_size=100, lr=.02, method='rmsprop', method_param=0.2)
pred = nn.predict(test_X_std)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

80.1277811524182
loss on batch = 193.04373605508303
loss on batch = 188.1689680002682
loss on batch = 185.85789388573812
loss on batch = 172.42014163145132
loss on batch = 185.28543234243995
loss on batch = 180.98445915993972
loss on batch = 189.85670771331863
loss on batch = 177.04707061200924
loss on batch = 189.992997349745
loss on batch = 182.55791083350644
loss on batch = 190.4439647354813
loss on batch = 183.8342904044249
loss on batch = 181.95383816469922
loss on batch = 169.41992910247433
loss on batch = 189.65554565544855
loss on batch = 184.774647327815
loss on batch = 182.47457220825632
loss on batch = 175.6107130340464
loss on batch = 184.4432598907973
loss on batch = 175.5019768132971
loss on batch = 184.1124680176759
loss on batch = 183.81707430220678
loss on batch = 179.81728990112106
loss on batch = 178.06831501121331
loss on batch = 188.02566015099012
loss on batch = 183.33143216417974
loss on batch = 186.20303731141945
loss on batch = 180.63430715097246
loss on batch 

In [25]:
nn = NeuralNetwork(784, 10, loss_function='crossentropy', seed=123, verbose=True)

# Multiple layers
nn.add_layer(500)
nn.add_layer(250)
nn.add_layer(100)
nn.add_layer(50)


nn.create_output_layer('softmax')
nn.errors = nn.train(train_X_std, train_y_onehot, epochs=1, batch_size=50, lr=.01, method='rmsprop', method_param=0.2)
pred = nn.predict(test_X_std)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

ch = 77.84789612770992
loss on batch = 85.15512132948348
loss on batch = 79.5461053503787
loss on batch = 84.44010488988687
loss on batch = 73.11118513679574
loss on batch = 84.9029293981136
loss on batch = 83.75567481204729
loss on batch = 85.27028895811557
loss on batch = 86.32930130999317
loss on batch = 86.48169382563461
loss on batch = 79.25475851962298
loss on batch = 78.72806372309518
loss on batch = 82.37341834449103
loss on batch = 84.72991350860319
loss on batch = 82.54928508454701
loss on batch = 88.35843220383828
loss on batch = 80.38815121525408
loss on batch = 82.78033762183239
loss on batch = 78.36408614562383
loss on batch = 81.87636656554412
loss on batch = 78.61283851857425
loss on batch = 87.07248550933666
loss on batch = 82.86050658694035
loss on batch = 89.14518634175089
loss on batch = 84.48703742482161
loss on batch = 87.6674297735445
loss on batch = 83.06205120426893
loss on batch = 76.77330840474951
loss on batch = 86.53711616590363
loss on batch = 79.326987124

In [38]:
nn = NeuralNetwork(784, 10, loss_function='crossentropy', seed=123, verbose=True)

# Multiple layers
nn.add_layer(200)
nn.add_layer(80)

nn.create_output_layer('softmax')
nn.errors = nn.train(train_X_std, train_y_onehot, epochs=1, batch_size=10, lr=.01, method='rmsprop', method_param=0.1)
pred = nn.predict(test_X_std)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

on batch = 15.927791839487483
loss on batch = 16.518367210535132
loss on batch = 15.92805057863
loss on batch = 15.771888976059909
loss on batch = 15.191375037509642
loss on batch = 16.43810285442617
loss on batch = 17.337090109098234
loss on batch = 17.64978441920854
loss on batch = 16.02431761379116
loss on batch = 17.608416434572526
loss on batch = 16.906405730039655
loss on batch = 16.33307491925413
loss on batch = 14.84077839430768
loss on batch = 15.87565655756176
loss on batch = 16.697541520124528
loss on batch = 15.827898317830288
loss on batch = 17.47201614073365
loss on batch = 17.244222963473188
loss on batch = 14.607476010669895
loss on batch = 15.837024141124552
loss on batch = 16.447216050227205
loss on batch = 16.43092577045047
loss on batch = 16.18602329968121
loss on batch = 16.4085418786207
loss on batch = 15.369725974527006
loss on batch = 15.25897922174087
loss on batch = 17.53232036356222
loss on batch = 16.775219641551658
loss on batch = 17.397703456370916
loss on

In [13]:
nn = NeuralNetwork(784, 10, loss_function='crossentropy', seed=123, verbose=True,  activation_function ='tanh')

# Multiple layers
nn.add_layer(196)
nn.add_layer(49)
nn.add_layer(20)

nn.create_output_layer('softmax')
nn.errors = nn.train(train_X_std, train_y_onehot, epochs=4, batch_size=10, lr=.01, method='rmsprop', method_param=0.1)
pred = nn.predict(test_X_std)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

n batch = 14.572000605769903
loss on batch = 15.202963195083786
loss on batch = 16.539745383310255
loss on batch = 14.808924786371142
loss on batch = 15.263791830236768
loss on batch = 15.377144839246338
loss on batch = 14.852911757403739
loss on batch = 14.068169917309081
loss on batch = 17.682202196847335
loss on batch = 15.454443500293802
loss on batch = 15.119483232611207
loss on batch = 15.586574365745143
loss on batch = 15.386647336300843
loss on batch = 14.493193319796536
loss on batch = 16.422194166907325
loss on batch = 16.425386990597303
loss on batch = 14.81357764865727
loss on batch = 15.472163541842825
loss on batch = 15.674180401376919
loss on batch = 14.646249588739597
loss on batch = 15.092759459755543
loss on batch = 15.922818921233196
loss on batch = 15.058508075990865
loss on batch = 16.185759024390993
loss on batch = 15.726102129569759
loss on batch = 15.397597697145235
loss on batch = 16.986110927400297
loss on batch = 15.312297428412773
loss on batch = 15.55640311

In [19]:
nn = NeuralNetwork(784, 10, loss_function='crossentropy', seed=123, verbose=True,  activation_function ='sigma')

# Multiple layers
nn.add_layer(196)
nn.add_layer(49)
nn.add_layer(20)

nn.create_output_layer('softmax')
nn.errors = nn.train(train_X_std, train_y_onehot, epochs=4, batch_size=10, lr=.01, method='rmsprop', method_param=0.1)
pred = nn.predict(test_X_std)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

391636645420121
loss on batch = 14.639313152792447
loss on batch = 15.993440665648157
loss on batch = 17.294457006825716
loss on batch = 15.590144733968526
loss on batch = 16.03789035933278
loss on batch = 14.990677415060976
loss on batch = 15.426720047312031
loss on batch = 14.690385293307505
loss on batch = 16.64994199730276
loss on batch = 15.517577028326459
loss on batch = 15.482643740461935
loss on batch = 15.004101993207868
loss on batch = 14.81533320483238
loss on batch = 13.920552053550361
loss on batch = 16.573712877271678
loss on batch = 15.86726018844237
loss on batch = 14.399448962249826
loss on batch = 16.692476798594953
loss on batch = 14.456447415228372
loss on batch = 15.142212381642171
loss on batch = 13.522004718202039
loss on batch = 15.643577708445537
loss on batch = 16.05996401776989
loss on batch = 13.33264126478989
loss on batch = 15.210294950077136
loss on batch = 15.420725904158406
loss on batch = 17.19520854859501
loss on batch = 14.808148845517014
loss on bat

In [22]:
nn = NeuralNetwork(784, 10, loss_function='crossentropy', seed=123, verbose=True,  activation_function ='sigma')

# Multiple layers
nn.add_layer(196)
nn.add_layer(49)
nn.add_layer(20)

nn.create_output_layer('softmax')
nn.errors = nn.train(train_X_std, train_y_onehot, epochs=4, batch_size=10, lr=.01, method='rmsprop', method_param=0.05)
pred = nn.predict(test_X_std)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

= 14.406891529117981
loss on batch = 14.95302006970255
loss on batch = 15.300358187129243
loss on batch = 17.69426803929003
loss on batch = 14.597402437119202
loss on batch = 15.781744085700597
loss on batch = 14.195432317285196
loss on batch = 15.587729970056904
loss on batch = 14.068831514077369
loss on batch = 16.407783652644788
loss on batch = 13.937891937307842
loss on batch = 15.224056844417632
loss on batch = 15.35780900254586
loss on batch = 15.748936604877473
loss on batch = 17.21923287692341
loss on batch = 14.60117521157767
loss on batch = 16.673996472750325
loss on batch = 15.551216527271297
loss on batch = 14.077106883097827
loss on batch = 16.37709612467092
loss on batch = 14.491297067963416
loss on batch = 14.910370480222557
loss on batch = 16.16197634713235
loss on batch = 15.730191925452498
loss on batch = 16.576310029636566
loss on batch = 14.357614030938041
loss on batch = 14.332433387252816
loss on batch = 17.73117824865148
loss on batch = 15.751738239881966
loss on

In [23]:
nn = NeuralNetwork(784, 10, loss_function='crossentropy', seed=123, verbose=True,  activation_function ='sigma')

# Multiple layers
nn.add_layer(196)
nn.add_layer(49)
nn.add_layer(20)

nn.create_output_layer('softmax')
nn.errors = nn.train(train_X_std, train_y_onehot, epochs=4, batch_size=10, lr=.01, method='rmsprop', method_param=0.15)
pred = nn.predict(test_X_std)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

batch = 14.737710486484097
loss on batch = 14.572355995924783
loss on batch = 16.71248643547966
loss on batch = 14.686964277758587
loss on batch = 15.609203558530487
loss on batch = 15.573783844031102
loss on batch = 15.55292047743616
loss on batch = 15.885508636341271
loss on batch = 16.302187393883102
loss on batch = 14.334641594692926
loss on batch = 16.24737162037089
loss on batch = 15.025638456482875
loss on batch = 15.15113912825361
loss on batch = 16.139289055384097
loss on batch = 15.823850532414507
loss on batch = 16.928863606296517
loss on batch = 14.286548139947353
loss on batch = 14.908002416869364
loss on batch = 14.806907161709509
loss on batch = 14.01795995250917
loss on batch = 15.55854639227178
loss on batch = 14.654399920634521
loss on batch = 16.87156262497567
loss on batch = 15.238970362324025
loss on batch = 14.456160171820367
loss on batch = 15.889465376428845
loss on batch = 17.22777744486241
loss on batch = 15.476927910932176
loss on batch = 15.199254939130542
l

In [24]:
nn = NeuralNetwork(784, 10, loss_function='crossentropy', seed=123, verbose=False,  activation_function ='sigma')

# Multiple layers
nn.add_layer(196)
nn.add_layer(49)
nn.add_layer(20)

nn.create_output_layer('softmax')
nn.errors = nn.train(train_X_std, train_y_onehot, epochs=5, batch_size=10, lr=.01, method='rmsprop', method_param=0.1)
pred = nn.predict(test_X_std)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

Acc: 0.9172


In [25]:
nn = NeuralNetwork(784, 10, loss_function='crossentropy', seed=123, verbose=False,  activation_function ='sigma')

# Multiple layers
nn.add_layer(196)
nn.add_layer(49)
nn.add_layer(20)

nn.create_output_layer('softmax')
nn.errors = nn.train(train_X_std, train_y_onehot, epochs=4, batch_size=10, lr=.012, method='rmsprop', method_param=0.1)
pred = nn.predict(test_X_std)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

Acc: 0.917


In [26]:
nn = NeuralNetwork(784, 10, loss_function='crossentropy', seed=123, verbose=False,  activation_function ='sigma')

# Multiple layers
nn.add_layer(250)
nn.add_layer(80)
nn.add_layer(25)

nn.create_output_layer('softmax')
nn.errors = nn.train(train_X_std, train_y_onehot, epochs=5, batch_size=10, lr=.01, method='rmsprop', method_param=0.1)
pred = nn.predict(test_X_std)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

Acc: 0.9176


In [27]:
nn = NeuralNetwork(784, 10, loss_function='crossentropy', seed=123, verbose=False,  activation_function ='sigma')

# Multiple layers
nn.add_layer(196)
nn.add_layer(49)
nn.add_layer(20)

nn.create_output_layer('softmax')
nn.errors = nn.train(train_X_std, train_y_onehot, epochs=3, batch_size=10, lr=.01, method='rmsprop', method_param=0.1)
pred = nn.predict(test_X_std)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

Acc: 0.9092


In [28]:
nn = NeuralNetwork(784, 10, loss_function='crossentropy', seed=123, verbose=False,  activation_function ='sigma')

# Multiple layers
nn.add_layer(150)
nn.add_layer(49)
nn.add_layer(20)

nn.create_output_layer('softmax')
nn.errors = nn.train(train_X_std, train_y_onehot, epochs=3, batch_size=10, lr=.01, method='rmsprop', method_param=0.1)
pred = nn.predict(test_X_std)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

Acc: 0.9197


In [29]:
nn = NeuralNetwork(784, 10, loss_function='crossentropy', seed=123, verbose=False,  activation_function ='sigma')

# Multiple layers
nn.add_layer(210)
nn.add_layer(55)
nn.add_layer(20)

nn.create_output_layer('softmax')
nn.errors = nn.train(train_X_std, train_y_onehot, epochs=4, batch_size=10, lr=.01, method='rmsprop', method_param=0.1)
pred = nn.predict(test_X_std)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

Acc: 0.9151


In [30]:
nn = NeuralNetwork(784, 10, loss_function='crossentropy', seed=123, verbose=False,  activation_function ='sigma')

# Multiple layers
nn.add_layer(196)
nn.add_layer(49)

nn.create_output_layer('softmax')
nn.errors = nn.train(train_X_std, train_y_onehot, epochs=4, batch_size=10, lr=.01, method='rmsprop', method_param=0.1)
pred = nn.predict(test_X_std)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

Acc: 0.9289


In [31]:
nn = NeuralNetwork(784, 10, loss_function='crossentropy', seed=123, verbose=False,  activation_function ='sigma')

# Multiple layers
nn.add_layer(150)
nn.add_layer(49)
nn.add_layer(20)

nn.create_output_layer('softmax')
nn.errors = nn.train(train_X_std, train_y_onehot, epochs=4, batch_size=10, lr=.01, method='rmsprop', method_param=0.1)
pred = nn.predict(test_X_std)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

Acc: 0.9209


In [32]:
nn = NeuralNetwork(784, 10, loss_function='crossentropy', seed=123, verbose=False,  activation_function ='sigma')

# Multiple layers
nn.add_layer(150)
nn.add_layer(49)
nn.add_layer(10)

nn.create_output_layer('softmax')
nn.errors = nn.train(train_X_std, train_y_onehot, epochs=4, batch_size=10, lr=.01, method='rmsprop', method_param=0.1)
pred = nn.predict(test_X_std)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

Acc: 0.9299


In [33]:
nn = NeuralNetwork(784, 10, loss_function='crossentropy', seed=123, verbose=False,  activation_function ='sigma')

# Multiple layers
nn.add_layer(300)
nn.add_layer(150)
nn.add_layer(49)
nn.add_layer(10)

nn.create_output_layer('softmax')
nn.errors = nn.train(train_X_std, train_y_onehot, epochs=1, batch_size=10, lr=.01, method='rmsprop', method_param=0.1)
pred = nn.predict(test_X_std)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

Acc: 0.9238


In [37]:
nn = NeuralNetwork(784, 10, loss_function='crossentropy', seed=123, verbose=False,  activation_function ='sigma')

# Multiple layers
nn.add_layer(250)
nn.add_layer(100)
nn.add_layer(49)
nn.add_layer(10)

nn.create_output_layer('softmax')
nn.errors = nn.train(train_X_std, train_y_onehot, epochs=5, batch_size=10, lr=.01, method='rmsprop', method_param=0.1)
pred = nn.predict(test_X_std)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

Acc: 0.9318


In [38]:
nn.errors = nn.train(train_X_std, train_y_onehot, epochs=5, batch_size=10, lr=.01, method='rmsprop', method_param=0.1)
pred = nn.predict(test_X_std)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

Acc: 0.939


In [40]:
nn.errors = nn.train(train_X_std, train_y_onehot, epochs=1, batch_size=10, lr=.01, method='rmsprop', method_param=0.1)
pred = nn.predict(test_X_std)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

  return 1 / (np.exp(-x) + 1)
Acc: 0.9349


In [42]:
nn = NeuralNetwork(784, 10, loss_function='crossentropy', seed=123, verbose=False,  activation_function ='sigma')

# Multiple layers
nn.add_layer(250)
nn.add_layer(100)
nn.add_layer(49)
nn.add_layer(10)

nn.create_output_layer('softmax')
nn.errors = nn.train(train_X_std, train_y_onehot, epochs=5, batch_size=10, lr=.01, method='rmsprop', method_param=0.08)
pred = nn.predict(test_X_std)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

Acc: 0.9241


In [43]:
nn.errors = nn.train(train_X_std, train_y_onehot, epochs=5, batch_size=10, lr=.01, method='rmsprop', method_param=0.08)
pred = nn.predict(test_X_std)

In [44]:
print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

Acc: 0.9281


In [45]:
nn = NeuralNetwork(784, 10, loss_function='crossentropy', seed=123, verbose=False,  activation_function ='sigma')

# Multiple layers
nn.add_layer(200)
nn.add_layer(80)

nn.create_output_layer('softmax')
nn.errors = nn.train(train_X_std, train_y_onehot, epochs=5, batch_size=10, lr=.01, method='rmsprop', method_param=0.1)
pred = nn.predict(test_X_std)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

Acc: 0.9113


In [46]:
nn.errors = nn.train(train_X_std, train_y_onehot, epochs=5, batch_size=10, lr=.01, method='rmsprop', method_param=0.1)
pred = nn.predict(test_X_std)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

  return 1 / (np.exp(-x) + 1)
Acc: 0.9204


In [1]:
nn = NeuralNetwork(784, 10, loss_function='crossentropy', seed=123, verbose=False,  activation_function ='sigma')

# Multiple layers
nn.add_layer(250)
nn.add_layer(100)
nn.add_layer(49)
nn.add_layer(10)

nn.create_output_layer('softmax')
nn.errors = nn.train(train_X_std, train_y_onehot, epochs=10, batch_size=10, lr=.01, method='rmsprop', method_param=0.1)
pred = nn.predict(test_X_std)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

NameError: name 'NeuralNetwork' is not defined

In [6]:
nn = NeuralNetwork(784, 10, loss_function='crossentropy', seed=123, verbose=False,  activation_function ='sigma')

# Multiple layers
nn.add_layer(250)
nn.add_layer(100)
nn.add_layer(49)
nn.add_layer(10)

nn.create_output_layer('softmax')
nn.errors = nn.train(train_X_scaled, train_y_onehot, epochs=5, batch_size=10, lr=.01, method='rmsprop', method_param=0.1)
pred = nn.predict(test_X_scaled)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

Acc: 0.938


In [8]:
nn = NeuralNetwork(784, 10, loss_function='crossentropy', seed=123, verbose=False,  activation_function ='sigma')

# Multiple layers
nn.add_layer(250)
nn.add_layer(100)
nn.add_layer(49)
nn.add_layer(10)

nn.create_output_layer('softmax')
nn.errors = nn.train(train_X_centered, train_y_onehot, epochs=5, batch_size=10, lr=.01, method='rmsprop', method_param=0.1)
pred = nn.predict(test_X_centered)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

Acc: 0.8641


In [10]:
nn = NeuralNetwork(784, 10, loss_function='crossentropy', seed=123, verbose=True,  activation_function ='sigma')

# Multiple layers
nn.add_layer(250)
nn.add_layer(100)
nn.add_layer(49)
nn.add_layer(10)

nn.create_output_layer('softmax')
nn.errors = nn.train(train_X_scaled, train_y_onehot, epochs=10, batch_size=10, lr=.01, method='rmsprop', method_param=0.1)
pred = nn.predict(test_X_scaled)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

36020495
loss on batch = 14.800520751524518
loss on batch = 15.485943604457766
loss on batch = 14.805754016936326
loss on batch = 14.570986591256123
loss on batch = 15.16971585660474
loss on batch = 14.743118910294722
loss on batch = 14.802508456159797
loss on batch = 14.655103203596529
loss on batch = 15.325407061266771
loss on batch = 15.442947181789567
loss on batch = 16.408109528406193
loss on batch = 14.789963717499933
loss on batch = 14.447969771099363
loss on batch = 14.413868444254728
loss on batch = 15.343247497289711
loss on batch = 14.581568431356363
loss on batch = 14.761743265518426
loss on batch = 14.261780980025526
loss on batch = 14.736093177137631
loss on batch = 15.822203793903427
loss on batch = 16.275937712299424
loss on batch = 14.578667105935077
loss on batch = 14.860697387030195
loss on batch = 16.072671336170416
loss on batch = 15.622248405004381
loss on batch = 15.948167799667313
loss on batch = 14.927369491231529
loss on batch = 14.371516574726556
loss on batc

In [4]:
nn = NeuralNetwork(784, 10, loss_function='crossentropy', seed=123, verbose=False,  activation_function ='sigma')

# Multiple layers
nn.add_layer(250)
nn.add_layer(100)
nn.add_layer(49)
nn.add_layer(10)

nn.create_output_layer('softmax')
nn.errors = nn.train(train_X_scaled, train_y_onehot, epochs=5, batch_size=5, lr=.01, method='rmsprop', method_param=0.1)
pred = nn.predict(test_X_scaled)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

Acc: 0.9293


In [5]:
nn = NeuralNetwork(784, 10, loss_function='crossentropy', seed=123, verbose=False,  activation_function ='sigma')

# Multiple layers
nn.add_layer(250)
nn.add_layer(100)
nn.add_layer(49)
nn.add_layer(10)

nn.create_output_layer('softmax')
nn.errors = nn.train(train_X_scaled, train_y_onehot, epochs=5, batch_size=10, lr=.02, method='rmsprop', method_param=0.1)
pred = nn.predict(test_X_scaled)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

Acc: 0.9164


In [6]:
nn = NeuralNetwork(784, 10, loss_function='crossentropy', seed=123, verbose=False,  activation_function ='sigma')

# Multiple layers
nn.add_layer(250)
nn.add_layer(100)
nn.add_layer(49)
nn.add_layer(10)

nn.create_output_layer('softmax')
nn.errors = nn.train(train_X_scaled, train_y_onehot, epochs=5, batch_size=10, lr=.01, method='rmsprop', method_param=0.2)
pred = nn.predict(test_X_scaled)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

Acc: 0.9369


In [7]:
nn = NeuralNetwork(784, 10, loss_function='crossentropy', seed=123, verbose=False,  activation_function ='sigma')

# Multiple layers
nn.add_layer(250)
nn.add_layer(100)
nn.add_layer(49)
nn.add_layer(10)

nn.create_output_layer('softmax')
nn.errors = nn.train(train_X_scaled, train_y_onehot, epochs=5, batch_size=12, lr=.01, method='rmsprop', method_param=0.1)
pred = nn.predict(test_X_scaled)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

Acc: 0.9432


In [8]:
nn = NeuralNetwork(784, 10, loss_function='crossentropy', seed=123, verbose=False,  activation_function ='sigma')

# Multiple layers
#nn.add_layer(250)
nn.add_layer(100)
nn.add_layer(49)
nn.add_layer(10)

nn.create_output_layer('softmax')
nn.errors = nn.train(train_X_scaled, train_y_onehot, epochs=5, batch_size=10, lr=.01, method='rmsprop', method_param=0.1)
pred = nn.predict(test_X_scaled)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

Acc: 0.9416


In [9]:
nn = NeuralNetwork(784, 10, loss_function='crossentropy', seed=123, verbose=False,  activation_function ='sigma')

# Multiple layers
nn.add_layer(250)
nn.add_layer(100)
nn.add_layer(49)
nn.add_layer(10)

nn.create_output_layer('softmax')
nn.errors = nn.train(train_X_scaled, train_y_onehot, epochs=5, batch_size=10, lr=.008, method='rmsprop', method_param=0.1)
pred = nn.predict(test_X_scaled)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

Acc: 0.9461


In [10]:
nn = NeuralNetwork(784, 10, loss_function='crossentropy', seed=123, verbose=False,  activation_function ='sigma')

# Multiple layers
nn.add_layer(250)
nn.add_layer(100)
nn.add_layer(49)
#nn.add_layer(10)

nn.create_output_layer('softmax')
nn.errors = nn.train(train_X_scaled, train_y_onehot, epochs=5, batch_size=10, lr=.01, method='rmsprop', method_param=0.1)
pred = nn.predict(test_X_scaled)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

Acc: 0.927


In [11]:
nn = NeuralNetwork(784, 10, loss_function='crossentropy', seed=123, verbose=False,  activation_function ='sigma')

# Multiple layers
#nn.add_layer(250)
nn.add_layer(100)
nn.add_layer(49)
nn.add_layer(10)

nn.create_output_layer('softmax')
nn.errors = nn.train(train_X_scaled, train_y_onehot, epochs=5, batch_size=12, lr=.008, method='rmsprop', method_param=0.1)
pred = nn.predict(test_X_scaled)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

Acc: 0.9441


In [12]:
nn = NeuralNetwork(784, 10, loss_function='crossentropy', seed=123, verbose=False,  activation_function ='sigma')

# Multiple layers
nn.add_layer(250)
nn.add_layer(100)
nn.add_layer(49)
nn.add_layer(10)

nn.create_output_layer('softmax')
nn.errors = nn.train(train_X_scaled, train_y_onehot, epochs=5, batch_size=12, lr=.008, method='rmsprop', method_param=0.1)
pred = nn.predict(test_X_scaled)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

Acc: 0.94


In [14]:
nn.errors = nn.train(train_X_scaled, train_y_onehot, epochs=1, batch_size=12, lr=.008, method='rmsprop', method_param=0.1)
pred = nn.predict(test_X_scaled)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

Acc: 0.9476


In [15]:
nn = NeuralNetwork(784, 10, loss_function='crossentropy', seed=123, verbose=False,  activation_function ='sigma')

# Multiple layers
nn.add_layer(250)
nn.add_layer(100)
nn.add_layer(49)
nn.add_layer(10)

nn.create_output_layer('softmax')
nn.errors = nn.train(train_X_scaled, train_y_onehot, epochs=4, batch_size=12, lr=.008, method='rmsprop', method_param=0.1)
pred = nn.predict(test_X_scaled)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

Acc: 0.9471


In [16]:
nn = NeuralNetwork(784, 10, loss_function='crossentropy', seed=123, verbose=False,  activation_function ='sigma')

# Multiple layers
nn.add_layer(250)
nn.add_layer(100)
nn.add_layer(49)
nn.add_layer(10)

nn.create_output_layer('softmax')
nn.errors = nn.train(train_X_scaled, train_y_onehot, epochs=5, batch_size=10, lr=.007, method='rmsprop', method_param=0.1)
pred = nn.predict(test_X_scaled)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

Acc: 0.9465


In [17]:
nn = NeuralNetwork(784, 10, loss_function='crossentropy', seed=123, verbose=False,  activation_function ='sigma')

# Multiple layers
nn.add_layer(250)
nn.add_layer(100)
nn.add_layer(49)
nn.add_layer(10)

nn.create_output_layer('softmax')
nn.errors = nn.train(train_X_scaled, train_y_onehot, epochs=4, batch_size=13, lr=.008, method='rmsprop', method_param=0.1)
pred = nn.predict(test_X_scaled)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

Acc: 0.948


In [18]:
nn = NeuralNetwork(784, 10, loss_function='crossentropy', seed=123, verbose=False,  activation_function ='sigma')

# Multiple layers
nn.add_layer(250)
nn.add_layer(100)
nn.add_layer(49)
nn.add_layer(10)

nn.create_output_layer('softmax')
nn.errors = nn.train(train_X_scaled, train_y_onehot, epochs=4, batch_size=15, lr=.008, method='rmsprop', method_param=0.1)
pred = nn.predict(test_X_scaled)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

Acc: 0.9498


In [19]:
nn = NeuralNetwork(784, 10, loss_function='crossentropy', seed=123, verbose=False,  activation_function ='sigma')

# Multiple layers
nn.add_layer(250)
nn.add_layer(100)
nn.add_layer(49)
nn.add_layer(10)

nn.create_output_layer('softmax')
nn.errors = nn.train(train_X_scaled, train_y_onehot, epochs=4, batch_size=20, lr=.008, method='rmsprop', method_param=0.1)
pred = nn.predict(test_X_scaled)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

Acc: 0.9465


In [20]:
 nn = NeuralNetwork(784, 10, loss_function='crossentropy', seed=123, verbose=False,  activation_function ='sigma')

# Multiple layers
nn.add_layer(250)
nn.add_layer(100)
nn.add_layer(49)
nn.add_layer(10)

nn.create_output_layer('softmax')
nn.errors = nn.train(train_X_scaled, train_y_onehot, epochs=4, batch_size=15, lr=.007, method='rmsprop', method_param=0.1)
pred = nn.predict(test_X_scaled)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

Acc: 0.9418


In [21]:
nn.errors = nn.train(train_X_scaled, train_y_onehot, epochs=4, batch_size=15, lr=.007, method='rmsprop', method_param=0.1)
pred = nn.predict(test_X_scaled)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

Acc: 0.9524


In [22]:
 nn = NeuralNetwork(784, 10, loss_function='crossentropy', seed=123, verbose=False,  activation_function ='sigma')

# Multiple layers
nn.add_layer(250)
nn.add_layer(100)
nn.add_layer(49)
nn.add_layer(10)

nn.create_output_layer('softmax')
nn.errors = nn.train(train_X_scaled, train_y_onehot, epochs=10, batch_size=15, lr=.007, method='rmsprop', method_param=0.1)
pred = nn.predict(test_X_scaled)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

Acc: 0.9554


In [24]:
nn.errors = nn.train(train_X_scaled, train_y_onehot, epochs=1, batch_size=15, lr=.007, method='rmsprop', method_param=0.1)
pred = nn.predict(test_X_scaled)

print("Acc: " + str(np.mean((np.argmax(test_y_onehot, axis=1).flatten() == np.argmax(pred, axis=1).flatten()))))

Acc: 0.9521
