In [2]:
import sklearn.datasets as datasets

In [3]:
# Load the digits dataset through sklearn.datasets.
digits = datasets.load_digits()

In [4]:
# Check the dimensions of the feature array.
digits.data.shape

(1797, 64)

In [5]:
# Check the dimensions of the label array.
digits.target.shape

(1797,)

In [6]:
import numpy as np

In [7]:
# Layer widths: the input width is taken from the data, followed by 16, 16 and 10 units.
sizes = [digits.data.shape[1], 16, 16, 10]

In [8]:
# Echo the layer widths.
sizes

[64, 16, 16, 10]

In [9]:
# One random (np.random.randn) weight matrix per pair of adjacent layers,
# shaped (width of layer i, width of layer i + 1).
weights = [np.random.randn(x, y) for x, y in zip(sizes[:-1], sizes[1:])]

In [10]:
# Display the initial weight matrices (this prints every matrix in the list).
weights

[array([[-1.45925885,  0.48421544, -0.7880417 , ..., -0.48558135,
         -0.09058042, -1.4996869 ],
        [ 0.08556095, -0.66597537, -0.29572179, ...,  0.1482121 ,
         -1.19252687, -1.27271259],
        [-1.13761115, -0.33863198, -0.5010875 , ..., -0.25252456,
          1.97827172,  1.24986115],
        ...,
        [ 0.51451048,  1.38107286,  0.86944066, ..., -0.07886087,
          0.98088437,  0.00639155],
        [-0.07713306, -0.29766385,  1.82196367, ..., -0.46005961,
          0.1687411 , -0.14741362],
        [ 1.67562995, -0.81973236,  0.79643835, ...,  0.05247948,
         -1.73647545,  0.64169606]]),
 array([[-4.01951641e-01, -2.21780819e+00,  3.81192034e-02,
         -1.03615474e+00,  6.87094031e-02,  1.10405985e+00,
          1.01025273e+00,  4.70212228e-01,  4.06509717e-01,
          1.30471129e+00,  8.24139027e-01,  5.51574833e-01,
         -1.18191248e+00, -1.47566464e-01,  1.05039944e-01,
          2.91338300e+00],
        [ 1.05241404e+00, -4.99741108e-01,  8.

In [11]:
# One random (np.random.randn) bias row of shape (1, width) for every layer after the input.
biases = [np.random.randn(1, y) for y in sizes[1:]]

In [12]:
def sigmoid(z):
    """Logistic sigmoid 1 / (1 + exp(-z)), applied element-wise.

    Evaluated as exp(-log(1 + exp(-z))) through ``np.logaddexp`` so that
    large negative inputs do not overflow ``np.exp`` (the naive form emits a
    RuntimeWarning there); the result still tends to 0 and 1 at the extremes.

    Args:
        z: Scalar or numpy array.

    Returns:
        Value(s) in [0, 1] with the same shape as ``z``.
    """
    return np.exp(-np.logaddexp(0, -z))

In [13]:
def feed_forward(a, weights, biases):
    """Forward pass: sigmoid on every layer except the last, softmax on the last.

    Args:
        a: Input array with one sample per row.
        weights: List of weight matrices, one per layer transition.
        biases: List of bias rows matching ``weights``.

    Returns:
        2-D array of softmax outputs; every row sums to 1.
    """
    # Hidden layers: all transitions except the final one.
    for w, b in zip(weights[:-1], biases):
        a = sigmoid(np.dot(a, w) + b)

    # Output layer. Subtracting the row maximum leaves the softmax unchanged
    # mathematically but keeps np.exp from overflowing to inf (inf / inf = nan).
    z = np.dot(a, weights[-1]) + biases[-1]
    ez = np.exp(z - z.max(axis=1, keepdims=True))
    return ez / ez.sum(axis=1, keepdims=True)

In [14]:
# Forward pass over the whole dataset using the random initial parameters.
yhat = feed_forward(digits.data, weights, biases)

In [15]:
# Network output for the first sample.
yhat[0]

array([2.34224373e-02, 1.66534785e-02, 1.16458412e-02, 1.01351772e-03,
       1.53620981e-03, 4.23515043e-04, 2.61058536e-03, 9.30857464e-01,
       3.21393999e-03, 8.62301138e-03])

In [16]:
# Label of the first sample, for comparison with the output above.
digits.target[0]

0

In [17]:
# Allocate the one-hot target matrix: one row per sample, 10 columns.
Y = np.zeros((digits.data.shape[0], 10))

In [18]:
# In every row, set the column indexed by that sample's label to 1.
Y[np.arange(digits.data.shape[0]), digits.target] = 1

In [19]:
# Peek at the first ten one-hot rows.
Y[0:10]

array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]])

In [20]:
# Difference between the softmax output and the one-hot targets.
delta = yhat - Y

In [21]:
# Zero-filled arrays shaped like each weight matrix and each bias row.
gw = [np.zeros(w.shape) for w in weights]
gb = [np.zeros(b.shape) for b in biases]

In [40]:
### copy paste
import numpy as np
import matplotlib.pyplot as plt

def sigmoid(z):
    """Logistic sigmoid 1 / (1 + exp(-z)), applied element-wise.

    Evaluated as exp(-log(1 + exp(-z))) through ``np.logaddexp`` so that
    large negative inputs do not overflow ``np.exp`` (the naive form emits a
    RuntimeWarning there); the result still tends to 0 and 1 at the extremes.

    Args:
        z: Scalar or numpy array.

    Returns:
        Value(s) in [0, 1] with the same shape as ``z``.
    """
    return np.exp(-np.logaddexp(0, -z))

def sigmoid_prime(z):
    """Derivative of the sigmoid at ``z``: s * (1 - s) with s = sigmoid(z)."""
    s = sigmoid(z)
    return s * (1 - s)

def classification_rate(y, yHat):
    """Return the fraction of positions at which ``y`` and ``yHat`` are equal.

    Args:
        y: Indexable sequence of labels; its length fixes how many positions
            are compared.
        yHat: Indexable sequence of predictions, compared position by position.

    Returns:
        Number of matching positions divided by ``len(y)``, as a float.
    """
    total = len(y)
    hits = sum(1 for idx in range(total) if y[idx] == yHat[idx])
    return float(hits) / total

class Network(object):
    """Fully connected feed-forward network trained by batch gradient descent.

    Samples are stored one per row. Hidden layers use the sigmoid activation.
    ``feedforward`` applies sigmoid to every layer, whereas ``feedforward_c``
    and the training code use a softmax output layer with a cross-entropy cost.
    """

    def __init__(self, sizes):
        """Create the network with random (np.random.randn) parameters.

        Args:
            sizes: Sequence of layer widths, input layer first and output
                layer last, e.g. ``[64, 16, 16, 10]``.
        """
        self.layers = len(sizes)
        self.sizes = sizes
        # Bias rows are (1, width) so they broadcast over a batch of samples.
        self.biases = [np.random.randn(1, y) for y in sizes[1:]]
        # weights[i] maps layer i (rows) to layer i + 1 (columns).
        self.weights = [np.random.randn(x, y) for x, y in zip(sizes[:-1], sizes[1:])]

    @staticmethod
    def _softmax(z):
        """Row-wise softmax of a 2-D array, shifted by the row maximum for stability."""
        # The shift cancels in the ratio but keeps np.exp from overflowing to inf,
        # which would otherwise turn the row into nan (inf / inf).
        ez = np.exp(z - z.max(axis=1, keepdims=True))
        return ez / ez.sum(axis=1, keepdims=True)

    def feedforward(self, a):
        """Propagate ``a`` through the network with sigmoid on every layer, output included."""
        for w, b in zip(self.weights, self.biases):
            a = sigmoid(np.dot(a, w) + b)
        return a

    def feedforward_c(self, a):
        """Classification forward pass: sigmoid hidden layers, softmax output.

        Args:
            a: Input array with one sample per row.

        Returns:
            2-D array of class probabilities; every row sums to 1.
        """
        # Apply sigmoid to all hidden layers (everything except the output layer).
        # ``range`` rather than ``xrange``: the latter does not exist on Python 3.
        for i in range(self.layers - 2):
            a = sigmoid(np.dot(a, self.weights[i]) + self.biases[i])

        # Apply softmax for the output layer.
        return self._softmax(np.dot(a, self.weights[-1]) + self.biases[-1])

    def one_hot_encoding(self, y):
        """Return a (len(y), output width) array with a 1 in column ``y[i]`` of row ``i``.

        Args:
            y: Sequence of integer class indices.
        """
        sz = self.sizes[-1]  # Output layer size
        labels = np.asarray(y)
        oneHotY = np.zeros((len(labels), sz))
        if len(labels):
            # Fancy indexing sets one entry per row in a single step.
            oneHotY[np.arange(len(labels)), labels] = 1
        return oneHotY

    def back_propagation(self, a, y):
        """Compute the cross-entropy gradients for the batch ``(a, y)``.

        Args:
            a: Input array with one sample per row.
            y: Sequence of integer class indices, one per sample.

        Returns:
            Tuple ``(gw, gb, yHat)``: ``gw[i]`` and ``gb[i]`` are the gradients
            for ``weights[i]`` and ``biases[i]``, summed (not averaged) over
            the samples; ``yHat`` is the softmax output for the batch.
        """
        # Forward pass, storing each hidden layer's pre-activation and activation.
        zs = []
        activations = [a]
        for i in range(self.layers - 2):
            z = np.dot(a, self.weights[i]) + self.biases[i]
            a = sigmoid(z)
            zs.append(z)
            activations.append(a)

        # Output layer (softmax).
        yHat = self._softmax(np.dot(a, self.weights[-1]) + self.biases[-1])

        # dJ/dz at the output layer for softmax with the cross-entropy cost.
        delta = yHat - self.one_hot_encoding(y)

        # Gradient placeholders, one per layer.
        gw = [np.zeros(w.shape) for w in self.weights]
        gb = [np.zeros(b.shape) for b in self.biases]

        # Output layer gradients.
        gb[-1] = delta.sum(axis=0)
        gw[-1] = np.dot(activations[-1].T, delta)

        # Walk backwards through the hidden layers; l counts from the end.
        for l in range(2, self.layers):
            back = np.dot(delta, self.weights[-l + 1].T)
            delta = np.multiply(back, sigmoid_prime(zs[-l + 1]))
            gb[-l] = delta.sum(axis=0)
            gw[-l] = np.dot(activations[-l].T, delta)

        return (gw, gb, yHat)

    def cost(self, t, y):
        """Cross-entropy cost ``-sum(t * log(y))``.

        Entries where ``t`` is 0 contribute 0 even if ``y`` is exactly 0 there;
        evaluating ``0 * log(0)`` directly would turn the whole sum into nan.

        Args:
            t: One-hot (or otherwise weighted) targets.
            y: Predicted probabilities, broadcastable against ``t``.
        """
        # Both branches of np.where are evaluated, so silence the warnings that
        # log(0) and 0 * inf would raise for the entries that get discarded.
        with np.errstate(divide="ignore", invalid="ignore"):
            terms = np.where(np.asarray(t) != 0, t * np.log(y), 0.0)
        return (-1.0) * np.sum(terms)

    def gradient_descent(self, epochs, eta, x, y):
        """Run ``epochs`` full-batch gradient descent updates, printing progress.

        Args:
            epochs: Number of update steps.
            eta: Learning rate; each step is scaled by ``eta / len(y)``.
            x: Input array with one sample per row.
            y: Sequence of integer class indices, one per sample.
        """
        # Use this instance, not a module-level variable that happens to be a Network.
        yOneHot = self.one_hot_encoding(y)
        for i in range(epochs):
            gw, gb, yHat = self.back_propagation(x, y)
            cl_rate = classification_rate(y, np.argmax(yHat, axis=1))
            loss = self.cost(yOneHot, yHat)
            print("Iteration: ", i, " Classification rate: ", cl_rate,  " Cost: ", loss)

            # The gradients are sums over the batch, so divide by the batch size.
            step = (eta) / len(y)
            self.weights = [w - step * dw for w, dw in zip(self.weights, gw)]
            self.biases = [b - step * db for b, db in zip(self.biases, gb)]


# Example - create 3 clusters of random 2-D points, 500 points each
npoints = 500
X1 = np.random.randn(npoints, 2) + np.array([0, -2]) # shift the cluster by (0, -2)
X2 = np.random.randn(npoints, 2) + np.array([2, 2]) # shift the cluster by (2, 2)
X3 = np.random.randn(npoints, 2) + np.array([-2, 2]) # shift the cluster by (-2, 2)
X = np.vstack([X1, X2, X3])
print(X.shape)
print(X[0])
# Create labels for each class (0, 1 and 2), in the same order as the stacked clusters
Y = np.array([0]*npoints + [1]*npoints + [2]*npoints)
print(Y.shape)
print(Y[0])
# Create the neural network: 2 inputs, 15 hidden units, 3 outputs
N = Network([2, 15, 3])
# Training call, currently disabled:
#N.gradient_descent(1000, 0.001, X, Y)

(500, 2)
(1500, 2)
[ 0.55580968 -1.12677442]
(1500,)
0


In [42]:
import sklearn.datasets as datasets
digits = datasets.load_digits()
# Centre every feature column on zero. Build a new array rather than using
# ``X = digits.data`` followed by ``X -= ...``: X would alias digits.data and
# the in-place subtraction would silently alter the loaded dataset as well.
X = digits.data - np.mean(digits.data, axis=0)
y = digits.target

In [45]:
# Network for the digits data: 64 inputs, hidden layers of 32, 32, 16, 16 and 16 units, 10 outputs.
N = Network([64, 32, 32, 16, 16, 16, 10])

In [46]:
# Train for 1000 epochs with eta = 0.001 on the centred digits features and their labels.
N.gradient_descent(1000, 0.001, X, y)

Iteration:  0  Classification rate:  0.10072342793544797  Cost:  6363.1177328627255
Iteration:  1  Classification rate:  0.10072342793544797  Cost:  6360.835489869945
Iteration:  2  Classification rate:  0.10127991096271564  Cost:  6358.556999822373
Iteration:  3  Classification rate:  0.1018363939899833  Cost:  6356.282252308101
Iteration:  4  Classification rate:  0.1018363939899833  Cost:  6354.011236954488
Iteration:  5  Classification rate:  0.10072342793544797  Cost:  6351.743943427987
Iteration:  6  Classification rate:  0.10072342793544797  Cost:  6349.480361433972
Iteration:  7  Classification rate:  0.10072342793544797  Cost:  6347.220480716563
Iteration:  8  Classification rate:  0.10072342793544797  Cost:  6344.964291058459
Iteration:  9  Classification rate:  0.10072342793544797  Cost:  6342.711782280769
Iteration:  10  Classification rate:  0.10072342793544797  Cost:  6340.462944242834
Iteration:  11  Classification rate:  0.1001669449081803  Cost:  6338.217766842071
Iter

Iteration:  102  Classification rate:  0.10851419031719532  Cost:  6148.026117076171
Iteration:  103  Classification rate:  0.10851419031719532  Cost:  6146.079111982441
Iteration:  104  Classification rate:  0.10851419031719532  Cost:  6144.134973509747
Iteration:  105  Classification rate:  0.10851419031719532  Cost:  6142.193694483581
Iteration:  106  Classification rate:  0.10851419031719532  Cost:  6140.255267754587
Iteration:  107  Classification rate:  0.10851419031719532  Cost:  6138.319686198454
Iteration:  108  Classification rate:  0.10851419031719532  Cost:  6136.386942715805
Iteration:  109  Classification rate:  0.10907067334446299  Cost:  6134.457030232091
Iteration:  110  Classification rate:  0.10907067334446299  Cost:  6132.529941697478
Iteration:  111  Classification rate:  0.10907067334446299  Cost:  6130.605670086748
Iteration:  112  Classification rate:  0.10962715637173066  Cost:  6128.684208399184
Iteration:  113  Classification rate:  0.11018363939899833  Cost:

Iteration:  203  Classification rate:  0.11908736783528102  Cost:  5964.754006132093
Iteration:  204  Classification rate:  0.1196438508625487  Cost:  5963.064078635838
Iteration:  205  Classification rate:  0.12020033388981637  Cost:  5961.376403710086
Iteration:  206  Classification rate:  0.12020033388981637  Cost:  5959.6909762397645
Iteration:  207  Classification rate:  0.12020033388981637  Cost:  5958.007791125997
Iteration:  208  Classification rate:  0.1196438508625487  Cost:  5956.326843286047
Iteration:  209  Classification rate:  0.1196438508625487  Cost:  5954.648127653244
Iteration:  210  Classification rate:  0.12020033388981637  Cost:  5952.9716391769125
Iteration:  211  Classification rate:  0.12020033388981637  Cost:  5951.297372822304
Iteration:  212  Classification rate:  0.12020033388981637  Cost:  5949.625323570533
Iteration:  213  Classification rate:  0.12020033388981637  Cost:  5947.9554864185
Iteration:  214  Classification rate:  0.12020033388981637  Cost:  5

Iteration:  302  Classification rate:  0.12020033388981637  Cost:  5807.6324701055255
Iteration:  303  Classification rate:  0.12020033388981637  Cost:  5806.143035085532
Iteration:  304  Classification rate:  0.12020033388981637  Cost:  5804.655415061603
Iteration:  305  Classification rate:  0.12020033388981637  Cost:  5803.169606231581
Iteration:  306  Classification rate:  0.12020033388981637  Cost:  5801.685604804023
Iteration:  307  Classification rate:  0.12020033388981637  Cost:  5800.203406998142
Iteration:  308  Classification rate:  0.12020033388981637  Cost:  5798.723009043782
Iteration:  309  Classification rate:  0.12020033388981637  Cost:  5797.244407181368
Iteration:  310  Classification rate:  0.12020033388981637  Cost:  5795.767597661858
Iteration:  311  Classification rate:  0.1196438508625487  Cost:  5794.292576746714
Iteration:  312  Classification rate:  0.11908736783528102  Cost:  5792.819340707848
Iteration:  313  Classification rate:  0.11908736783528102  Cost:

Iteration:  418  Classification rate:  0.12242626599888703  Cost:  5646.070752891978
Iteration:  419  Classification rate:  0.1229827490261547  Cost:  5644.768937006334
Iteration:  420  Classification rate:  0.12353923205342238  Cost:  5643.468557057087
Iteration:  421  Classification rate:  0.12353923205342238  Cost:  5642.169610247411
Iteration:  422  Classification rate:  0.12353923205342238  Cost:  5640.872093787449
Iteration:  423  Classification rate:  0.12409571508069003  Cost:  5639.576004894278
Iteration:  424  Classification rate:  0.12409571508069003  Cost:  5638.2813407919
Iteration:  425  Classification rate:  0.12409571508069003  Cost:  5636.9880987112065
Iteration:  426  Classification rate:  0.12409571508069003  Cost:  5635.69627588996
Iteration:  427  Classification rate:  0.1246521981079577  Cost:  5634.405869572774
Iteration:  428  Classification rate:  0.1246521981079577  Cost:  5633.116877011082
Iteration:  429  Classification rate:  0.12520868113522537  Cost:  563

Iteration:  527  Classification rate:  0.12687813021702837  Cost:  5512.073812011087
Iteration:  528  Classification rate:  0.12687813021702837  Cost:  5510.913394857738
Iteration:  529  Classification rate:  0.12687813021702837  Cost:  5509.754145428107
Iteration:  530  Classification rate:  0.12687813021702837  Cost:  5508.596061567535
Iteration:  531  Classification rate:  0.12687813021702837  Cost:  5507.439141126325
Iteration:  532  Classification rate:  0.12687813021702837  Cost:  5506.283381959748
Iteration:  533  Classification rate:  0.12687813021702837  Cost:  5505.128781928013
Iteration:  534  Classification rate:  0.12743461324429606  Cost:  5503.975338896268
Iteration:  535  Classification rate:  0.12743461324429606  Cost:  5502.823050734574
Iteration:  536  Classification rate:  0.12687813021702837  Cost:  5501.671915317899
Iteration:  537  Classification rate:  0.12687813021702837  Cost:  5500.521930526099
Iteration:  538  Classification rate:  0.12632164718976072  Cost:

Iteration:  627  Classification rate:  0.12687813021702837  Cost:  5401.4824378414
Iteration:  628  Classification rate:  0.12687813021702837  Cost:  5400.428881123962
Iteration:  629  Classification rate:  0.12743461324429606  Cost:  5399.376299239405
Iteration:  630  Classification rate:  0.12799109627156371  Cost:  5398.324690469913
Iteration:  631  Classification rate:  0.12799109627156371  Cost:  5397.274053101489
Iteration:  632  Classification rate:  0.12799109627156371  Cost:  5396.224385423962
Iteration:  633  Classification rate:  0.12799109627156371  Cost:  5395.175685730971
Iteration:  634  Classification rate:  0.12799109627156371  Cost:  5394.12795231995
Iteration:  635  Classification rate:  0.12799109627156371  Cost:  5393.08118349213
Iteration:  636  Classification rate:  0.12799109627156371  Cost:  5392.035377552521
Iteration:  637  Classification rate:  0.12799109627156371  Cost:  5390.990532809906
Iteration:  638  Classification rate:  0.12799109627156371  Cost:  53

Iteration:  725  Classification rate:  0.12687813021702837  Cost:  5302.619213424471
Iteration:  726  Classification rate:  0.12687813021702837  Cost:  5301.653576106093
Iteration:  727  Classification rate:  0.12687813021702837  Cost:  5300.688762156378
Iteration:  728  Classification rate:  0.12687813021702837  Cost:  5299.72477019267
Iteration:  729  Classification rate:  0.12687813021702837  Cost:  5298.761598835357
Iteration:  730  Classification rate:  0.12687813021702837  Cost:  5297.799246707868
Iteration:  731  Classification rate:  0.12687813021702837  Cost:  5296.83771243666
Iteration:  732  Classification rate:  0.12743461324429606  Cost:  5295.876994651214
Iteration:  733  Classification rate:  0.12743461324429606  Cost:  5294.917091984029
Iteration:  734  Classification rate:  0.12743461324429606  Cost:  5293.958003070612
Iteration:  735  Classification rate:  0.12743461324429606  Cost:  5292.999726549481
Iteration:  736  Classification rate:  0.12743461324429606  Cost:  

Iteration:  823  Classification rate:  0.1296605453533667  Cost:  5211.700483187766
Iteration:  824  Classification rate:  0.1296605453533667  Cost:  5210.809403801701
Iteration:  825  Classification rate:  0.1296605453533667  Cost:  5209.919025778188
Iteration:  826  Classification rate:  0.1296605453533667  Cost:  5209.029348003064
Iteration:  827  Classification rate:  0.1296605453533667  Cost:  5208.140369364621
Iteration:  828  Classification rate:  0.1296605453533667  Cost:  5207.252088753597
Iteration:  829  Classification rate:  0.1296605453533667  Cost:  5206.3645050631785
Iteration:  830  Classification rate:  0.1296605453533667  Cost:  5205.477617188987
Iteration:  831  Classification rate:  0.1296605453533667  Cost:  5204.591424029078
Iteration:  832  Classification rate:  0.1296605453533667  Cost:  5203.705924483932
Iteration:  833  Classification rate:  0.1296605453533667  Cost:  5202.821117456458
Iteration:  834  Classification rate:  0.1296605453533667  Cost:  5201.9370

Iteration:  921  Classification rate:  0.1296605453533667  Cost:  5127.547337840993
Iteration:  922  Classification rate:  0.1296605453533667  Cost:  5126.7200513132575
Iteration:  923  Classification rate:  0.1296605453533667  Cost:  5125.893367861923
Iteration:  924  Classification rate:  0.1302170283806344  Cost:  5125.067286589941
Iteration:  925  Classification rate:  0.1302170283806344  Cost:  5124.241806602251
Iteration:  926  Classification rate:  0.1302170283806344  Cost:  5123.416927005776
Iteration:  927  Classification rate:  0.1302170283806344  Cost:  5122.592646909425
Iteration:  928  Classification rate:  0.1302170283806344  Cost:  5121.7689654240785
Iteration:  929  Classification rate:  0.1302170283806344  Cost:  5120.945881662592
Iteration:  930  Classification rate:  0.1302170283806344  Cost:  5120.12339473979
Iteration:  931  Classification rate:  0.1302170283806344  Cost:  5119.301503772459
Iteration:  932  Classification rate:  0.1302170283806344  Cost:  5118.4802