In [33]:
import numpy as np

from sklearn.datasets import load_digits
digits = load_digits()

In [34]:
digits.keys()

dict_keys(['target', 'images', 'target_names', 'data', 'DESCR'])

In [35]:
digits.target

array([0, 1, 2, ..., 8, 9, 8])

# Creating the training/test sets

In [36]:
def create_sets(n, dataset=None):
    
    """
    Randomly split a labelled data set into a training set and a test set.
    
    n : int, number of terms in training set (0 <= n <= number of samples)
    
    dataset : object with index-aligned `data` and `target` sequences, optional.
              When omitted, the notebook-level sklearn `digits` data set is used.
    
    *****************************
    
    returns
    
    training_set : dict, contains 'data' (list of arrays) and 'target' (list) where 
                   each target corresponds to a certain data of the same index.
                   
    test_set : dict, contains 'data' (list of arrays) and 'target' (list) where 
               each target corresponds to a certain data of the same index.
               Holds every sample that is not in the training set.
    
    *****************************
    
    raises
    
    ValueError : if n is negative or larger than the number of samples.
    """
    
    if dataset is None:
        dataset = digits                                    #Fall back to the notebook's global data set

    total = len(dataset.target)
    if not 0 <= n <= total:
        raise ValueError('n must be between 0 and {0}, got {1}'.format(total, n))

    indices = np.arange(total)                              #One integer index per sample
    np.random.shuffle(indices)                              #Randomize indices for random data set

    train_indices = indices[:n]                             #First n shuffled indices
    test_indices = indices[n:][::-1]                        #The rest, walked from the end of the shuffle

    training_set = {'data': [dataset.data[i] for i in train_indices],
                    'target': [dataset.target[i] for i in train_indices]}

    test_set = {'data': [dataset.data[i] for i in test_indices],
                'target': [dataset.target[i] for i in test_indices]}

    return training_set, test_set

In [37]:
# Put 1000 randomly chosen digits in the training set and the rest in the test set,
# then show the size of each.
training_set, test_set = create_sets(1000)
len(training_set['target']), len(test_set['target'])

(1000, 797)

In [38]:
def mini_batch(m, training_set):
    
    """
    Randomly partition a training set into mini batches of m examples each.
    
    m : int, number of terms in mini batch (must be positive)
    
    training_set : dict, from result of create_sets
    
    ************************************
    
    returns
    
    mini : list of dicts, one per mini batch. Each dict contains 'data' and 'target'
           lists of length m, index-aligned like the training set. Examples left over
           after the last full batch (n % m of them) are not included.
    
    ************************************
    
    raises
    
    ValueError : if m is not positive.
    """
    
    if m <= 0:
        raise ValueError('m must be a positive integer, got {0}'.format(m))

    n = len(training_set['target'])                     #Number of examples (len(training_set) only counts the dict keys)
    m_indices = np.arange(n)                            #All indices of the training set
    np.random.shuffle(m_indices)                        #Randomize their order
    
    mini = []                                           #Establish the 'mini' list
    for i in range(n // m):                             #One pass per full mini batch
        batch_indices = m_indices[i*m:(i+1)*m]          #Shuffled indices belonging to this batch
        mini.append({
            'data': [training_set['data'][idx] for idx in batch_indices],
            'target': [training_set['target'][idx] for idx in batch_indices],
        })
        
    return mini

In [39]:
# Scratch check of the mini batch arithmetic for n examples and batches of size m.
n = 1000
m = 10
qwe = int(n/m)      # number of full mini batches
que = n - qwe*m     # examples left over after the full batches
qwe,que

(100, 0)

## Arrays

The next couple of cells are just me experimenting with arrays and what they can do.

In [40]:
def perceptron(n,w,b):
    
    """
    Element-wise sigmoid neuron: applies sigmoid to n*w + b.
    
    n : input, array
    
    w : weights, array
    
    b : biases, array
    
    returns the sigmoid of the weighted input, with the broadcast shape of n, w and b
    """
    
    weighted_input = n*w                # Element-wise product, not a dot product
    return sigmoid(weighted_input + b)

In [41]:
def sigmoid(x):
    """Logistic function 1/(1 + e^(-x)), applied element-wise when x is an array."""
    denominator = 1 + np.exp(-x)
    return 1/denominator

In [42]:
# Try perceptron on 2x2 arrays: inputs of 1, random weights in (-10, 0], biases of 5.
# Note: this rebinds the notebook-level names n, w and b.
n = np.ones((2,2))
w = np.random.random((2,2))*-10
b = np.ones((2,2))*5
perceptron(n,w,b)

array([[ 0.6147401 ,  0.0635089 ],
       [ 0.00854868,  0.83363214]])

## Fun with Gates!

The following cells are exercises in working with perceptrons and building the different logic gates from them.

In [43]:
def nand(x1,x2):
    """
    NAND gate as a single step perceptron with weights (-2, -2) and bias 3.
    
    x1, x2 : binary
    
    returns 1 when the weighted input is positive, otherwise 0
    """
    weights = np.array([-2,-2])
    bias = np.array([3])
    inputs = np.array([x1,x2])
    weighted_input = np.sum(weights*inputs) + bias
    return 1 if weighted_input > 0 else 0

In [44]:
# Truth table check for nand.
assert nand(1,1) == 0
assert nand(1,0) == 1
assert nand(0,1) == 1
assert nand(0,0) == 1

In [45]:
def xor(x1,x2):
    """
    XOR gate wired from four NAND gates.
    
    x1, x2 : binary
    """
    shared = nand(x1,x2)            # Fed into both second-stage gates
    left = nand(x1,shared)
    right = nand(shared,x2)
    return nand(left,right)

In [46]:
# Truth table check for xor.
assert xor(1,1) == 0
assert xor(1,0) == 1
assert xor(0,1) == 1
assert xor(0,0) == 0

In [47]:
def notg(x1):
    """
    NOT gate: a NAND gate with both inputs tied to x1.
    
    x1 : binary
    """
    return nand(x1,x1)

In [48]:
# Truth table check for notg.
assert notg(1) == 0
assert notg(0) == 1

In [49]:
def andg(x1,x2):
    """
    AND gate: the negation of NAND.
    
    x1, x2 : binary
    """
    return notg(nand(x1,x2))

In [50]:
# Truth table check for andg.
assert andg(1,1) == 1
assert andg(1,0) == 0
assert andg(0,1) == 0
assert andg(0,0) == 0

In [51]:
def org(x1,x2):
    """
    OR gate: NAND of the two negated inputs.
    
    x1, x2 : binary
    """
    not_x1 = notg(x1)
    not_x2 = notg(x2)
    return nand(not_x1,not_x2)

In [52]:
# Truth table check for org.
assert org(1,1) == 1
assert org(1,0) == 1
assert org(0,1) == 1
assert org(0,0) == 0

Although I haven't gotten far with the actual neural network, I now have a much better idea of how to set it up.

# Start of Neural Network Work


In [53]:
digits.data

array([[  0.,   0.,   5., ...,   0.,   0.,   0.],
       [  0.,   0.,   0., ...,  10.,   0.,   0.],
       [  0.,   0.,   0., ...,  16.,   9.,   0.],
       ..., 
       [  0.,   0.,   1., ...,   6.,   0.,   0.],
       [  0.,   0.,   2., ...,  12.,   0.,   0.],
       [  0.,   0.,  10., ...,  12.,   1.,   0.]])

In [54]:
# np.random.shuffle works in place and returns None, which would leave `dig` empty and
# scramble digits.images relative to digits.target. permutation returns a shuffled copy
# (along the first axis) and leaves the original untouched.
dig = np.random.permutation(digits.images)

In [55]:
# Shuffled copy of the rows of digits.data. The original array keeps its row order,
# so it still matches digits.target; the rows of dig_shuffle do not.
dig_shuffle = np.random.permutation(digits.data)
dig_shuffle

In [56]:
def network_setup(ni, k, npl, no):
    
    """
    Create randomly initialised weights and biases for a fully connected network.
    
    ni : int, number of neurons in input layer
    
    k : int, number of layers (excluding input and output layer), must be >= 0
    
    npl : int, number of neurons per hidden layer
    
    no : int, number of neurons in output layer
    
    *************************************
    
    returns
    
    w : list of k+1 arrays, weights between consecutive layers; w[i] has shape
        (neurons in layer i, neurons in layer i+1)
    
    b : list of k+1 arrays, biases of each non-input layer; b[i] has shape
        (1, neurons in layer i+1)
    
    All values are drawn from the standard normal distribution (np.random.randn),
    so seed NumPy's global generator beforehand for reproducible networks.
    
    *************************************
    
    raises
    
    ValueError : if k is negative.
    """
    
    k = int(k)
    npl = int(npl)
    if k < 0:
        raise ValueError('k must be non-negative, got {0}'.format(k))
    
    sizes = [ni] + [npl]*k + [no]                       #Neurons per layer, input through output
    
    w = [np.random.randn(n_in, n_out)                   #One weight matrix per pair of consecutive layers
         for n_in, n_out in zip(sizes[:-1], sizes[1:])]
    
    b = [np.random.randn(1, n_out)                      #One bias row per non-input layer
         for n_out in sizes[1:]]
    
    return w,b

In [57]:
# Build a network with 64 inputs, 2 hidden layers of 16 neurons and 10 outputs,
# then display its weights and biases.
wt,bt = network_setup(64, 2, 16, 10)
wt,bt

([array([[ -9.35769434e-01,   2.76550537e-02,   9.31194711e-01, ...,
            5.68754796e-01,   5.49022708e-01,  -3.02431461e-01],
         [  1.20731703e+00,   1.37766662e+00,   6.36283930e-01, ...,
            1.31646817e-01,   1.10371482e+00,  -2.67684138e+00],
         [ -2.37698464e-01,  -1.75783260e-01,  -5.58238541e-01, ...,
           -7.77175322e-01,   9.94435002e-01,  -1.32151338e+00],
         ..., 
         [ -5.75960961e-01,  -9.12800301e-01,   5.98687811e-03, ...,
            3.67316770e-01,  -1.54763323e+00,   1.38630120e-03],
         [  6.81042873e-01,   1.42840171e+00,   4.52340501e-01, ...,
            1.05877219e+00,  -2.45661447e-01,  -4.14646956e-01],
         [  4.04397806e-01,   7.03427772e-01,   8.88822337e-01, ...,
           -9.73133515e-01,   1.07981279e+00,  -7.02804471e-03]]),
  array([[  1.45810082e+00,   1.78907668e+00,  -1.33256431e+00,
           -4.67876689e-01,  -1.02980354e-01,  -1.04640833e+00,
           -8.16730294e-01,   1.51798683e+00,  -6.1

In [58]:
# Shape checks for network_setup with 100 inputs, 3 hidden layers of 25 neurons and 3 outputs.
w_test, b_test = network_setup(100,3,25,3)
assert len(w_test) == 4
assert len(b_test) == 4
assert w_test[0].shape == (100, 25)
assert w_test[1].shape == w_test[2].shape == (25,25)
assert w_test[3].shape == (25,3)
assert b_test[0].shape == b_test[1].shape == b_test[2].shape == (1,25)
assert b_test[3].shape == (1,3)

In [59]:
def feedforward(w,b,x,k=None):
    
    """
    Propagate an input through the network, recording every layer's values.
    
    w, b : weights and biases from network_setup
    
    x : input neuron values, array whose length matches the rows of w[0]
    
    k : int, optional, number of hidden layers. When omitted it is taken from the
        weights themselves (len(w) - 1), so no notebook-level variable is needed.
    
    **************************************
    
    returns
    
    z : list of arrays, activation function inputs of each non-input layer
    
    a : list of arrays, activation function outputs; a[0] is the input x and
        a[-1] is the output of the network
    """
    
    n_layers = len(w) if k is None else int(k) + 1          #Hidden layers plus output layer
    
    z = []                                                  #Activation function inputs
    a = [np.asarray(x)]                                     #Activation function values, starting with the input
    for i in range(n_layers):
        zi = np.dot(a[i], w[i]) + b[i][0,:]                 #Weighted input of every neuron in the next layer at once
        z.append(zi)
        a.append(sigmoid(zi))                               #Array of outputs
    return z, a

In [60]:
# Fixed seed, then one forward pass through a small network:
# 4 inputs, 1 hidden layer of 3 neurons, 2 outputs.
np.random.seed(1)
ni = 4
k = 1
npl = 3
no = 2
wff,bff = network_setup(ni,k,npl,no)
x = np.random.randn(ni)
zff,aff = feedforward(wff,bff,x)

In [61]:
def cost(n, y, a):
    
    """
    Quadratic cost: the summed squared difference between y and a, divided by 2n.
    
    n : int, total number of training examples
    
    y : array, desired output
    
    a : array, output from activation functions (must broadcast against y)
    
    ***********************************
    
    returns
    
    c : float, cost function value
    """
    
    squared_error = np.sum((y-a)**2)                # Sum of squared differences over every element
    prefactor = 1/(2*n)
    return prefactor*squared_error

In [62]:
def sigmoid_prime(x):
    """
    First derivative of the sigmoid function, e^(-x)/(1 + e^(-x))^2, element-wise.
    
    The derivative is an even function of x, so it is evaluated at -|x|. That keeps
    the exponential in (0, 1]; evaluating e^(-x) directly overflows for large
    negative x and turns the result into inf/inf = nan instead of 0.
    """
    e = np.exp(-np.abs(x))
    return e/((1+e)**2)

In [63]:
def act_error(a, y, z, w):
    
    """
    Back-propagate the output error to get the error of every non-input layer.
    
    a : array, output from network (activations of the last layer)
    
    y : int, desired output as a class index; it is converted to a one-hot
        array with the same length as the output layer
    
    z : weighted input array (list of arrays, one per non-input layer)
    
    w : weights list of arrays, w[i] shaped (neurons in layer i, neurons in layer i+1)
    
    **********************************
    
    returns
    
    delta : list of arrays, error from each neuron, one array per non-input
            layer in the same order as z
    """
    
    y_con = np.zeros(len(z[-1]))
    y_con[y] = 1                                    #Converts a class index into the network output format
    
    delta = [(a - y_con)*sigmoid_prime(z[-1])]      #Error for last neuron layer
    for i in range(len(w) - 1):                     #Cycle through each other layer in reverse order
        #Error of a layer = (outgoing weights . error of the next layer) * sigmoid'(own weighted input)
        di = np.dot(w[-(i+1)], delta[0])*sigmoid_prime(z[-(i+2)])
        delta.insert(0,di)
    
    return delta

In [64]:
def replace(w,b,eta,m,delta,a):
    """
    Apply one gradient descent step to the weights and biases.
    
    w, b : lists of weight and bias arrays from network_setup; the list entries
           are replaced by the updated arrays and the lists are also returned
    
    eta : float, learning rate
    
    m : int, mini batch size (the step is scaled by eta/m)
    
    delta : list of arrays, layer errors from act_error. Each entry may be a single
            error vector or a 2-D array with one row per example.
    
    a : list of arrays, activations from feedforward (a[0] is the network input),
        shaped like delta: one vector, or one row per example
    
    *************************************
    
    returns
    
    w, b : the updated lists
    """
    for i in range(len(w)):
        layer = -(i+1)
        d = np.atleast_2d(delta[layer])                 #(examples, neurons in this layer)
        a_prev = np.atleast_2d(a[layer-1])              #(examples, neurons in previous layer)
        #Weight gradient has the shape of the weight matrix: a_prev^T . delta
        w[layer] = w[layer] - (eta/m)*np.dot(a_prev.T, d)
        #Bias gradient is the error itself, summed over the examples
        b[layer] = b[layer] - (eta/m)*d.sum(axis=0, keepdims=True)
    return w, b

In [65]:
def test_net(w,b,test_set,k):
    """
    Run every test example through the network and print how many it classifies exactly.
    
    w, b : weights and biases of the network
    
    test_set : dict with index-aligned 'data' and 'target' lists, as returned by create_sets
    
    k : int, number of hidden layers, passed on to feedforward
    
    An example counts as correct only when the output, thresholded at 0.5,
    equals the one-hot encoding of its target in every position.
    
    Prints 'Accuracy: correct/total'; nothing is returned.
    """
    count = 0
    total = len(test_set['target'])
    for x, y in zip(test_set['data'], test_set['target']):
        # NOTE(review): `nn` is not defined in the visible cells. Presumably it is a
        # module providing a 4-argument feedforward; confirm it is imported before use.
        z, a = nn.feedforward(w,b,x,k)
        prediction = (a[-1] > 0.5).astype(float)        #1 where the neuron fired, 0 elsewhere
        y_con = np.zeros(len(prediction))
        y_con[y] = 1                                    #One-hot encoding of the target
        if np.array_equal(prediction, y_con):           #Whole-array comparison; `==` alone is element-wise
            count += 1
    print('Accuracy: {0}/{1}'.format(count,total))