In [19]:
import numpy as np
from sklearn.datasets import load_breast_cancer

# Load the scikit-learn breast-cancer dataset and wrap it in np.matrix so that
# `*` means matrix multiplication in the cells that follow.
data = load_breast_cancer()
X = np.matrix(data.data)        # one row per sample, one column per feature
y = np.matrix(data.target).T    # targets as a column, one row per sample
M, N = X.shape                  # M = number of samples, N = number of features

# Normalize each input feature

def normalize(X):
    """Standardize every column of X to zero mean and unit standard deviation.

    Parameters
    ----------
    X : 2-D np.matrix or np.ndarray
        One row per sample, one column per feature.

    Returns
    -------
    Same container type and shape as X, with each column centred on its mean
    and divided by its (population) standard deviation. A column whose
    standard deviation is exactly zero (a constant feature) is returned as all
    zeros instead of NaN/inf.
    """
    # Broadcasting subtracts the per-column mean from every row.
    centered = X - np.mean(X, 0)
    spread = np.std(centered, 0)
    # Dividing by a zero spread would turn a constant column into NaNs; use 1
    # there so the (already all-zero) centred column is passed through.
    safe_spread = np.where(spread == 0, 1.0, spread)
    return np.divide(centered, safe_spread)

# Standardized copy of X; the training loop reads its input patterns from XX.
XX = normalize(X)

In [20]:
# Network as built below: N inputs -> one hidden layer of h1 = 6 units -> a
# single output unit (input, hidden and output make three layers of units,
# connected by two weight matrices).
# NOTE(review): h2 = 5 is defined for a second hidden layer but no weight or
# bias below uses it -- confirm whether a second hidden layer was intended.
h2 = 5
h1 = 6

# W[l] has shape (units in layer l-1, units in layer l) and b[l] is a column
# with one entry per unit in layer l. Index 0 holds an empty placeholder so
# that list indices match layer numbers starting at 1.
# Entries are drawn from a normal distribution with mean 0 and std 0.1; no
# random seed is set in this cell, so the initial values differ between runs.
W = [[], np.random.normal(0,0.1,[N,h1]),
         np.random.normal(0,0.1,[h1,1])]
b = [[], np.random.normal(0,0.1,[h1,1]),
         np.random.normal(0,0.1,[1,1])]
# L is the number of weight layers (the placeholder at index 0 is not counted).
L = len(W)-1
print(len(W))

3


In [21]:
def act(z):
    """Logistic sigmoid activation, applied elementwise: 1 / (1 + exp(-z))."""
    denominator = 1 + np.exp(-z)
    return np.divide(1, denominator)

def actder(z):
    """Derivative of the sigmoid `act` at z, elementwise: act(z) * (1 - act(z))."""
    sig = act(z)
    # np.multiply keeps the product elementwise even when sig is an np.matrix.
    return np.multiply(sig, 1 - sig)

def actder_relu(z):
    """Derivative of ReLU, elementwise: 0 where z < 0, otherwise 1.

    Parameters
    ----------
    z : array-like
        Pre-activations. Any 2-D shape is accepted (the previous version only
        worked for a single column); 1-D or scalar input is treated as a
        column.

    Returns
    -------
    np.ndarray of float64 with the same shape as the (2-D) input. Note that
    the derivative at exactly z == 0 is taken to be 1.
    """
    pre = np.asarray(z)
    if pre.ndim < 2:
        # Keep the historical behaviour of returning a column for 1-D input.
        pre = pre.reshape(-1, 1)
    return np.where(pre < 0, 0.0, 1.0)


def ff(x,W,b):
    """Feed a single input column forward through the network.

    Parameters
    ----------
    x : column vector (np.matrix or 2-D np.ndarray)
        Input pattern, one row per input feature.
    W : list
        W[l] is the weight matrix of layer l for l >= 1, stored as
        (inputs, outputs); W[0] is an unused placeholder.
    b : list
        b[l] is the bias column of layer l for l >= 1; b[0] is unused.

    Returns
    -------
    The activation of the last layer, after applying `act` at every layer.
    """
    a = x
    for l in range(1, len(W)):
        # `@` is always a matrix product. `*` only means that for np.matrix
        # operands and would silently multiply elementwise for plain ndarrays.
        a = act(W[l].T @ a + b[l])
    return a

def loss(y, yhat, eps=1e-12):
    """Binary cross-entropy between target y and predicted probability yhat.

    Parameters
    ----------
    y : scalar or array-like
        Target(s), expected to be 0 or 1.
    yhat : scalar or array-like
        Predicted probability (or probabilities) of class 1.
    eps : float, optional
        yhat is clipped to [eps, 1 - eps] before taking logarithms, so that a
        saturated prediction of exactly 0 or 1 yields a large finite loss
        rather than NaN/inf.

    Returns
    -------
    -((1 - y) * log(1 - yhat) + y * log(yhat)), computed elementwise.
    """
    p = np.clip(yhat, eps, 1 - eps)
    # np.multiply keeps the products elementwise even for np.matrix operands,
    # for which `*` would be a matrix product.
    return -(np.multiply(1 - y, np.log(1 - p)) + np.multiply(y, np.log(p)))
    

In [22]:
# Sanity check: a 3x1 integer column with one negative entry should map to
# 1 for the non-negative entries and 0 for the negative one.
asd = np.array([1, -2, 3]).reshape(3, 1)
actder_relu(asd)

array([[1.],
       [0.],
       [1.]])

In [24]:
# Stochastic gradient descent with mini-batch size 1: every pattern is fed
# forward, its gradient is back-propagated, and the weights are updated
# immediately. The per-pattern losses are summed and printed once per pass.

alpha = 0.1              # learning rate
max_iter = 100           # number of passes over the data
CHECK_GRADIENT = False   # set True to compare one analytic dW entry with a finite difference

# The loop variable is named `epoch` rather than `iter` so that the builtin
# iter() is not shadowed for the rest of the notebook.
for epoch in range(0, max_iter):
    loss_this_iter = 0
    order = np.random.permutation(M)
    for i in range(0,M):

        # Grab the pattern order[i] (x as a column, y as a scalar)

        x_this = XX[order[i],:].T
        y_this = y[order[i],0]

        # Feed forward step: z[l] is the pre-activation and a[l] the activation
        # of layer l; index 0 holds the input / an empty placeholder.

        a = [x_this]
        z = [[]]
        for l in range(1,L+1):
            # `*` is a matrix product here because the operands are np.matrix.
            z.append(W[l].T*a[l-1]+b[l])
            a.append(act(z[l]))

        loss_this_pattern = loss(y_this, a[L][0,0])
        loss_this_iter = loss_this_iter + loss_this_pattern

        # Backprop step. Placeholders give the lists one slot per layer.

        delta = [[] for _ in range(L+1)]
        dW = [[] for _ in range(L+1)]
        db = [[] for _ in range(L+1)]

        # Output-layer error for a sigmoid output with cross-entropy loss.
        delta[L] = a[L] - y_this
        for l in range(L,0,-1):
            db[l] = delta[l].copy()
            dW[l] = a[l-1] * delta[l].T
            if l > 1:
                # Every layer uses the sigmoid `act` in the forward pass, so
                # the sigmoid derivative is the matching one for all hidden
                # layers. Layer 0 is the input and needs no delta.
                delta[l-1] = np.multiply(actder(z[l-1]), W[l] * delta[l])

        # Check delta calculation against a finite difference on W[1][10,0]

        if CHECK_GRADIENT:
            print('Target: %f' % y_this)
            print('y_hat: %f' % a[L][0,0])
            print(db)
            y_pred = ff(x_this,W,b)
            diff = 1e-3
            w_saved = W[1][10,0]
            W[1][10,0] = w_saved + diff
            y_pred_db = ff(x_this,W,b)
            # Restore the weight so the check does not alter training.
            W[1][10,0] = w_saved
            L1 = loss(y_this,y_pred)
            L2 = loss(y_this,y_pred_db)
            dW_finite_difference = (L2-L1)/diff
            print('Original out %f, perturbed out %f' %
                 (y_pred[0,0], y_pred_db[0,0]))
            print('Theoretical dW %f, finite-difference dW %f' %
                  (dW[1][10,0], dW_finite_difference[0,0]))

        # Gradient-descent update for every layer
        for l in range(1,L+1):
            W[l] = W[l] - alpha * dW[l]
            b[l] = b[l] - alpha * db[l]

    print('Iteration %d loss %f' % (epoch, loss_this_iter))

Iteration 0 loss 3.563132
Iteration 1 loss 3.407470
Iteration 2 loss 2.972012
Iteration 3 loss 3.422946
Iteration 4 loss 3.107746
Iteration 5 loss 3.200714
Iteration 6 loss 3.015608
Iteration 7 loss 3.303992
Iteration 8 loss 3.125736
Iteration 9 loss 2.422943
Iteration 10 loss 3.281865
Iteration 11 loss 2.839812
Iteration 12 loss 2.506987
Iteration 13 loss 2.909919
Iteration 14 loss 3.143997
Iteration 15 loss 2.574789
Iteration 16 loss 3.011916
Iteration 17 loss 2.660247
Iteration 18 loss 2.665575
Iteration 19 loss 2.284274
Iteration 20 loss 2.971305
Iteration 21 loss 2.478878
Iteration 22 loss 2.396945
Iteration 23 loss 2.408013
Iteration 24 loss 2.357275
Iteration 25 loss 2.595456
Iteration 26 loss 2.322123
Iteration 27 loss 2.301611
Iteration 28 loss 2.502967
Iteration 29 loss 2.333570
Iteration 30 loss 2.221720
Iteration 31 loss 2.315972
Iteration 32 loss 2.225939
Iteration 33 loss 2.007063
Iteration 34 loss 2.144705
Iteration 35 loss 2.249719
Iteration 36 loss 2.102543
Iteration 3