In [5]:
import numpy as np
from sklearn.datasets import load_breast_cancer

# Load the breast-cancer dataset that ships with scikit-learn.
data = load_breast_cancer()

# Both arrays are wrapped in np.matrix, so `*` between them (and anything
# derived from them) is a matrix product rather than an elementwise one.
X = np.matrix(data.data)
y = np.matrix(data.target).T   # targets as a single column

# M = number of rows of X, N = number of columns of X.
M, N = X.shape

In [6]:
# Normalize each input feature

def normalize(X):
    """Standardize every column of X to zero mean and unit variance.

    Parameters
    ----------
    X : 2-D array-like (np.ndarray or np.matrix)
        Data with one column per feature. X itself is not modified.

    Returns
    -------
    Same shape as X. Each column has its mean subtracted and is divided by
    its population standard deviation (np.std with the default ddof=0).
    A column whose values are all identical has zero standard deviation; it
    is returned as all zeros instead of NaN.
    """
    # Broadcasting subtracts the per-column mean from every row.
    centered = X - np.mean(X, axis=0)
    spread = np.std(centered, axis=0)
    # Guard against 0/0 for constant columns: dividing the (all-zero)
    # centered column by 1 keeps it at zero.
    spread = np.where(spread == 0, 1.0, spread)
    return np.divide(centered, spread)

# Standardized copy of the feature matrix; X itself is left unchanged.
XX = normalize(X)

In [7]:
# Let's start with a 3-layer network with sigmoid activation functions,
# 6 units in layer 1, and 5 units in layer 2.

# Hidden-layer widths.
h1, h2 = 6, 5

# Units per layer, from the N inputs through to the single output unit.
layer_sizes = [N, h1, h2, 1]

# W[l] has shape (units in layer l-1, units in layer l) and b[l] has shape
# (units in layer l, 1). Index 0 is an empty placeholder so that the list
# index equals the layer number. All entries are drawn from a normal
# distribution with mean 0 and standard deviation 0.1.
W = [[]] + [np.random.normal(0, 0.1, [n_in, n_out])
            for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:])]
b = [[]] + [np.random.normal(0, 0.1, [n_out, 1])
            for n_out in layer_sizes[1:]]

# Number of weight layers (the placeholder at index 0 is not counted).
L = len(W) - 1

In [8]:
def act(z):
    """Logistic sigmoid 1 / (1 + exp(-z)), applied elementwise to z."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator

def actder(z):
    """Elementwise derivative of act at z, computed as act(z) * (1 - act(z))."""
    s = act(z)
    return np.multiply(s, 1 - s)

def ff(x,W,b):
    """Feed x forward through every layer and return the final activation.

    W and b are lists indexed by layer number; index 0 is never read.
    Each layer computes act(W[l].T * a + b[l]).
    NOTE(review): `*` is a matrix product only when x is an np.matrix;
    presumably callers always pass a column matrix -- confirm.
    """
    activation = x
    for layer in range(1, len(W)):
        activation = act(W[layer].T * activation + b[layer])
    return activation

def loss(y, yhat, eps=1e-12):
    """Binary cross-entropy between target y and prediction yhat.

    Parameters
    ----------
    y : target value(s).
    yhat : predicted probability (scalar or array).
    eps : yhat is clipped to [eps, 1 - eps] before taking logarithms, so a
        fully saturated prediction (exactly 0 or 1) yields a large finite
        loss, or ~0 when it is correct, instead of inf or NaN from
        0 * log(0).

    Returns
    -------
    -((1 - y) * log(1 - yhat) + y * log(yhat)), evaluated on the clipped yhat.
    NOTE(review): `*` is kept as in the original, so for np.matrix operands
    it is a matrix product, not an elementwise one.
    """
    yhat = np.clip(yhat, eps, 1 - eps)
    return -((1-y) * np.log(1-yhat) + y * np.log(yhat))

In [None]:
    
# Use mini-batch size 1
# Stochastic gradient descent: the weights are updated after every single
# training pattern, and the patterns are visited in a fresh random order on
# each pass over the data.
# NOTE(review): the loop variable `iter` rebinds the Python builtin of the
# same name at notebook scope; a later cell calling iter() would break.

alpha = 0.01  # step size applied to every gradient update
max_iter = 1000  # number of full passes over the M training patterns
for iter in range(0, max_iter):
    loss_this_iter = 0  # running sum of per-pattern losses for this pass
    order = np.random.permutation(M)  # new visiting order on each pass
    for i in range(0,M):
        
        # Grab the pattern order[i]
        
        x_this = XX[order[i],:].T  # row order[i] of XX, transposed
        y_this = y[order[i],0]  # matching entry from column 0 of y

        # Feed forward step
        # Index 0 of each list is a placeholder so that list index l lines up
        # with layer l; a[0] holds the input pattern itself.
        
        a = [x_this]
        z = [[]]
        delta = [[]]
        dW = [[]]
        db = [[]]
        for l in range(1,L+1):
            z.append(W[l].T*a[l-1]+b[l])  # pre-activation of layer l
            a.append(act(z[l]))  # activation of layer l
            # Just to give arrays the right shape for the backprop step
            delta.append([]); dW.append([]); db.append([])
            
        # a[L][0,0] is the single network output extracted as a scalar.
        loss_this_pattern = loss(y_this, a[L][0,0])
        loss_this_iter = loss_this_iter + loss_this_pattern
            
        # Backprop step
        # delta[l] holds the derivative of the loss with respect to z[l].
        # With act() as the output activation and loss() as the cost, that
        # derivative at the output layer reduces to (output - target).

        delta[L] = a[L] - y_this
        for l in range(L,0,-1):
            db[l] = delta[l].copy()
            # Outer product of the layer input and delta: same shape as W[l].
            dW[l] = a[l-1] * delta[l].T
            if l > 1:
                # Push delta back through W[l], then scale elementwise by the
                # activation derivative of layer l-1.
                delta[l-1] = np.multiply(actder(z[l-1]), W[l] *
                             delta[l])
                
        # Check delta calculation
        # Disabled (`if False`) finite-difference check: it perturbs
        # W[1][10,0] by `diff` and compares the resulting change in loss with
        # the backprop value dW[1][10,0].
        # NOTE(review): the perturbation is never undone, so enabling this
        # branch would leave W[1][10,0] shifted by `diff` after every pattern.
        # The variable names and the printed label say "db" although the
        # quantity being checked is a weight gradient.
        
        if False:
            print('Target: %f' % y_this)
            print('y_hat: %f' % a[L][0,0])
            print(db)
            y_pred = ff(x_this,W,b)
            diff = 1e-3
            W[1][10,0] = W[1][10,0] + diff
            y_pred_db = ff(x_this,W,b)
            L1 = loss(y_this,y_pred)
            L2 = loss(y_this,y_pred_db)
            db_finite_difference = (L2-L1)/diff
            print('Original out %f, perturbed out %f' %
                 (y_pred[0,0], y_pred_db[0,0]))
            print('Theoretical dW %f, calculated db %f' %
                  (dW[1][10,0], db_finite_difference[0,0]))
        
        # Gradient-descent update of every layer's weights and biases, applied
        # only after all gradients for this pattern have been computed.
        for l in range(1,L+1):            
            W[l] = W[l] - alpha * dW[l]
            b[l] = b[l] - alpha * db[l]
        
    # Report the loss summed over all M patterns of this pass.
    print('Iteration %d loss %f' % (iter, loss_this_iter))

Iteration 0 loss 378.306881
Iteration 1 loss 376.171569
Iteration 2 loss 375.629844
Iteration 3 loss 374.579677
Iteration 4 loss 372.561964
Iteration 5 loss 369.130907
Iteration 6 loss 360.610839
Iteration 7 loss 348.432113
Iteration 8 loss 318.941665
Iteration 9 loss 269.429416
Iteration 10 loss 209.540712
Iteration 11 loss 159.182504
Iteration 12 loss 122.985221
Iteration 13 loss 98.599722
Iteration 14 loss 82.671303
Iteration 15 loss 71.726888
Iteration 16 loss 63.946665
Iteration 17 loss 58.336312
Iteration 18 loss 54.067648
Iteration 19 loss 50.673430
Iteration 20 loss 48.100940
Iteration 21 loss 45.886620
Iteration 22 loss 44.005641
Iteration 23 loss 42.457702
Iteration 24 loss 40.961580
Iteration 25 loss 39.889138
Iteration 26 loss 38.845684
Iteration 27 loss 37.951864
Iteration 28 loss 37.028534
Iteration 29 loss 36.424398
Iteration 30 loss 35.790592
Iteration 31 loss 35.161696
Iteration 32 loss 34.607348
Iteration 33 loss 34.170126
Iteration 34 loss 33.627383
Iteration 35 loss

Iteration 286 loss 10.779198
Iteration 287 loss 10.568191
Iteration 288 loss 10.580141
Iteration 289 loss 10.576620
Iteration 290 loss 10.531476
Iteration 291 loss 10.455175
Iteration 292 loss 10.435509
Iteration 293 loss 10.364306
Iteration 294 loss 10.228898
Iteration 295 loss 10.268953
Iteration 296 loss 10.174840
Iteration 297 loss 10.124904
Iteration 298 loss 10.031175
Iteration 299 loss 9.957024
Iteration 300 loss 9.932502
Iteration 301 loss 9.925031
Iteration 302 loss 9.880664
Iteration 303 loss 9.726750
Iteration 304 loss 9.790531
Iteration 305 loss 9.750516
Iteration 306 loss 9.608154
Iteration 307 loss 9.623615
Iteration 308 loss 9.581094
Iteration 309 loss 9.442535
Iteration 310 loss 9.466988
Iteration 311 loss 9.400089
Iteration 312 loss 9.352625
Iteration 313 loss 9.322280
Iteration 314 loss 9.260649
Iteration 315 loss 9.227946
Iteration 316 loss 9.173039
Iteration 317 loss 9.095603
Iteration 318 loss 9.065046
Iteration 319 loss 9.022733
Iteration 320 loss 8.982787
Iterati