In [2]:
import numpy as np
# plotting import
import matplotlib.pyplot as plt
%matplotlib notebook



In [3]:
def prepend_one(X):
    
    """prepend a one vector to X."""
    
    # get number of training points
    n_trains = X.shape[0]       
    # creat a column vector (dim = nx1) with values of 1s only.
    ones = np.ones(X.shape[0])
    
    # create new data matrix of linear features.
    phi_X = np.column_stack([np.ones(X.shape[0]), X])
    
    return phi_X


In [4]:
sigmoid = lambda z: 1 / (1 + np.exp(-z))

# Simple MLP for learning Boolean AND and  XOR functions

In [5]:
#input for AND function

X = prepend_one(np.array( [[0,0],[0,1],[1,0],[1,1]]))

w = [-30.0,20.0,20.0]

y = sigmoid(np.dot(X,w))

print(y)


[9.35762297e-14 4.53978687e-05 4.53978687e-05 9.99954602e-01]


In [6]:
# XOR function: using a 2-layered NN to estimate XOR (with sigmoid activations at hidden nodes and output)

# augment bias term into inputs
X = prepend_one(np.array( [[0,0],[0,1],[1,0],[1,1]]))

# weights of the first layer (3 hidden nodes in which one bias term)
w1 = np.array([[-30.0,20.0,20.0],[10,-20,-20]]).T
# weights of the second layer (one output node)
w2 = [10,-20,-20]


# forward progpagation
g = prepend_one(sigmoid(np.dot(X,w1)))

y = sigmoid(np.dot(g,w2))


print(y)

[4.54391049e-05 9.99954520e-01 9.99954520e-01 4.54391049e-05]



# Backpropagation code
## This is code is from http://cs229.stanford.edu by Andrew Ng


In [13]:
# This is code is from http://cs229.stanford.edu by Andrew Ng
# Backpropagation code
import numpy as np
from copy import copy

# Example backpropagation code for binary classification with 2-layer
# neural network (single hidden layer)



# forward pass: receiving input x and return output y
def fprop(x, y, params):
    # Follows procedure given in notessigmoid
    W1, b1, W2, b2 = [params[key] for key in ('W1', 'b1', 'W2', 'b2')]
    z1 = np.dot(W1, x) + b1
    a1 = sigmoid(z1)
    z2 = np.dot(W2, a1) + b2
    a2 = sigmoid(z2)
    loss = -(y * np.log(a2) + (1-y) * np.log(1-a2))
    ret = {'x': x, 'y': y, 'z1': z1, 'a1': a1, 'z2': z2, 'a2': a2, 'loss': loss}
    for key in params:        
        ret[key] = params[key]   
    return ret

#backward pass: return updated weights
def bprop(fprop_cache):
    # Follows procedure given in notes
    x, y, z1, a1, z2, a2, loss = [fprop_cache[key] for key in ('x', 'y', 'z1', 'a1', 'z2', 'a2', 'loss')]
    de2 = - a2*(1-a2)*(y - a2)
    dW2 = np.dot(de2, a1.T)
    db2 = de2
    de1 = np.dot(fprop_cache['W2'].T, de2) * a1 * (1-a1)
    dW1 = np.dot(de1, x.T)
    db1 = de1
    return {'b1': db1, 'W1': dW1, 'b2': db2, 'W2': dW2}

# main code
if __name__ == '__main__':
    # Initialize random parameters/weights and inputs
    W1 = np.random.rand(2,2)
    b1 = np.random.rand(2, 1)
    W2 = np.random.rand(1, 2)
    b2 = np.random.rand(1, 1)
    params = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}
    x = np.random.rand(2, 1)
    y = np.random.randint(0, 2)  # Returns 0/1

    fprop_cache = fprop(x, y, params)
    #backpropagation (to compute gradients for one iteration: at current x,y) 
    bprop_cache = bprop(fprop_cache)
   
        
    print("weights before update:")
    for key in params:        
        print(key," : ",params[key])
    fprop_cache=fprop(x,y,params)
    print("loss before update: ", fprop_cache['loss'])
    print()
    print("weights after update:")
    for key in params:
        params[key] = params[key] - 0.1 * bprop_cache[key] 
        print(key," : ",params[key])
    fprop_cache=fprop(x,y,params)
    print("loss after 1 update: ", fprop_cache['loss'])
    
    ############### do more updates ###########################
    
    for iteration in range(100):
        fprop_cache = fprop(x, y, params)
        #backpropagation (to compute gradients for one iteration: at current x,y) 
        bprop_cache = bprop(fprop_cache)
        for key in params:
            params[key] = params[key] - 0.5 * bprop_cache[key] 
        
        fprop_cache=fprop(x,y,params)
        print("MSE loss: ", fprop_cache['loss'], " after iteration: ",iteration)
        

weights before update:
b1  :  [[0.07965221]
 [0.21098417]]
b2  :  [[0.18303908]]
W1  :  [[0.59912385 0.04753944]
 [0.29077768 0.98248603]]
W2  :  [[0.56490147 0.50417534]]
loss before update:  [[1.17211337]]

weights after update:
b1  :  [[0.07758436]
 [0.20922561]]
b2  :  [[0.16828138]]
W1  :  [[0.59882369 0.04707027]
 [0.29052242 0.98208703]]
W2  :  [[0.55686982 0.49507239]]
loss after 1 update:  [[1.15473766]]
MSE loss:  [[1.07003859]]  after iteration:  0
MSE loss:  [[0.98929773]]  after iteration:  1
MSE loss:  [[0.91335118]]  after iteration:  2
MSE loss:  [[0.84282065]]  after iteration:  3
MSE loss:  [[0.77807358]]  after iteration:  4
MSE loss:  [[0.71922051]]  after iteration:  5
MSE loss:  [[0.66614566]]  after iteration:  6
MSE loss:  [[0.61855878]]  after iteration:  7
MSE loss:  [[0.57605397]]  after iteration:  8
MSE loss:  [[0.5381641]]  after iteration:  9
MSE loss:  [[0.50440408]]  after iteration:  10
MSE loss:  [[0.47430139]]  after iteration:  11
MSE loss:  [[0.447