In [1]:
import numpy as np
import matplotlib.pyplot as plt
# Fix NumPy's global RNG seed so the random weight/bias initialization
# further down (np.random.random calls) is reproducible between runs.
np.random.seed(565113221)

def sigmoid(x): # sigmoid function
    """Return the logistic sigmoid 1 / (1 + exp(-x)), element-wise.

    Computed as exp(-log(1 + exp(-x))) via ``np.logaddexp`` so that large
    negative inputs do not overflow an intermediate ``exp`` (the naive form
    evaluates e**(-x), which becomes inf and emits an overflow warning).

    Args:
        x: scalar or array-like of real numbers.

    Returns:
        Value(s) in [0, 1] with the same shape as ``x``.
    """
    # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x))
    return np.exp(-np.logaddexp(0, -x))

def forward(x,W1,W2,b1,b2): # feed forward
    """Run one forward pass through the two-layer sigmoid network.

    Args:
        x: input column vector.
        W1: weight matrix mapping the input to the hidden layer.
        W2: weight matrix mapping the hidden layer to the output.
        b1: hidden-layer bias.
        b2: output-layer bias.

    Returns:
        Tuple ``(a, z, b, y)``:
            a -- ``W1 . x`` (hidden pre-activation, *before* the bias is added)
            z -- ``sigmoid(a + b1)`` (hidden activation)
            b -- ``W2 . z`` (output pre-activation, *before* the bias is added)
            y -- ``sigmoid(b + b2)`` (network output)
    """
    hidden_linear = np.dot(W1, x)
    hidden_out = sigmoid(hidden_linear + b1)
    output_linear = np.dot(W2, hidden_out)
    output_out = sigmoid(output_linear + b2)
    return hidden_linear, hidden_out, output_linear, output_out

def pred(X,W1,W2,b1,b2): # predict
    """Predict the network output for every row of ``X``.

    Args:
        X: 2-D array with one sample per row.
        W1, W2: layer weight matrices, passed through to ``forward``.
        b1, b2: layer biases, passed through to ``forward``.

    Returns:
        Array of shape ``(X.shape[0], 1)`` holding the network output for
        each sample, in row order.
    """
    y_pred = np.zeros((X.shape[0],1))
    for i, row in enumerate(X):
        # Feed the i-th sample itself as a column vector. The previous code
        # passed the module-level ``x`` here, so every row received the same
        # prediction (or a NameError when no global ``x`` existed).
        _,_,_,y = forward(row.reshape((-1,1)),W1,W2,b1,b2)
        y_pred[i] = y.ravel()
    return y_pred

# Training data: the XOR truth table.
X = np.array([[0,0],[0,1],[1,0],[1,1]]) # features 4 * 2
Y = np.array([[0],[1],[1],[0]]) # labels 4 * 1

epsilon = 0.12 # initialize all weights between -0.12 ~ 0.12
W1 = np.random.random((2,2)) * epsilon * 2 - epsilon # map from input to hidden
b1 = np.random.random((2,1)) * epsilon * 2 - epsilon # bias1
W2 = np.random.random((1,2)) * epsilon * 2 - epsilon # map from hidden to output
b2 = np.random.random((1,1)) * epsilon * 2 - epsilon # bias2
# NOTE(review): the recorded run of this cell shows the epoch cost barely
# moving (about 2.786 -> 2.782 over 30+ epochs), so these settings look too
# small for the network to actually learn XOR -- consider raising them.
epoch = 50 # maximum training turns
alpha = 0.01 # learning rate
for turn in range(epoch):
    print('turn:',turn,end=' ')
    epoch_cost = 0
    # Stochastic gradient descent: one parameter update per training sample.
    for index in range(X.shape[0]):
        x = X[index,:].reshape((-1,1)) # input as a column vector
        y = Y[index,:].reshape((-1,1)) # target as a column vector
        a,z,b,y_pred = forward(x,W1,W2,b1,b2) # feed forward

        # Binary cross-entropy for this sample (kept as a 1x1 array).
        cost = -y.dot(np.log(y_pred)) - (1-y).dot(np.log(1-y_pred)) # calculate cost
        epoch_cost += cost # calculate cumulative cost of this epoch

        # Back-propagation. Both error terms are computed BEFORE any weight
        # is modified, so the hidden-layer gradient uses the same W2 that
        # produced y_pred (previously W2 was updated first and then reused).
        # With a sigmoid output and cross-entropy cost, the output error
        # term simplifies to (y_pred - y).
        delta_out = y_pred - y
        # Propagate through W2 (summing over all output units) and the
        # sigmoid derivative z * (1 - z) of the hidden layer.
        delta_hidden = W2.T.dot(delta_out) * z * (1 - z)

        # Vectorized updates; these replace the element-by-element loops,
        # which assigned size-1 arrays into scalar slots (deprecated in
        # recent NumPy) and relied on a loop variable leaking into the b1 loop.
        W2 -= alpha * delta_out.dot(z.T) # update W2
        b2 -= alpha * delta_out # update b2
        W1 -= alpha * delta_hidden.dot(x.T) # update W1
        b1 -= alpha * delta_hidden # update b1

    print('cost:',epoch_cost)


print('prediction\n',pred(X,W1,W2,b1,b2))
print('ground-truth\n',Y)

turn: 0 cost: [[2.78597329]]
turn: 1 cost: [[2.78580064]]
turn: 2 cost: [[2.78563317]]
turn: 3 cost: [[2.78547074]]
turn: 4 cost: [[2.78531319]]
turn: 5 cost: [[2.78516037]]
turn: 6 cost: [[2.78501215]]
turn: 7 cost: [[2.78486839]]
turn: 8 cost: [[2.78472895]]
turn: 9 cost: [[2.7845937]]
turn: 10 cost: [[2.78446252]]
turn: 11 cost: [[2.78433528]]
turn: 12 cost: [[2.78421187]]
turn: 13 cost: [[2.78409217]]
turn: 14 cost: [[2.78397607]]
turn: 15 cost: [[2.78386346]]
turn: 16 cost: [[2.78375424]]
turn: 17 cost: [[2.7836483]]
turn: 18 cost: [[2.78354555]]
turn: 19 cost: [[2.78344589]]
turn: 20 cost: [[2.78334923]]
turn: 21 cost: [[2.78325547]]
turn: 22 cost: [[2.78316454]]
turn: 23 cost: [[2.78307634]]
turn: 24 cost: [[2.7829908]]
turn: 25 cost: [[2.78290783]]
turn: 26 cost: [[2.78282735]]
turn: 27 cost: [[2.7827493]]
turn: 28 cost: [[2.78267359]]
turn: 29 cost: [[2.78260016]]
turn: 30 cost: [[2.78252894]]
turn: 31 cost: [[2.78245987]]
turn: 32 cost: [[2.78239287]]
turn: 33 cost: [[2.78232