In [81]:
import numpy as np

In [106]:
# Initial parameter vector for the 2-2-1 XOR network, drawn uniformly from [0, 1).
# Layout consumed by xor_net: [0:4] input->hidden weights, [4:6] hidden->output
# weights, [6:8] hidden biases, [8] output bias.
weights = np.random.uniform(low=0.0, high=1.0, size=9)

In [107]:
def xor_net(inputs, weights):
    """
    Run one forward pass through the 2-2-1 sigmoid network.

    Each layer computes ``sigmoid(x . W + b)``.

    Parameters
    ----------
    inputs : sequence of 2 numbers
        The input pattern, e.g. ``[0, 1]``.
    weights : sequence of at least 9 numbers
        Flat parameter vector, read as follows:
        ``weights[0:4]`` -- input->hidden weights in row-major order
        (``[in0->h0, in0->h1, in1->h0, in1->h1]``);
        ``weights[4:6]`` -- hidden->output weights (``[h0->out, h1->out]``);
        ``weights[6:8]`` -- hidden biases;
        ``weights[8]``   -- output bias.

    Returns
    -------
    tuple
        ``(hidden_in, hidden_out, output_in, output_out)``: the pre- and
        post-activation values of the hidden layer (shape ``(1, 2)`` for a
        single length-2 input) and of the output unit (shape ``(1, 1)``).
    """
    # Unpack the flat vector straight into the layer matrices / bias rows.
    w_hidden = np.array([[weights[0], weights[1]],
                         [weights[2], weights[3]]], dtype=float)
    w_output = np.array([[weights[4]],
                         [weights[5]]], dtype=float)
    bias_hidden = np.array([[weights[6], weights[7]]], dtype=float)
    bias_output = np.array([[weights[8]]], dtype=float)

    # Hidden layer: affine map followed by the sigmoid non-linearity.
    hidden_in = np.asarray(inputs).dot(w_hidden) + bias_hidden
    hidden_out = sigmoid(hidden_in)

    # Output layer: same recipe applied to the hidden activations.
    output_in = hidden_out.dot(w_output) + bias_output
    output_out = sigmoid(output_in)

    return hidden_in, hidden_out, output_in, output_out

In [108]:
def mse(weights):
    """
    Evaluate the XOR network on all four input patterns and score it.

    Parameters
    ----------
    weights : sequence of 9 numbers
        Flat parameter vector passed unchanged to ``xor_net``.

    Returns
    -------
    tuple
        ``(ms_err, err)`` where ``err`` stacks the signed errors
        (prediction - target) for the patterns 00, 01, 10, 11 in that order,
        and ``ms_err`` is the mean of their squares.
    """
    # (input pattern, expected XOR output)
    truth_table = (([0, 0], 0), ([0, 1], 1), ([1, 0], 1), ([1, 1], 0))

    err = np.array([
        xor_net(pattern, weights)[-1] - target
        for pattern, target in truth_table
    ])
    ms_err = np.mean(np.square(err))

    return ms_err, err

In [109]:
def sigmoid(x):
    """
    The logistic sigmoid activation function, ``1 / (1 + exp(-x))``.

    Evaluated as ``exp(-log(1 + exp(-x)))`` via ``np.logaddexp`` so that
    large negative inputs yield 0.0 without the intermediate ``exp(-x)``
    overflowing (the naive form emits an overflow RuntimeWarning there).

    Parameters
    ----------
    x : number or numpy array
        Pre-activation value(s).

    Returns
    -------
    numpy float or array
        Element-wise sigmoid of ``x``, with the same shape as ``x``.
    """
    # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x)), computed stably.
    return np.exp(-np.logaddexp(0, -x))
    
def delsigmoid(x):
    """
    The first derivative of the sigmoid function wrt x.

    ``x`` is the *pre-activation* value (the argument of the sigmoid), so the
    result is ``s(x) * (1 - s(x))`` with ``s`` the logistic sigmoid -- not
    ``x * (1 - x)``, which is only valid when ``x`` is already the sigmoid's
    output.

    Parameters
    ----------
    x : number or numpy array
        Pre-activation value(s).

    Returns
    -------
    numpy float or array
        Element-wise derivative, with the same shape as ``x``.
    """
    # Logistic value via the identity s(x) = (1 + tanh(x / 2)) / 2, which
    # cannot overflow for large |x|.
    s = 0.5 * (1.0 + np.tanh(0.5 * x))
    return s * (1.0 - s)

In [110]:
def deriv_mse(MSE):
    """
    Get derivative of mse w.r.t. each final output in an array.

    With ``mse = mean(err ** 2)`` over ``N`` error entries, the derivative
    with respect to the i-th output is ``2 * err[i] / N``.

    Parameters
    ----------
    MSE : tuple
        The ``(ms_err, err)`` pair as returned by ``mse``. Only ``err`` is
        needed; ``ms_err`` is accepted so callers can pass the pair as-is.

    Returns
    -------
    numpy array
        ``2 * err / N``, with the same shape as ``err``.
    """
    _, err = MSE
    err = np.asarray(err, dtype=float)
    # No division by the error magnitude, so a perfect fit gives zeros, not NaN.
    return 2.0 * err / err.size

def grdmse(weights):
    """
    Analytic gradient of the XOR mean squared error w.r.t. the 9 parameters.

    Back-propagates through the 2-2-1 sigmoid network evaluated by
    ``xor_net`` for each of the four XOR patterns and accumulates the result.
    Sigmoid derivatives are taken from the activations themselves
    (``a * (1 - a)``), and the loss derivative is ``2 * (y - t) / N`` for
    ``mse = mean((y - t) ** 2)`` over ``N = 4`` patterns.

    NOTE: a later cell defines another function named ``grdmse`` (a
    finite-difference version); when the cells run in order that definition
    replaces this one.

    Parameters
    ----------
    weights : sequence of 9 numbers
        Flat parameter vector in the layout used by ``xor_net``:
        ``[0:4]`` input->hidden weights (row-major: in0->h0, in0->h1,
        in1->h0, in1->h1), ``[4:6]`` hidden->output weights, ``[6:8]`` hidden
        biases, ``[8]`` output bias.

    Returns
    -------
    numpy array of shape (9,)
        ``d mse / d weights[k]`` for ``k = 0..8``, in the same order as
        ``weights``.
    """
    weights = np.asarray(weights, dtype=float)
    w_out = weights[4:6]  # hidden -> output weights, one per hidden unit

    patterns = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=float)
    targets = np.array([0, 1, 1, 0], dtype=float)
    n_samples = len(patterns)

    dmse = np.zeros(9)
    for x, t in zip(patterns, targets):
        _, hidden_out, _, output_out = xor_net(x, weights)
        h = np.ravel(hidden_out)      # the two hidden activations
        y = np.ravel(output_out)[0]   # the scalar network output

        # Error signal at the output pre-activation:
        # d mse / d y  *  d y / d output_in
        delta_out = (2.0 * (y - t) / n_samples) * y * (1.0 - y)
        # Error signal at each hidden pre-activation.
        delta_hid = delta_out * w_out * h * (1.0 - h)

        # outer(x, delta_hid)[i, j] = x[i] * delta_hid[j]; ravel() is row-major,
        # matching the weights[0:4] layout.
        dmse[0:4] += np.outer(x, delta_hid).ravel()
        dmse[4:6] += delta_out * h
        dmse[6:8] += delta_hid
        dmse[8] += delta_out

    return dmse

In [111]:
def one_one(index, change, size=9):
    """
    Build a perturbation vector that is ``change`` at ``index`` and 0 elsewhere.

    Parameters
    ----------
    index : int
        Position of the single non-zero entry.
    change : number
        Value placed at ``index``.
    size : int, optional
        Length of the returned vector. Defaults to 9, the number of
        parameters of the XOR network.

    Returns
    -------
    numpy array of shape (size,)
    """
    basis = np.zeros(size)
    basis[index] = 1
    return change * basis

def mse_diff(weights1, weights2):
    """
    Change in mean squared error when moving from ``weights1`` to ``weights2``.

    Returns ``mse(weights2)[0] - mse(weights1)[0]``; a negative value means
    ``weights2`` has the lower error.
    """
    before, _ = mse(weights1)
    after, _ = mse(weights2)
    return after - before
    

def grdmse(weights, change=1e-5):
    """Returns a vector of partial derivatives of dMSE/DW, estimated numerically.

    Each component is a central finite difference,
    ``(mse(w + change*e_i) - mse(w - change*e_i)) / (2 * change)``,
    i.e. the change in the MSE caused by a small change in weight ``i``
    divided by the size of that change, so the result is a true gradient
    estimate rather than a raw difference of errors.

    Parameters
    ----------
    weights : sequence of numbers
        Flat parameter vector of the network (9 values for ``xor_net``).
    change : float, optional
        Half-width of the finite-difference step applied to each weight.

    Returns
    -------
    numpy array
        One partial derivative per weight, same length as ``weights``.
    """
    weights = np.asarray(weights, dtype=float)
    gradient = np.zeros(weights.size)

    for i in range(weights.size):
        # Perturb only weight i, in both directions.
        step = np.zeros_like(weights)
        step[i] = change
        # mse_diff(a, b) returns mse(b) - mse(a).
        gradient[i] = mse_diff(weights - step, weights + step) / (2.0 * change)

    return gradient

In [112]:
def classify(num):
    """Threshold a network output: 1 when ``num`` is at least 0.5, otherwise 0."""
    return 1 if num >= 0.5 else 0

# Plain gradient descent: 100 fixed-size steps against the gradient estimate.
lr = 0.01
for i in range(100):
    weights = weights - lr * grdmse(weights)

# Network outputs for the four XOR patterns, in the order 00, 01, 10, 11.
Pred_00, Pred_01, Pred_10, Pred_11 = (
    xor_net(pattern, weights)[-1]
    for pattern in ([0, 0], [0, 1], [1, 0], [1, 1])
)

# Hard 0/1 decisions obtained by thresholding each output at 0.5.
num_00, num_01, num_10, num_11 = map(
    classify, (Pred_00, Pred_01, Pred_10, Pred_11)
)

In [113]:
# Raw network outputs for inputs 00, 01, 10, 11 -- one per line.
print(Pred_00, Pred_01, Pred_10, Pred_11, sep="\n")

[[0.77389578]]
[[0.82616346]]
[[0.80747797]]
[[0.84457755]]
