In [50]:
import numpy as np

In this script, we would like to build a simple, generalized neural network with n features (each input data has n values) and m outputs with only one hidden layer with k neurons.

In [51]:
dataset = np.array([[0, 0, 1], [1, 1, 1], [1, 0, 1], [0, 1, 1], [0, 0, 0]])
output = np.array([[0, 0, 1], [1, 0, 0], [1, 1, 0], [0, 1, 1], [0, 0, 1]])
print("The data set is: \n", dataset, "\n")
print("The output is: \n", output)

The data set is: 
 [[0 0 1]
 [1 1 1]
 [1 0 1]
 [0 1 1]
 [0 0 0]] 

The output is: 
 [[0 0 1]
 [1 0 0]
 [1 1 0]
 [0 1 1]
 [0 0 1]]


In [52]:
#Augment the input matrix to implement also the bias parameter on the input layer
#b1 = np.ones([dataset.shape[0], 1])
#dataset1 = np.hstack((dataset, b1))

#number of neurons in the first hidden layer
nn1 = 6

#number of neurons in the second hidden layer
nn2 = 7

#initialize the first weight matrix:
W_1 = np.random.random([dataset.shape[1], nn1])

#intialize the second weight matrix, consider also the bias parameter:
W_2 = np.random.random([nn1, nn2])

#intialize the third weight matrix, consider also the bias parameter:
W_3 = np.random.random([nn2, output.shape[1]])

#activation function:
def sig(x):
    return 1 / (1 + np.exp(-x))

#derivative of sigmoid:
def dsig(x):
    return x * (1 - x)

In [53]:
#learning rate
lr =  0.9

In [5]:
i = 0
err = 100
while i<100000:
    if err < 1e-2:
        break
        
    i += 1
    
    #update the predicted values:
    z1 = sig(np.dot(dataset, W_1))
    
    #augment z1 to include the bias parameter in the first hidden layer
    #b2 = np.ones([z1.shape[0], 1])
    #z1 = np.hstack((z1, b2))
    
    z2 = sig(np.dot(z1, W_2))
    
    #augment z2 to include the bias parameter in the second hidden layer
    #b3 = np.ones([z2.shape[0], 1])
    #z2 = np.hstack((z2, b3))
    
    pred_out = sig(np.dot(z2, W_3))
    
    #compute:
    err = 0.5 * np.linalg.norm(output - pred_out, 2)
    
    #back propagation using gradient descent method:
    dW_3 = np.dot(z2.T, -(output - pred_out) * dsig(pred_out))
    
    dW_2 = np.zeros_like(W_2)
    for j in range(dW_2.shape[0]):
        for k in range(dW_2.shape[1]):
            dW_2[j, k] = np.dot(W_3[k,:], np.dot(-(output - pred_out).T * dsig(pred_out).T, dsig(z2)[:,k] * z1[:,j]))
            
    dW_1 = np.zeros_like(W_1)
    for p in range(dW_3.shape[0]):
        for q in range(dW_2.shape[1]):
            dW_3[p,q] = dsig(z1) * dataset1
    
    #update the weight matrix
    W_1 -= lr * dW_1
    W_2 -= lr * dW_2
    W_3 -= lr * dW_3
    

print("Finish with",i, "iteration(s) with error", err,".")
print("\nThe weight matrix from the input to the hidden layer: \n", W_1)
print("\nThe weight matrix from the hidden to the output layer: \n", W_2)
print("\nThe final predicted output: \n", pred_out)

ValueError: operands could not be broadcast together with shapes (5,3) (5,4) 

In [56]:
z1 = sig(np.dot(dataset, W_1))
    
    #augment z1 to include the bias parameter in the first hidden layer
#b2 = np.ones([z1.shape[0], 1])
#z1 = np.hstack((z1, b2))
    
z2 = sig(np.dot(z1, W_2))
    
    #augment z2 to include the bias parameter in the second hidden layer
#b3 = np.ones([z2.shape[0], 1])
#z2 = np.hstack((z2, b3))
    
pred_out = sig(np.dot(z2, W_3))

In [27]:
z1.shape

(5, 7)

In [28]:
dataset1.shape

(5, 4)

In [29]:
np.dot(dataset1.T, dsig(z1))

array([[0.19701751, 0.14154855, 0.37110084, 0.35870058, 0.24639821,
        0.296309  , 0.        ],
       [0.21094123, 0.14511524, 0.34091388, 0.2851985 , 0.30759728,
        0.27865402, 0.        ],
       [0.50769907, 0.34854143, 0.78305562, 0.74348463, 0.67286819,
        0.63717115, 0.        ],
       [0.73526242, 0.54637895, 1.02944828, 0.99205018, 0.92220732,
        0.84561222, 0.        ]])

In [34]:
np.sum(-(output - pred_out) * dsig(pred_out), axis = 1).shape

(5,)

In [19]:
-(output - pred_out) * dsig(pred_out)

array([[ 0.06475014,  0.02689781, -0.00818456],
       [-0.00462458,  0.02472941,  0.07445438],
       [-0.00484341, -0.00070098,  0.07556281],
       [ 0.06256925, -0.00071296, -0.00770661],
       [ 0.06734552,  0.02831572, -0.00874335]])

In [39]:
np.dot(-(output - pred_out) * dsig(pred_out), W_3.T)

array([[0.02808892, 0.02415438, 0.00370432, 0.0211651 , 0.0258736 ,
        0.01559275, 0.01009632, 0.01530433],
       [0.02958933, 0.02492679, 0.01668671, 0.02289146, 0.03560053,
        0.03164079, 0.01850771, 0.01357007],
       [0.00817947, 0.00669111, 0.01569168, 0.00694793, 0.01701759,
        0.02529253, 0.01690302, 0.00428656],
       [0.00453102, 0.00408457, 0.00194926, 0.00357818, 0.00500222,
        0.00740005, 0.00730096, 0.00487645],
       [0.02999245, 0.02579669, 0.00395272, 0.02260151, 0.0276241 ,
        0.01671795, 0.01088476, 0.01640056]])

In [43]:
a = np.dot(-(output - pred_out) * dsig(pred_out), W_3.T) * dsig(z2)

In [45]:
W_2.shape

(7, 7)

In [46]:
a.shape

(5, 8)