In [1]:
import numpy as np

In this script, we would like to build a simple, generalized neural network with n features (each input data has n values) and m outputs with only one hidden layer with k neurons.

In [2]:
dataset = np.array([[0, 0, 1], [1, 1, 1], [1, 0, 1], [0, 1, 1], [0, 0, 0]])
output = np.array([[0, 0, 1], [1, 0, 0], [1, 1, 0], [0, 1, 1], [0, 0, 1]])
print("The data set is: \n", dataset, "\n")
print("The output is: \n", output)

The data set is: 
 [[0 0 1]
 [1 1 1]
 [1 0 1]
 [0 1 1]
 [0 0 0]] 

The output is: 
 [[0 0 1]
 [1 0 0]
 [1 1 0]
 [0 1 1]
 [0 0 1]]


In [3]:
#Augment the input matrix to implement also the bias parameter on the input layer
b1 = np.ones([dataset.shape[0], 1])
dataset1 = np.hstack((dataset, b1))

#number of neurons in the first hidden layer
nn1 = 5

#number of neurons in the second hidden layer
nn2 = 4

#initialize the first weight matrix:
W_1 = np.random.random([dataset1.shape[1], nn1])

#intialize the second weight matrix, consider also the bias parameter:
W_2 = np.random.random([nn1 + 1, nn2])

#intialize the third weight matrix, consider also the bias parameter:
W_3 = np.random.random([nn2 + 1, output.shape[1]])

#activation function:
def sig(x):
    return 1 / (1 + np.exp(-x))

#derivative of sigmoid:
def dsig(x):
    return x * (1 - x)

In [4]:
#learning rate
lr =  0.9

In [5]:
i = 0
err = 100
while i<100000:
    if err < 1e-2:
        break
        
    i += 1
    
    #update the predicted values:
    z1 = sig(np.dot(dataset1, W_1))
    
    #augment z1 to include the bias parameter in the first hidden layer
    b2 = np.ones([z1.shape[0], 1])
    z1 = np.hstack((z1, b2))
    
    z2 = sig(np.dot(z1, W_2))
    
    #augment z2 to include the bias parameter in the second hidden layer
    b3 = np.ones([z2.shape[0], 1])
    z2 = np.hstack((z2, b3))
    
    pred_out = sig(np.dot(z2, W_3))
    
    #compute:
    err = 0.5 * np.linalg.norm(output - pred_out, 2)
    
    #back propagation using gradient descent method:
    dW_3 = np.dot(z2.T, -(output - pred_out) * dsig(pred_out))
    
    #truncate the bias for the back propagation of the first layer
    z1t = z1[:,:-1]
    z2t = z2[:,:-1]
    W_3t = W_3[:-1,:]
    W_2t = W_2[:-1,:]
    
    dW_2 = np.dot(z1.T, dsig(z2t) * np.dot(-(output - pred_out) * dsig(pred_out), W_3t.T)) #use W_3t, z2t
            
    dW_1 = np.dot(dataset1.T, dsig(z1t) * np.dot(np.dot(-(output - pred_out) * dsig(pred_out), W_3t.T) * dsig(z2t), W_2t.T))
    
    #update the weight matrix
    W_1 -= lr * dW_1
    W_2 -= lr * dW_2
    W_3 -= lr * dW_3
    

print("Finish with",i, "iteration(s) with error", err,".")
print("\nThe weight matrix from the input to the hidden layer: \n", W_1)
print("\nThe weight matrix from the hidden to the output layer: \n", W_2)
print("\nThe final predicted output: \n", pred_out)

Finish with 17199 iteration(s) with error 0.009999744577695993 .

The weight matrix from the input to the hidden layer: 
 [[ 3.01345605  6.2484593   3.23841666  0.59462958  3.73882618]
 [ 1.7154687   1.8946162   3.7561239   0.64348969  3.33083873]
 [ 0.51147065 -0.82830122 -0.87445404  1.54908192  0.5571006 ]
 [-2.04661953 -4.1047734  -3.74114125  0.98094247 -2.31282787]]

The weight matrix from the hidden to the output layer: 
 [[ 3.06114829e+00  1.63439039e+00  2.84311441e-01  1.36800231e+00]
 [ 1.90489182e+00  8.01595647e+00  6.08317043e+00  8.62147398e-01]
 [ 4.98369242e+00  2.57845626e+00  5.10559184e+00  6.23554433e-01]
 [-4.02459722e-01 -1.19874884e+00 -2.12237038e+00  1.87419588e+00]
 [ 5.20603972e+00  1.39510953e+00  4.25404102e-03  1.31716726e+00]
 [-3.87127910e+00 -4.77216346e+00 -5.60829451e+00  2.27326785e+00]]

The final predicted output: 
 [[3.92314951e-03 7.06617261e-03 9.96244514e-01]
 [9.99920440e-01 9.31849967e-03 7.33494500e-05]
 [9.92209527e-01 9.90806759e-01 7.897