In [1]:
#Building a neural network from scratch and coding how it performs predictions using forward propagation.
#The goal is to understand neural networks and how they work.

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

In [3]:
# Randomly initialize the network parameters: 6 weights and 3 biases
# (one bias per node -- two hidden-layer nodes plus the output node).
# NOTE: no seed is set before this cell, so the values differ between runs.
weights = np.random.uniform(size=6).round(decimals=2)
biases = np.random.uniform(size=3).round(decimals=2)
print(weights)
print(biases)

[0.89 0.08 0.12 0.02 0.93 0.02]
[0.27 0.25 0.01]


In [4]:
# Choose one concrete input pair (x1, x2) to push through the network.
x1, x2 = 0.5, 0.85
print(f"x1 is {x1} and x2 is {x2}")


x1 is 0.5 and x2 is 0.85


In [5]:
# Weighted sum z11 of the inputs at the first node of the hidden layer:
# each input times its weight, plus that node's bias.
z11 = (weights[0] * x1) + (weights[1] * x2) + biases[0]
print(f"The weighted sum of the inputs at the first node in the hidden layer is {z11} ")

The weighted sum of the inputs at the first node in the hidden layer is 0.783 


In [6]:
# Weighted sum z12 of the inputs at the second node of the hidden layer,
# using the next pair of weights and the second bias.
z12 = (weights[2] * x1) + (weights[3] * x2) + biases[1]
print(f"The weighted sum of the inputs at the second node in he hidden layer is {z12}  ")

The weighted sum of the inputs at the second node in he hidden layer is 0.327  


In [7]:
# With a sigmoid activation function, the activation a11 of the first
# hidden-layer node is sigmoid(z11), kept to four decimal places.
a11 = np.around(1.0 / (1.0 + np.exp(-z11)), decimals=4)
print(f'The activation of the first node in the hidden layer is {a11}')


The activation of the first node in the hidden layer is 0.6863


In [8]:
# Activation a12 of the second hidden-layer node: sigmoid(z12), kept to
# four decimal places.
a12 = np.around(1.0 / (1.0 + np.exp(-z12)), decimals=4)
print(f'The activation of the second node in the hidden layer is {a12}')


The activation of the second node in the hidden layer is 0.581


In [9]:
# The hidden-layer activations are the inputs to the output layer, so the
# weighted sum z2 at the output node combines a11 and a12 with the last two
# weights and the last bias.
z2 = (weights[4] * a11) + (weights[5] * a12) + biases[2]
print('The weighted sum of the inputs at the node in the output layer is {}'.format(
    np.around(z2, decimals=4)
))

The weighted sum of the inputs at the node in the output layer is 0.6599


In [10]:
# Output of the network: a2 is the sigmoid activation of the output-layer node.
a2 = 1.0 / (1.0 + np.exp(-z2))
# Report the inputs from the x1/x2 variables rather than hard-coding them, so
# the message stays correct when the inputs defined earlier are changed.
print('The output of the network for x1 = {} and x2 = {} is {}'.format(x1, x2, np.around(a2, decimals=4)))

The output of the network for x1 = 0.5 and x2 = 0.85 is 0.6592


Neural networks for real problems are composed of many hidden layers and many more nodes in each layer, so we can't keep making predictions with this very inefficient approach of manually computing the weighted sum and the activation at each node.

In order to code an automatic way of making predictions, let's generalize our network. A general network would take n inputs, would have many hidden layers, each hidden layer having m nodes, and would have an output layer. Although the network shown has only one hidden layer, we will code the network so that it can have many hidden layers. Similarly, although the network shown has an output layer with one node, we will code the network so that it can have more than one node in the output layer.

In [11]:
# Formal definition of the network structure.
n = 2                  # number of inputs
num_hidden_layers = 2  # number of hidden layers
m = [2, 2]             # number of nodes in each hidden layer
num_nodes_output = 1   # number of nodes in the output layer

In [12]:
# Randomly initialize the weights and biases of the network defined above.
network = {}            # layer name -> node name -> {'weights': ..., 'bias': ...}
num_nodes_previous = n  # each node needs one weight per node in the previous layer

# Iterating one step past the hidden layers also builds the output layer.
for layer_idx in range(num_hidden_layers + 1):
    if layer_idx < num_hidden_layers:
        layer_name = 'layer_{}'.format(layer_idx + 1)
        num_nodes = m[layer_idx]
    else:
        layer_name = "output"
        num_nodes = num_nodes_output

    # For every node in this layer, draw its weights first and then its bias.
    network[layer_name] = {
        'node_{}'.format(node_idx + 1): {
            'weights': np.around(np.random.uniform(size=num_nodes_previous), decimals=2),
            'bias': np.around(np.random.uniform(size=1), decimals=2),
        }
        for node_idx in range(num_nodes)
    }

    # This layer's node count is the number of inputs to the next layer.
    num_nodes_previous = num_nodes

print(network) # print network

{'layer_1': {'node_1': {'weights': array([0.47, 0.02]), 'bias': array([0.02])}, 'node_2': {'weights': array([0.27, 0.54]), 'bias': array([0.32])}}, 'layer_2': {'node_1': {'weights': array([0.05, 0.68]), 'bias': array([0.93])}, 'node_2': {'weights': array([0.86, 0.73]), 'bias': array([0.7])}}, 'output': {'node_1': {'weights': array([0.66, 0.08]), 'bias': array([0.46])}}}


So now, with the above code, we are able to initialize the weights and the biases of a network with any number of hidden layers and any number of nodes in each layer. But let's put this code in a function so that we can run it repeatedly whenever we want to construct a neural network.

In [13]:
def initialize_network(num_inputs, num_hidden_layers, num_nodes_hidden, num_nodes_output):
    """Build a network with randomly initialized weights and biases.

    Args:
        num_inputs: Number of inputs to the network; each node of the first
            layer gets this many weights.
        num_hidden_layers: Number of hidden layers to create.
        num_nodes_hidden: Sequence indexed by hidden-layer position giving the
            number of nodes in that hidden layer.
        num_nodes_output: Number of nodes in the output layer.

    Returns:
        A dict keyed by layer name ('layer_1', 'layer_2', ..., then 'output').
        Each layer is a dict keyed by node name ('node_1', ...), and each node
        is a dict with 'weights' (array with one entry per node in the previous
        layer) and 'bias' (array of length 1). All values are drawn with
        np.random.uniform and rounded to two decimals.
    """
    network = {}
    fan_in = num_inputs  # number of nodes feeding into the layer being built

    # The final iteration (index == num_hidden_layers) creates the output layer.
    for layer_idx in range(num_hidden_layers + 1):
        if layer_idx < num_hidden_layers:
            layer_name = 'layer_{}'.format(layer_idx + 1)
            layer_size = num_nodes_hidden[layer_idx]
        else:
            layer_name = 'output'
            layer_size = num_nodes_output

        layer_nodes = {}
        network[layer_name] = layer_nodes
        for node_idx in range(layer_size):
            # Weights are drawn before the bias for every node.
            node_weights = np.around(np.random.uniform(size=fan_in), decimals=2)
            node_bias = np.around(np.random.uniform(size=1), decimals=2)
            layer_nodes['node_{}'.format(node_idx + 1)] = {
                'weights': node_weights,
                'bias': node_bias,
            }

        fan_in = layer_size

    return network

Now let's use the initialize_network function to create a network that takes 5 inputs and has 3 hidden layers (3 nodes in the first layer, 2 nodes in the second layer, and 3 nodes in the third layer) and 1 node in the output layer.

In [14]:
# 5 inputs -> hidden layers of 3, 2 and 3 nodes -> 1 output node.
small_network = initialize_network(
    num_inputs=5,
    num_hidden_layers=3,
    num_nodes_hidden=[3, 2, 3],
    num_nodes_output=1,
)

In [15]:
#compute weighted sum at each node
def compute_weighted_sum(inputs, weights, bias):
    """Return the weighted sum of `inputs` plus `bias` for a single node.

    Args:
        inputs: Input values (NumPy array or plain Python sequence).
        weights: One weight per input (NumPy array or plain Python sequence).
        bias: Bias added to the sum; may be a scalar or an array.

    Returns:
        sum(inputs[i] * weights[i]) + bias. When `bias` is an array, the
        result is an array of the same shape.
    """
    # Convert both operands first: `list * list` raises TypeError and
    # `list * int` is list repetition, neither of which is the intended
    # element-wise product.
    products = np.asarray(inputs) * np.asarray(weights)
    return np.sum(products) + bias

In [16]:
# Generate 5 inputs that we can feed to the small network.
# NOTE(review): `seed` from `random` is not used in the cells visible here.
from random import seed
import numpy as np

# Seed NumPy's global generator so the inputs below are the same on every run.
# NOTE: small_network was created in an earlier cell, before this seed is set,
# so its weights and biases are not made reproducible by it.
np.random.seed(12)
inputs = np.random.uniform(size=5).round(decimals=2)

print('The inputs to the network are {}'.format(inputs))

The inputs to the network are [0.15 0.74 0.26 0.53 0.01]


In [17]:
# Weighted sum at the first node of the first hidden layer of small_network.
first_node = small_network['layer_1']['node_1']
node_weights, node_bias = first_node['weights'], first_node['bias']

weighted_sum = compute_weighted_sum(inputs, node_weights, node_bias)
# The bias is a length-1 array, so the sum is too; index 0 extracts the value.
print('The weighted sum at the first node in the hidden layer is {}'.format(
    np.around(weighted_sum[0], decimals=4)
))

The weighted sum at the first node in the hidden layer is 1.5176


In [18]:
#compute node activation
def node_activation(weighted_sum):
    """Apply the sigmoid activation, 1 / (1 + exp(-z)), to a weighted sum.

    Args:
        weighted_sum: Scalar, NumPy array, or plain Python sequence of
            weighted sums.

    Returns:
        The element-wise sigmoid of `weighted_sum`: a NumPy scalar for scalar
        input, otherwise an array of the same shape.
    """
    # Convert first: for a plain Python list, `-1 * weighted_sum` is list
    # repetition and evaluates to [], which would make the formula below
    # silently return an empty array.
    z = np.asarray(weighted_sum)
    return 1.0 / (1.0 + np.exp(-1 * z))
# Pass the first node's weighted sum through the activation function.
node_output = node_activation(
    compute_weighted_sum(inputs, node_weights, node_bias)
)
print('The output of the first node in the hidden layer is {}'.format(
    np.around(node_output[0], decimals=4)
))

The output of the first node in the hidden layer is 0.8202


The final piece of building a neural network that can perform predictions is to put everything together. So let's create a function that applies the compute_weighted_sum and node_activation functions to each node in the network and propagates the data all the way to the output layer and outputs a prediction for each node in the output layer.

In [19]:
def forward_propagate(network, inputs):
    """Propagate `inputs` through every layer of `network` and return the predictions.

    Args:
        network: Dict mapping layer name -> node name -> dict with 'weights'
            and 'bias' entries. Layers are processed in the dict's iteration
            order. Every layer not named 'output' must have a name of the
            form '<prefix>_<number>', because the part after the first
            underscore is printed as the layer number.
        inputs: Iterable of input values for the first layer.

    Returns:
        A list with the output of each node in the last layer processed, each
        rounded to four decimals.

    Side effects:
        Prints the node outputs of every layer except the one named 'output'.
    """
    layer_inputs = list(inputs)  # the raw inputs feed the first hidden layer

    for layer_name, nodes in network.items():
        # Weighted sum and activation are computed together for each node.
        # The [0] takes the value out of the length-1 result that a length-1
        # bias array produces.
        layer_outputs = [
            np.around(
                node_activation(
                    compute_weighted_sum(layer_inputs, node['weights'], node['bias'])
                )[0],
                decimals=4,
            )
            for node in nodes.values()
        ]

        if layer_name != 'output':
            print('The outputs of the nodes in hidden layer number {} is {}'.format(layer_name.split('_')[1], layer_outputs))

        # The outputs of this layer are the inputs to the next one.
        layer_inputs = layer_outputs

    return layer_outputs

In [20]:
# Run the full forward pass and report the prediction of the first output node.
predictions = forward_propagate(network=small_network, inputs=inputs)
print('The predicted value by the network for the given input is {}'.format(
    np.around(predictions[0], decimals=4)
))

The outputs of the nodes in hidden layer number 1 is [np.float64(0.8202), np.float64(0.805), np.float64(0.8154)]
The outputs of the nodes in hidden layer number 2 is [np.float64(0.8761), np.float64(0.8167)]
The outputs of the nodes in hidden layer number 3 is [np.float64(0.8133), np.float64(0.7173), np.float64(0.8275)]
The predicted value by the network for the given input is 0.9067
