In [1]:
import numpy as np

# Create an object to hold the initial random biases and weights and have the methods for computing the neuron values with the Sigmoid and ReLU methodologies

class Network(object):
    """Fully connected network holding randomly initialised biases and weights.

    The network only performs forward passes: it offers one method that
    computes the neuron values with the sigmoid function and one that
    computes them with the ReLU function.

    Attributes:
        num_layers: number of entries in ``sizes`` (the input layer counts
            as a layer even though it contains inputs, not neurons).
        sizes: the list handed to the constructor.
        biases: one ``(y, 1)`` array per layer after the input layer, where
            ``y`` is the number of neurons in that layer.  For
            ``Network([5, 4, 3, 2])`` the shapes are (4, 1), (3, 1), (2, 1).
        weights: one ``(y, x)`` array per pair of adjacent layers, where
            ``x`` is the size of the feeding layer and ``y`` the size of the
            receiving layer.  For ``Network([5, 4, 3, 2])`` the shapes are
            (4, 5), (3, 4), (2, 3).
    """

    def __init__(self, sizes):
        """Create the random biases and weights.

        Args:
            sizes: list with the number of neurons in each layer; the first
                entry is the number of inputs, not neurons.
        """
        self.num_layers = len(sizes)
        self.sizes = sizes
        # np.random.randn draws from a normal distribution with mean 0 and
        # variance 1.  Biases are drawn before weights; keep that order so a
        # seeded run keeps producing the same network.
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        self.weights = [np.random.randn(y, x)
                        for x, y in zip(sizes[:-1], sizes[1:])]

    def feedforward(self, a):
        """Print and compute the neuron values of every layer using sigmoid.

        Args:
            a: the input values; a sequence or array holding one value per
                network input.  A column vector of shape (n, 1) is accepted
                as well because the input is flattened first.

        Returns:
            1-D array with the values of the last layer (each between 0
            and 1, the range of the sigmoid function).
        """
        print('The neuron values by layer with the Sigmoid method are')
        return self._propagate(a, sigmoid)

    def forward_pass(self, a):
        """Print and compute the neuron values of every layer using ReLU.

        Args:
            a: the input values; a sequence or array holding one value per
                network input.  A column vector of shape (n, 1) is accepted
                as well because the input is flattened first.

        Returns:
            1-D array with the values of the last layer (each 0 or larger,
            the range of the ReLU function).
        """
        print(' ')
        print('The neuron values by layer with the ReLU method are')
        return self._propagate(a, ReLU)

    def _propagate(self, a, activation):
        """Push ``a`` through all layers, printing each layer's values."""
        # Work on flat vectors throughout.  Adding the (y,) product to the
        # transposed (1, y) bias only worked for 1-D input; a column-vector
        # input broadcast to a (y, y) matrix and gave wrong values.
        a = np.ravel(a)
        for b, w in zip(self.biases, self.weights):
            # weights . inputs + bias, then the activation function
            a = activation(np.dot(w, a) + b.ravel())
            print(a)
        return a
                 
# Define sigmoid function and ReLU function

def sigmoid(x):
    """Apply the sigmoid function ( 1 / (1 + e^-x) ) element-wise to x.

    Args:
        x: a number or an array-like of numbers.

    Returns:
        The sigmoid of x with the same shape as x (a scalar for scalar
        input); every value lies between 0 and 1.
    """
    x = np.asarray(x)
    # e^-|x| never exceeds 1, so it cannot overflow, whereas e^-x overflows
    # for large negative x.  For x < 0 the equivalent form e^x / (1 + e^x)
    # is used.
    z = np.exp(-np.abs(x))
    out = np.where(x >= 0, 1.0 / (1.0 + z), z / (1.0 + z))
    return out[()]                  # [()] turns a 0-d result back into a scalar

def ReLU(x):
    """Apply the ReLU function (max(x, 0)) element-wise to an input x.

    Values of x that are greater than 0 are kept; every other position
    becomes 0.
    """
    is_positive = x > 0                 # True wherever the value is kept
    return np.where(is_positive, x, 0)

#=====================================================
    

## Training data: one case per row.
## Columns 0-4 hold the five features, columns 5 and 6 hold the two targets.

features_and_targets = np.array(
    [
        [0, 0, 0, 0, 0, 0, 1],
        [0, 0, 0, 0, 1, 0, 1],
        [0, 0, 0, 1, 1, 0, 1],
        [0, 0, 1, 1, 1, 0, 1],
        [0, 1, 1, 1, 1, 0, 1],
        [1, 1, 1, 1, 0, 0, 1],
        [1, 1, 1, 0, 0, 0, 1],
        [1, 1, 0, 0, 0, 0, 1],
        [1, 0, 0, 0, 0, 0, 1],
        [1, 0, 0, 1, 0, 0, 1],
        [1, 0, 1, 1, 0, 0, 1],
        [1, 1, 0, 1, 0, 0, 1],
        [0, 1, 0, 1, 1, 0, 1],
        [0, 0, 1, 0, 1, 0, 1],
        [1, 0, 1, 1, 1, 1, 0],
        [1, 1, 0, 1, 1, 1, 0],
        [1, 0, 1, 0, 1, 1, 0],
        [1, 0, 0, 0, 1, 1, 0],
        [1, 1, 0, 0, 1, 1, 0],
        [1, 1, 1, 0, 1, 1, 0],
        [1, 1, 1, 1, 1, 1, 0],
        [1, 0, 0, 1, 1, 1, 0],
    ],
    dtype=float,
)

# Put the cases (rows) in a random order; the array is shuffled in place.
np.random.shuffle(features_and_targets)

# A network with 4 layers: an input layer of 5 features followed by three
# layers of 4, 3 and 2 neurons, with 9 random biases and 38 random weights.
net = Network([5, 4, 3, 2])

# Run every case through the network once per activation function and
# print the features, the targets and both predictions.
for i, case in enumerate(features_and_targets):
    features = case[0:5]             # the 5 feature values of this case
    targets = case[5:7]              # the 2 target values of this case
    print('The inputs are ')
    print(features)
    predictedSigmoid = net.feedforward(features)
    predictedReLU = net.forward_pass(features)
    print(' ')
    print('For observation ', i+1, ', the data is:')
    print('Features : ',features, ' Targets : ', targets, 'Predicted Targets using Sigmoid method: ', predictedSigmoid, 'Predicted Targets using ReLU : ', predictedReLU)

    print(' ')
    print('--------------------------------------------------------------------------------------')
    print(' ')
    
    

The inputs are 
[0. 0. 1. 1. 1.]
The neuron values by layer with the Sigmoid method are
[0.49339959 0.07169819 0.93955295 0.56254107]
[0.26917031 0.6464663  0.06384462]
[0.64368176 0.46732201]
 
The neuron values by layer with the ReLU method are
[0.         0.         2.74363634 0.25148133]
[0.52848565 0.         0.        ]
[1.15854704 0.        ]
 
For observation  1 , the data is:
Features :  [0. 0. 1. 1. 1.]  Targets :  [0. 1.] Predicted Targets using Sigmoid method:  [0.64368176 0.46732201] Predicted Targets using ReLU :  [1.15854704 0.        ]
 
--------------------------------------------------------------------------------------
 
The inputs are 
[0. 0. 0. 0. 0.]
The neuron values by layer with the Sigmoid method are
[0.09632948 0.27421422 0.71346408 0.63600475]
[0.27631824 0.65141311 0.0413272 ]
[0.63850263 0.46661813]
 
The neuron values by layer with the ReLU method are
[0.         0.         0.91226818 0.55806522]
[0.         0.06679124 0.        ]
[0.35304236 0.41159963]

The Network object definition follows "Neural Networks and Deep Learning" by Michael Nielsen, page 24.

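In the network above, the layer 1 to layer 2 linear algebra is:

$$
\begin{bmatrix}
w_{111} & w_{121} & w_{131} & w_{141} & w_{151} \\
w_{112} & w_{122} & w_{132} & w_{142} & w_{152} \\
w_{113} & w_{123} & w_{133} & w_{143} & w_{153} \\
w_{114} & w_{124} & w_{134} & w_{144} & w_{154}
\end{bmatrix}
\cdot
\begin{bmatrix} a_{11} \\ a_{12} \\ a_{13} \\ a_{14} \\ a_{15} \end{bmatrix}
+
\begin{bmatrix} b_{21} \\ b_{22} \\ b_{23} \\ b_{24} \end{bmatrix}
$$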
                                 
The weight matrix holds the weight (w) assigned to each connection from an input/activation (a) to a neuron (a) in the next layer.  Each (a) has one weighted connection (w) to every neuron (a) in the next layer.  The input/activation vector a must have as many elements as the weight matrix has columns.  The number of rows in the weight matrix is the number of neurons (a's) in the next layer.  The product of the w matrix and the a vector is a vector whose length equals the number of rows in the weight matrix.  That vector is added to the bias vector (the biases associated with the neurons in the next layer - one (b) per next-layer neuron).  The result of the vector addition is another vector of the same size; passing it through the activation function (sigmoid or ReLU in the code above) gives the activation (a) of each neuron in the next layer, which in turn is the input to the layer after that.

Result is a 4 element vector with the values for a21, a22, a23 and a24

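Layer 2 to 3

$$
\begin{bmatrix}
w_{211} & w_{221} & w_{231} & w_{241} \\
w_{212} & w_{222} & w_{232} & w_{242} \\
w_{213} & w_{223} & w_{233} & w_{243}
\end{bmatrix}
\cdot
\begin{bmatrix} a_{21} \\ a_{22} \\ a_{23} \\ a_{24} \end{bmatrix}
+
\begin{bmatrix} b_{31} \\ b_{32} \\ b_{33} \end{bmatrix}
$$
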
Result is a 3 element vector with the values for a31, a32 and a33


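Layer 3 to 4

$$
\begin{bmatrix}
w_{311} & w_{321} & w_{331} \\
w_{312} & w_{322} & w_{332}
\end{bmatrix}
\cdot
\begin{bmatrix} a_{31} \\ a_{32} \\ a_{33} \end{bmatrix}
+
\begin{bmatrix} b_{41} \\ b_{42} \end{bmatrix}
$$
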
                         
Result is a 2 element vector with the values for a41 and a42 - which is what we are trying to predict in this network


