### Initialize a neural network
* function `initiate_NN` randomly generate weights and biases for every neurons in every layers
* weights range [0, 1)
* bias range [0, 1)

In [6]:
import numpy as np
# Initialize a neural network, output is a network of weights and biases
def initiate_NN(n_inputs, n_neurons, n_outputs):
    Nnetwork = []
    hidden_layer = [{'weights':np.random.random(n_inputs), 'bias':np.random.random()} for i in range(n_neurons)]
    output_layer = [{'weights':np.random.random(n_neurons), 'bias':np.random.random()} for i in range(n_outputs)]
    Nnetwork.append(hidden_layer)
    Nnetwork.append(output_layer)
    return Nnetwork

In [9]:
#test: initiate a neural network of input_size = 8, hidden_layer_size = 3, output_size = 8
Nnetwork = initiate_NN(8, 3, 8)
for layer in Nnetwork:
  print(layer)
  print('number of neurons', len(layer), '\n')

[{'weights': array([0.80426023, 0.21891077, 0.15431621, 0.3077206 , 0.85869644,
       0.9733493 , 0.46128918, 0.76144003]), 'bias': 0.24356001705452524}, {'weights': array([0.35861418, 0.20480762, 0.29601315, 0.5435984 , 0.21400622,
       0.16786255, 0.36640134, 0.17290308]), 'bias': 0.5921735930594143}, {'weights': array([0.98999603, 0.74527302, 0.61044989, 0.65014764, 0.56672679,
       0.55311118, 0.57432667, 0.12814547]), 'bias': 0.669914777886722}]
number of neurons 3 

[{'weights': array([0.96262162, 0.40827858, 0.2239914 ]), 'bias': 0.3569448582631478}, {'weights': array([0.85999464, 0.92560527, 0.65647128]), 'bias': 0.8987741138188432}, {'weights': array([0.39037245, 0.08244243, 0.18199065]), 'bias': 0.3907877971844008}, {'weights': array([0.94603018, 0.88160621, 0.52190715]), 'bias': 0.640512246513775}, {'weights': array([0.3480655 , 0.51680571, 0.25476347]), 'bias': 0.28979118903135836}, {'weights': array([0.40935404, 0.60961374, 0.45066169]), 'bias': 0.34750453439393714}, 

Results above shows, 
* first layer(aks hidden layer) has 3 neurons, each neuron has 8 weights(because input layer has 8 neurons), and 1 bias value

* output layer has 8 neurons,  each neuron has 3 weights(because hidden layer has 3 neurons), , and 1 bias value

### Calculate neuron activation and transfer

In [10]:
# Calculate neuron activation
def activate(inputs, weights, bias):
    activated = np.dot(inputs, weights) + bias 
    return activated.item()  # return a scalar value

# Transfer neuron activation using Sigmoid equation
from math import exp
def transfer(activated):
    return 1.0 / (1.0 + exp(-activated))

In [11]:
# test: neuron activation and transfer function
inputs = [1, 0, 0, 0, 0, 0, 0, 0] 
weights = [0.5, 1, 1, 0, 0, 1, 1, 0]
bias = [0.8]

act1 = activate(inputs, weights, bias)
print(act1)
trans1 = transfer(act1)
print(trans1)

1.3
0.7858349830425586


### Forward propagation

In [13]:
# Forward propagation to calculate the output
def forward(Nnetwork, input_row):
    inputs = input_row
    for layer in Nnetwork:
        outputs = []
        # Calculate each neuron's activation in this layer
        for neuron in layer:
            activation = activate(inputs, neuron['weights'], neuron['bias'])
            neuron['output'] = transfer(activation)            
            outputs.append(neuron['output'])    
        inputs = outputs # The output of the hidden layer is the input of the output layer
    return outputs

In [14]:
# test forward propagation
Nnetwork = initiate_NN(8, 3, 8)
input_row = [1, 0, 0, 0, 0, 0, 0, 0]
output = forward(Nnetwork, input_row)
print(output)
print(type(output))

[0.635801560064509, 0.8618646517741291, 0.8925843915815326, 0.844466449798608, 0.7014826269856719, 0.6700013704407318, 0.8387260447617192, 0.7913040339091448]
<class 'list'>


### Back propagation
* from the last layer, reversely calculate errors, delta for each neuron

In [15]:
# Calculate gradient descent
def gradient(output):
    return output * (1.0 - output)

# Back-propagate to calculate errors for each neuron
def backward(Nnetwork, Y):
    # In reverse order, calculate each 
    for l in range(len(Nnetwork), 0, -1): 
        layer = Nnetwork[l-1]
        errors = []            
        if l == len(Nnetwork):
            # Calculate errors of the last layer using true_value Y and the output value calculated from forward propagation
            for j in range(len(layer)):
                neuron = layer[j]
                errors.append(neuron['output'] - Y[j])   
        else:
            for j in range(len(layer)):
                error = 0.0
                for neuron in Nnetwork[l]:
                    error = error + (neuron['weights'][j] * neuron['delta'])
                errors.append(error)             
        # Calculate delta 
        for j in range(len(layer)):
            neuron = layer[j]
            neuron['delta'] = errors[j] * gradient(neuron['output'])

In [16]:
#test back propagation
Y = inputs
backward(Nnetwork, Y)

for layer in Nnetwork:
    print("This is layer = ", Nnetwork.index(layer))
    for neuron in layer:
        print(neuron)

This is layer =  0
{'weights': array([0.14384274, 0.12878275, 0.80584928, 0.69732348, 0.88308062,
       0.65682526, 0.23758618, 0.11343477]), 'bias': 0.12236329388853007, 'output': 0.5661612559039465, 'delta': 0.07171005204620717}
{'weights': array([0.38000067, 0.71349092, 0.80736364, 0.92120311, 0.04074708,
       0.30606061, 0.25227374, 0.98360954]), 'bias': 0.32386296848509344, 'output': 0.6690438324979163, 'delta': 0.10765631603500894}
{'weights': array([0.27840316, 0.48136639, 0.62401003, 0.72232688, 0.37860356,
       0.51419296, 0.28760383, 0.66720034]), 'bias': 0.21580128800723009, 'output': 0.6210963912601727, 'delta': 0.09997856200681611}
This is layer =  1
{'weights': array([0.05873376, 0.64609207, 0.05972688]), 'bias': 0.05457477686050283, 'output': 0.635801560064509, 'delta': -0.0843330391493312}
{'weights': array([0.66266689, 0.78616185, 0.76064426]), 'bias': 0.4572777875427364, 'output': 0.8618646517741291, 'delta': 0.10260841166835048}
{'weights': array([0.98836434, 0.

### Rectify/update weights
* using delta and user-specified learning rate 


In [22]:
# Rectify weights with error and delta
def rec_weights(Nnetwork, input_row, learn_rate):
    for l in range(len(Nnetwork)): # in our case, l = 0, 1
        inputs = input_row
        if l != 0:
            inputs = [neuron['output'] for neuron in Nnetwork[l - 1]]
        for neuron in Nnetwork[l]:
            for j in range(len(inputs)):
                neuron['weights'][j] = neuron['weights'][j] - learn_rate * neuron['delta'] * inputs[j]
            neuron['bias'] = neuron['bias'] - learn_rate * neuron['delta']     

### Train a network for n times iterations
* choose of cost function: least squares,   
   $ 1/2 *[(h_\theta (x) - y]^2$

In [26]:
# Train a network for n iterations
def train_NN(Nnetwork, data_train, learn_rate, n_iterations, n_outputs):
    for n in range(n_iterations):
        sum_errors = 0.0
        for input_row in data_train:
            outputs = forward(Nnetwork, input_row)
            Y = input_row #input = output
            sum_errors = sum_errors + sum( 0.5* (outputs[i] - Y[i])**2 for i in range(len(Y)))            
            backward(Nnetwork, Y)          
            rec_weights(Nnetwork, input_row, learn_rate)
        print('n_iterations=%d, sum_errors=%.3f' % (n, sum_errors))

In [27]:
# generate an identity matrix for testing
X = np.identity(8)
X

array([[1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1.]])

In [28]:
# test train_NN function 
Nnetwork = initiate_NN(8, 3, 8)

# set learning_rate = .5, number of interation =10
train_NN(Nnetwork, X, 0.5, 10, 8)   

n_iterations=0, sum_errors=16.161
n_iterations=1, sum_errors=8.748
n_iterations=2, sum_errors=5.362
n_iterations=3, sum_errors=4.330
n_iterations=4, sum_errors=3.951
n_iterations=5, sum_errors=3.780
n_iterations=6, sum_errors=3.690
n_iterations=7, sum_errors=3.638
n_iterations=8, sum_errors=3.606
n_iterations=9, sum_errors=3.585


Above results show that sum_of_errors is reducing

### Predict output using trained NN

In [30]:
# predict output using a trained network
def prediction(Nnetwork_trained, input_row):
    predict = forward(Nnetwork_trained, input_row)
    prediction = [0 for i in range(len(input_row))]
    prediction[predict.index(max(predict))] = 1            
    return prediction

In [47]:
# test the pipeline

X = np.identity(8)

# generate a dataset, by randomly choose 1 row from identity matrix of size 8  
dataset = []
for i in range(100):
    row = np.random.choice(range(8))
    dataset.append(X.tolist()[row])

# initiate a neural network and train it 
Nnetwork = initiate_NN(8, 3, 8)
train_NN(Nnetwork, dataset, 0.5, 20, 8) # learning rate = 0.5, n_iterations= 20

#calculate the accuracy
def accuracy(Nnetwork, dataset):
  hits = 0  
  for r in dataset:
    predict_value = prediction(Nnetwork, r)
    #print('True Value =', r, 'Prediction=', predict_value)
    if predict_value == r:
        hits += 1
  return hits / (len(dataset))

print('accuracy:%.3f' % accuracy(Nnetwork, dataset))

n_iterations=0, sum_errors=62.673
n_iterations=1, sum_errors=43.784
n_iterations=2, sum_errors=43.597
n_iterations=3, sum_errors=43.375
n_iterations=4, sum_errors=43.031
n_iterations=5, sum_errors=42.444
n_iterations=6, sum_errors=41.405
n_iterations=7, sum_errors=39.703
n_iterations=8, sum_errors=37.407
n_iterations=9, sum_errors=34.983
n_iterations=10, sum_errors=32.937
n_iterations=11, sum_errors=31.374
n_iterations=12, sum_errors=30.130
n_iterations=13, sum_errors=29.028
n_iterations=14, sum_errors=27.954
n_iterations=15, sum_errors=26.850
n_iterations=16, sum_errors=25.700
n_iterations=17, sum_errors=24.513
n_iterations=18, sum_errors=23.308
n_iterations=19, sum_errors=22.097
accuracy:0.820


---


### Test different learning rate 

##### learning rate = 0.8

In [65]:
X = np.identity(8)
# generate a dataset, by randomly choose 1 row from identity matrix of size 8  
dataset = []
for i in range(100):
    row = np.random.choice(range(8))
    dataset.append(X.tolist()[row])

# initiate a neural network and train it 
np.random.seed(1)
Nnetwork = initiate_NN(8, 3, 8)
train_NN(Nnetwork, dataset, 0.8, 10, 8) # learning rate = 0.8, n_iterations= 10

#make predictions and calculate the accuracy
print('accuracy %.3f' % accuracy(Nnetwork, dataset))

n_iterations=0, sum_errors=57.154
n_iterations=1, sum_errors=43.944
n_iterations=2, sum_errors=43.616
n_iterations=3, sum_errors=43.103
n_iterations=4, sum_errors=42.162
n_iterations=5, sum_errors=40.449
n_iterations=6, sum_errors=37.855
n_iterations=7, sum_errors=34.834
n_iterations=8, sum_errors=31.800
n_iterations=9, sum_errors=28.920
accuracy 0.560


##### learning rate = 0.5

In [67]:
X = np.identity(8)
# generate a dataset, by randomly choose 1 row from identity matrix of size 8  
dataset = []
for i in range(100):
    row = np.random.choice(range(8))
    dataset.append(X.tolist()[row])

# initiate a neural network and train it 
np.random.seed(1)
Nnetwork = initiate_NN(8, 3, 8)
train_NN(Nnetwork, dataset, 0.5, 10, 8) # learning rate = 0.5, n_iterations= 10

#make predictions and calculate the accuracy
print('accuracy  %.3f' % accuracy(Nnetwork, dataset))

n_iterations=0, sum_errors=63.968
n_iterations=1, sum_errors=43.960
n_iterations=2, sum_errors=43.708
n_iterations=3, sum_errors=43.502
n_iterations=4, sum_errors=43.237
n_iterations=5, sum_errors=42.858
n_iterations=6, sum_errors=42.294
n_iterations=7, sum_errors=41.449
n_iterations=8, sum_errors=40.228
n_iterations=9, sum_errors=38.620
accuracy  0.430


##### learning rate = 0.3

In [68]:
X = np.identity(8)
# generate a dataset, by randomly choose 1 row from identity matrix of size 8  
dataset = []
for i in range(100):
    row = np.random.choice(range(8))
    dataset.append(X.tolist()[row])

# initiate a neural network and train it 
np.random.seed(1)
Nnetwork = initiate_NN(8, 3, 8)
train_NN(Nnetwork, dataset, 0.3, 10, 8) # learning rate = 0.3, n_iterations= 10

#make predictions and calculate the accuracy
print('accuracy  %.3f' % accuracy(Nnetwork, dataset))

n_iterations=0, sum_errors=75.764
n_iterations=1, sum_errors=44.295
n_iterations=2, sum_errors=43.771
n_iterations=3, sum_errors=43.613
n_iterations=4, sum_errors=43.491
n_iterations=5, sum_errors=43.363
n_iterations=6, sum_errors=43.215
n_iterations=7, sum_errors=43.034
n_iterations=8, sum_errors=42.808
n_iterations=9, sum_errors=42.520
accuracy  0.290


##### learning rate = 0.2

In [72]:
X = np.identity(8)
# generate a dataset, by randomly choose 1 row from identity matrix of size 8  
dataset = []
for i in range(100):
    row = np.random.choice(range(8))
    dataset.append(X.tolist()[row])

# initiate a neural network and train it 
np.random.seed(1)
Nnetwork = initiate_NN(8, 3, 8)
train_NN(Nnetwork, dataset, 0.2, 10, 8) # learning rate = 2, n_iterations= 10

#make predictions and calculate the accuracy
print('accuracy  %.3f' % accuracy(Nnetwork, dataset))

n_iterations=0, sum_errors=89.712
n_iterations=1, sum_errors=45.348
n_iterations=2, sum_errors=43.980
n_iterations=3, sum_errors=43.702
n_iterations=4, sum_errors=43.578
n_iterations=5, sum_errors=43.489
n_iterations=6, sum_errors=43.407
n_iterations=7, sum_errors=43.324
n_iterations=8, sum_errors=43.233
n_iterations=9, sum_errors=43.132
accuracy  0.290


**observation**
  * with fixed weights, smaller learning rate, slower convergence, it requires more iteration. 