In [1]:
# imports
import numpy as np

# Initialization

Useful links:
1) https://ai.stackexchange.com/questions/37968/back-propagation-activation-function-derivative
2) https://www.cs.swarthmore.edu/~meeden/cs81/f15/BackPropDeriv.pdf

In [2]:
# Activation function (the forward pass applies it to both layers)
def tanh(x):
    """Return the element-wise hyperbolic tangent of ``x``.

    ``x`` may be a scalar or a NumPy array; the result has the same shape
    and every value lies in the open interval (-1, 1).
    """
    activated = np.tanh(x)
    return activated
# Derivative of the activation function, needed for backpropagation
def tanh_derivative(x):
    """Return d/dx tanh(x) = 1 - tanh(x)**2, evaluated element-wise.

    ``x`` is the value *before* the activation is applied (the weighted sum),
    not the already-activated output: tanh is applied to ``x`` inside here.
    """
    activated = np.tanh(x)
    return 1.0 - activated ** 2

Useful links:
1) https://xnought.github.io/backprop-explainer/
2) https://medium.com/@eugenesh4work/from-inputs-to-outputs-understanding-the-inner-workings-of-a-neural-network-8a5fc25c7388
3) https://datascience.stackexchange.com/questions/44703/how-does-gradient-descent-and-backpropagation-work-together

+ input node - Each node corresponds to one feature/dimension of the input data. For example, if you're working with a dataset that has three features (temp, humidity, wind speed) then you'd set this to 3.
+ hidden node - Helps transform the input data into a useful representation for the output layer (via weights and activation functions).
+ output node - Each node corresponds to one dimension of the output data. For example, if you're trying to predict two different values from each input (temp and humidity the next day) then you'd set this to 2.
+ training pair - This is set based on the dataset size. Each column of the input matrix I and output matrix Y is one training example.

In [3]:
# Network dimensions; these size the data, weight and bias arrays created in the following cells
num_input_nodes = 3  # features per training example (rows of I, columns of W1)
num_hidden_nodes = 4  # nodes in the hidden layer (rows of W1 and b1, columns of W2)
num_output_nodes = 2  # values predicted per example (rows of Y, W2 and b2)
num_training_pairs = 5  # number of training examples (columns of I and Y)

Useful links:
1) https://numpy.org/doc/stable/reference/random/generated/numpy.random.uniform.html

In [4]:
# Seed NumPy's global random generator so that "Restart Kernel -> Run All" reproduces
# the same data (and the same weights/biases, which later cells draw from this generator).
np.random.seed(42)
# Inputs and targets are drawn uniformly from [-1, 1) so the targets lie within tanh's
# output range. Shape is (rows, columns) = (nodes, training pairs): one example per column.
I = np.random.uniform(low=-1, high=1, size=(num_input_nodes, num_training_pairs))
Y = np.random.uniform(low=-1, high=1, size=(num_output_nodes, num_training_pairs))

Useful links:
1) https://datascience.stackexchange.com/questions/67440/how-to-understand-the-weights-and-biases-for-beginners
2) https://www.kaggle.com/code/ayuraj/experiment-tracking-with-weights-and-biases

In [5]:
# Weight matrices, initialised uniformly in [-1, 1).
# Rows index the nodes of the receiving layer, columns the nodes feeding into it:
#   W1 maps input -> hidden, W2 maps hidden -> output.
W1 = np.random.uniform(low=-1, high=1, size=(num_hidden_nodes, num_input_nodes))
W2 = np.random.uniform(low=-1, high=1, size=(num_output_nodes, num_hidden_nodes))
# Bias column vectors (one entry per node in the layer); the trailing dimension of 1
# lets them broadcast across the training-pair columns when added in the forward pass.
b1 = np.random.uniform(low=-1, high=1, size=(num_hidden_nodes, 1))
b2 = np.random.uniform(low=-1, high=1, size=(num_output_nodes, 1))

Useful links:
1) https://www.jeremyjordan.me/nn-learning-rate/
2) https://medium.com/@theom/a-very-short-visual-introduction-to-learning-rate-schedulers-with-code-189eddffdb00

In [6]:
# Step size for training: scales how far the weights and biases are adjusted on each
# update while moving toward a minimum of the loss function.
# NOTE(review): not used in the cells shown here; presumably consumed by the training loop later.
learning_rate = 0.1

# Loop for training cycles

Useful links:
1) https://www.geeksforgeeks.org/numpy-tanh-python/
2) https://numpy.org/doc/stable/reference/generated/numpy.tanh.html
3) https://numpy.org/doc/stable/reference/generated/numpy.dot.html
4) https://www.statlect.com/matrix-algebra/linear-combinations (see a1, a2 for this)

### Layer 1:
$
( a_i^1 = \sum_{k=1}^{N} w_{i,k}^1 I_k + b_i^1 )
$

### Layer 2:
$
( a_i^2 = \sum_{k=1}^{N} w_{i,k}^2 y_k^1 + b_i^2 )
$

### Activation:
$
( y_i^2 = f^2(a_i^2) )
$

- $( f^2 )$ can be linear or $( tanh )$:

&emsp;&emsp;&emsp;$
( y_i^2 = tanh(a_i^2) )
$

In [7]:
# Forward pass, all training pairs at once (one column per pair).
# Hidden layer: weighted sum of the inputs plus bias, then tanh.
a1 = W1 @ I + b1
y1 = tanh(a1)
# Output layer: weighted sum of the hidden outputs plus bias, then tanh.
a2 = W2 @ y1 + b2
y2 = tanh(a2)

# Evaluate error for pattern p and save this


In [8]:
# Show the inputs and every intermediate result of the forward pass.
# Every label after the first starts with "\n" so a blank line separates the matrices.
for label, values in (
    ("Input Matrix (I):", I),
    ("\nLayer 1 Activations (a1):", a1),
    ("\nLayer 1 Outputs (y1):", y1),
    ("\nLayer 2 Activations (a2):", a2),
    ("\nLayer 2 Outputs (y2):", y2),
):
    print(label, values)

Input Matrix (I): [[-0.28222456 -0.85424555 -0.55675678 -0.47688207 -0.10187587]
 [ 0.45198202 -0.156988   -0.23747066  0.81844079 -0.06969202]
 [ 0.65715824 -0.42563625  0.63404486  0.78238117  0.78818263]]

Layer 1 Activations (a1): [[-0.62346026 -1.62468229 -0.42864078 -0.90450683 -0.03684358]
 [-0.8582281  -0.09898068 -0.49297951 -0.9930769  -0.73915073]
 [ 0.8022422  -0.45217038  0.16453386  1.2301534   0.40224061]
 [-0.52409296  0.31961053 -0.33219617 -0.81939103 -0.36239655]]

Layer 1 Outputs (y1): [[-0.55353267 -0.92530055 -0.40418476 -0.71848525 -0.03682691]
 [-0.69534362 -0.0986587  -0.45657803 -0.75867127 -0.62863178]
 [ 0.66528842 -0.42368142  0.16306504  0.84262382  0.38186448]
 [-0.48085279  0.30915472 -0.32049275 -0.67473829 -0.34732323]]

Layer 2 Activations (a2): [[-0.13935461 -0.63613299 -0.31228511 -0.25489426  0.08348299]
 [ 0.57799249 -0.0928181   0.5016833   0.60426905  0.72667249]]

Layer 2 Outputs (y2): [[-0.13845949 -0.5622608  -0.30251452 -0.2495138   0.083289