In [None]:
import numpy as np

# Number of gradient-descent steps run for each training pair in every test.
epochs = 10000

# Training pairs (x, y): inputs 1 through 5 with targets y = 5x.
x_arr = np.arange(1, 6)
y_arr = 5 * x_arr

## One Hidden Layer (two nodes)

The code below is based on the following basic neural network, consisting of one input $x$, two hidden nodes ($z_1$, $z_2$) in the same layer, and one output $y_0$.

$$
\require{enclose} \xrightarrow{w_1} \enclose{circle}{z_1} \xrightarrow{w_3}
$$

$$
\require{enclose}
 \enclose{circle}{x}\hspace{5cm}\enclose{circle}{y_0}
$$

$$
\require{enclose}
 \hspace{0.75cm}\xrightarrow{w_2} \enclose{circle}{z_2}\xrightarrow{w_4}
$$

In addition, we will use activation functions $\Gamma_n$, where $n$ signifies the activation layer. In our case, we let every $\Gamma_n$ be the following Leaky ReLU (Leaky *Rectified Linear Unit*) function.

$$
\Gamma(x) = \begin{cases}
\varepsilon x, & \text{if } x < 0 \\
x, & \text{otherwise}
\end{cases} \text{, } \varepsilon > 0.
$$

Its derivative is then as follows.

$$
\Gamma'(x) = \begin{cases}
\varepsilon, & \text{if } x < 0 \\
1, & \text{otherwise}
\end{cases} \text{, } \varepsilon > 0.
$$

***

**Forward Pass**

We define the following functions.
$$
z_1(x) = \Gamma(w_1*x) \\
z_2(x) = \Gamma(w_2*x) \\
y_0(x) = \Gamma(w_3*z_1 + w_4*z_2)
$$

We have a forward pass through our network,
$$
y_0(x) = \Gamma(w_3*z_1 + w_4*z_2)
$$

***
**Loss Function**

We define our loss function as a comparison between our target output and our current forward-pass output. We let $y$ be our target output. Our loss function is as follows.
$$
L(y_0) = \frac{1}{2}(y_0-y)^2
$$

*Note: We include the constant $\frac{1}{2}$ simply to obtain a clean derivative of this function.*

***

**Back Propagation**

We can adjust our weights using the gradients of our network. We first compute the following derivatives according to each weight in our system.

$$
\frac{\partial L}{\partial w_1} = \frac{\partial L}{\partial y_0} \frac{\partial y_0}{\partial z_1} \frac{\partial z_1}{\partial w_1} \\
\frac{\partial L}{\partial w_2} = \frac{\partial L}{\partial y_0} \frac{\partial y_0}{\partial z_2} \frac{\partial z_2}{\partial w_2} \\
\frac{\partial L}{\partial w_3} = \frac{\partial L}{\partial y_0} \frac{\partial y_0}{\partial w_3} \\
\frac{\partial L}{\partial w_4} = \frac{\partial L}{\partial y_0} \frac{\partial y_0}{\partial w_4}
$$
***

**Partial Derivative Components**

For reference, each of the partial derivatives is defined below:

$$
\frac{\partial L}{\partial y_0} = y_0 - y \\
\frac{\partial y_0}{\partial w_3} = \Gamma'(w_3*z_1 + w_4*z_2) * z_1 \\
\frac{\partial y_0}{\partial w_4} = \Gamma'(w_3*z_1 + w_4*z_2) * z_2 \\
\frac{\partial y_0}{\partial z_1} = \Gamma'(w_3*z_1 + w_4*z_2) * w_3 \\
\frac{\partial y_0}{\partial z_2} = \Gamma'(w_3*z_1 + w_4*z_2) * w_4 \\
\frac{\partial z_1}{\partial w_1} = \Gamma'(w_1*x) * x \\
\frac{\partial z_2}{\partial w_2} = \Gamma'(w_2*x) * x \\
$$

Applying these derivatives,

$$
\frac{\partial L}{\partial w_1} = (y_0-y) * \Gamma'(w_3*z_1 + w_4*z_2) * w_3 *\Gamma'(w_1*x) * x \\
\frac{\partial L}{\partial w_2} = (y_0-y) * \Gamma'(w_3*z_1 + w_4*z_2) * w_4 *\Gamma'(w_2*x) * x \\
\frac{\partial L}{\partial w_3} = (y_0-y) * \Gamma'(w_3*z_1 + w_4*z_2) * z_1 \\
\frac{\partial L}{\partial w_4} = (y_0-y) * \Gamma'(w_3*z_1 + w_4*z_2) * z_2 \\
$$


***

**Gradient Descent**

We now update our weights as follows, with learning rate $\eta$.

$$
w_{1,n+1} = w_{1,n} - \eta \frac{\partial L}{\partial w_1} \\
w_{2,n+1} = w_{2,n} - \eta \frac{\partial L}{\partial w_2} \\
w_{3,n+1} = w_{3,n} - \eta \frac{\partial L}{\partial w_3} \\
w_{4,n+1} = w_{4,n} - \eta \frac{\partial L}{\partial w_4} \\
$$




In [None]:
# The following is the program version of the described neural network above.

# Initial weights: w1/w2 feed the two hidden nodes, w3/w4 feed the output node.
w1 = w2 = w3 = w4 = 0.5

# Step size used in the gradient-descent updates.
η = 0.1

# Slope the leaky ReLU applies to negative inputs.
ε = 0.01

# Main functions
def Γ(x):
  """Leaky ReLU: negative inputs are scaled by ε, all others pass through unchanged."""
  return ε*x if x < 0 else x

def f(z_1, z_2):
  """Output node: leaky ReLU of the two hidden activations weighted by w3 and w4."""
  weighted_sum = z_1*w3 + z_2*w4
  return Γ(weighted_sum)

def z_1(x):
  """First hidden node: leaky ReLU of the input x scaled by w1."""
  pre_activation = w1*x
  return Γ(pre_activation)

def z_2(x):
  """Second hidden node: leaky ReLU of the input x scaled by w2."""
  pre_activation = w2*x
  return Γ(pre_activation)

# Derivatives
def Γ_deriv(x):
  """Derivative of the leaky ReLU: ε for negative inputs, 1 otherwise."""
  return ε if x < 0 else 1

def deriv_loss(y_0):
  """Derivative of the loss with respect to the output: y_0 minus the global target y."""
  error = y_0 - y
  return error

def deriv_weight_w1(y_0):
  """Chain-rule gradient of the loss w.r.t. w1, using the global input x and current weights."""
  output_slope = Γ_deriv(w3*z_1(x) + w4*z_2(x))
  hidden_slope = Γ_deriv(w1*x)
  return deriv_loss(y_0) * output_slope * w3 * hidden_slope * x

def deriv_weight_w2(y_0):
  """Chain-rule gradient of the loss w.r.t. w2, using the global input x and current weights."""
  output_slope = Γ_deriv(w3*z_1(x) + w4*z_2(x))
  hidden_slope = Γ_deriv(w2*x)
  return deriv_loss(y_0) * output_slope * w4 * hidden_slope * x

def deriv_weight_w3(y_0):
  """Chain-rule gradient of the loss w.r.t. w3, using the global input x and current weights."""
  output_slope = Γ_deriv(w3*z_1(x) + w4*z_2(x))
  return deriv_loss(y_0) * output_slope * z_1(x)

def deriv_weight_w4(y_0):
  """Chain-rule gradient of the loss w.r.t. w4, using the global input x and current weights."""
  output_slope = Γ_deriv(w3*z_1(x) + w4*z_2(x))
  return deriv_loss(y_0) * output_slope * z_2(x)


# Show the untrained weights.
print("Beginning values:")
print("Weight w1     : ", w1)
print("Weight w2     : ", w2)
print("Weight w3     : ", w3)
print("Weight w4     : ", w4)


# Show what the untrained network predicts for each training input.
for i, x in enumerate(x_arr):
  print("Input (x)    : ", x, end = " --> ")
  print("Output (y_0) : ", f(z_1(x), z_2(x)))

print("\nStarting training...")

# Print a progress dot roughly once per 1% of the epochs rather than once per
# epoch, so the cell output is not flooded with tens of thousands of dots.
dot_interval = max(1, epochs // 100)

# Train on one (x, y) pair at a time, running `epochs` gradient steps on it
# before moving to the next pair.
for i in range(len(x_arr)):
  x = x_arr[i]
  y = y_arr[i]
  print("Training with (x,y) to be (" + str(x) + "," + str(y) + ").")
  print("Learning rate: ", η)

  print("Epoch ", end="")
  for j in range(epochs):
    if j % dot_interval == 0:
      print(".", end="")
    y_0 = f(z_1(x), z_2(x))

    # Evaluate all four gradients at the current weights BEFORE changing any
    # weight. The derivative helpers read the global weights, so updating w1
    # first would make the later gradients mix new weights with the stale y_0.
    grad_w1 = deriv_weight_w1(y_0)
    grad_w2 = deriv_weight_w2(y_0)
    grad_w3 = deriv_weight_w3(y_0)
    grad_w4 = deriv_weight_w4(y_0)

    # Gradient-descent step: w <- w - η * dL/dw, applied to every weight.
    w1 = w1 - η * grad_w1
    w2 = w2 - η * grad_w2
    w3 = w3 - η * grad_w3
    w4 = w4 - η * grad_w4
  print()


print("\nTraining complete!")

# Show the trained weights.
print("\nEnding values:")
print("Weight w1     : ", w1)
print("Weight w2     : ", w2)
print("Weight w3     : ", w3)
print("Weight w4     : ", w4)

# Store and echo the trained network's prediction for every training input.
y0_arr2 = np.zeros(len(x_arr))
for i, x in enumerate(x_arr):
  y0_arr2[i] = f(z_1(x), z_2(x))
  print("Input (x)    : ", x, end = " --> ")
  print("Output (y_0) : ", f(z_1(x), z_2(x)))


print()

# Probe the network on inputs 6..10, which are outside the training data.
print("This NN is NOT trained on the following inputs.")
for unseen_x in range(6, 11):
  print("Input (x)    : ", unseen_x, end = " --> ")
  print("Output (y_0) : ", f(z_1(unseen_x), z_2(unseen_x)))

# Keep the predictions under a descriptive name as well.
second_neural_network = y0_arr2

print("----------------")
print("Result of second NN:")
print(f"{x_arr} --> {second_neural_network}")
print("----------------")

Beginning values:
Weight w1     :  0.5
Weight w2     :  0.5
Weight w3     :  0.5
Weight w4     :  0.5
Input (x)    :  1 --> Output (y_0) :  0.5
Input (x)    :  2 --> Output (y_0) :  1.0
Input (x)    :  3 --> Output (y_0) :  1.5
Input (x)    :  4 --> Output (y_0) :  2.0
Input (x)    :  5 --> Output (y_0) :  2.5

Starting training...
Training with (x,y) to be (1,5).
Learning rate:  0.1
Epoch ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................

## Generalizing for $n$ Nodes in the Hidden Layer

**What if we wanted $n$ nodes in the hidden layer?** For example, with $n = 3$:

$$
\require{enclose} \xrightarrow{w_1} \enclose{circle}{z_1} \xrightarrow{w_4}
$$

$$
\require{enclose}
 \enclose{circle}{x}\xrightarrow{w_2} \enclose{circle}{z_2} \xrightarrow{w_5}\enclose{circle}{y_0}
$$

$$
\require{enclose}
 \hspace{0.75cm}\xrightarrow{w_3} \enclose{circle}{z_3}\xrightarrow{w_6}
$$

***

**We notice that:** \\

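$$z_i = \Gamma(w_i * x), \quad i = 1, \dots, n$$

$$
y_0 = \Gamma \left( \sum_{i=1}^{n} z_i * w_{n+i} \right)
$$

Here $n$ is the number of hidden nodes, $w_i$ is the weight from the input into hidden node $z_i$, and $w_{n+i}$ is the weight from $z_i$ to the output (so $w_4, w_5, w_6$ in the diagram above, where $n = 3$).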


In [None]:
# The following is the program version of the described neural network above.
# Number of nodes in the single hidden layer.
hidden_neurons = 3

# Node k starts at 0.1 + 0.05*k in both layers; the two lists are kept as
# separate objects so each layer can be updated independently.
hidden_layer_weights = [.1 + k*0.05 for k in range(hidden_neurons)]  # layer one weights
out_weights = [.1 + k*0.05 for k in range(hidden_neurons)]           # layer two weights

# Step size used in the gradient-descent updates.
η = .1

# Slope the leaky ReLU applies to negative inputs.
ε = 0.01

# Main functions
def Γ(x):
  """Leaky ReLU activation: ε*x for negative x, x itself otherwise."""
  if x < 0:
    return ε*x
  return x

def f(z_list):
  """Output node: leaky ReLU applied to the sum of the entries of z_list."""
  total = sum(z_list)
  return Γ(total)

def z_list(x):
  """Return, for each hidden node, its activation for x multiplied by its output weight."""
  return [
    z(hidden_layer_weights[k], x) * out_weights[k]
    for k in range(hidden_neurons)
  ]

def z(wi, x):
  """Hidden-node activation: leaky ReLU of the input x scaled by the weight wi."""
  pre_activation = wi*x
  return Γ(pre_activation)

# Derivatives
def Γ_deriv(x):
  """Slope of the leaky ReLU at x: ε for negative x, 1 otherwise."""
  if x < 0:
    return ε
  return 1

def deriv_loss(y_0):
  """Derivative of the loss with respect to the output: y_0 minus the global target y."""
  error = y_0 - y
  return error

def deriv_hidden_weight(y_0, w_index):
  """Chain-rule gradient of the loss w.r.t. hidden weight number w_index.

  Uses the global input x and the current contents of both weight lists.
  """
  output_slope = Γ_deriv(sum(z_list(x)))
  hidden_slope = Γ_deriv(hidden_layer_weights[w_index]*x)
  return deriv_loss(y_0) * output_slope * out_weights[w_index] * hidden_slope * x

def deriv_outer_weight(y_0, w_index):
  """Chain-rule gradient of the loss w.r.t. output weight number w_index.

  Uses the global input x and the current contents of both weight lists.
  """
  output_slope = Γ_deriv(sum(z_list(x)))
  hidden_activation = z(hidden_layer_weights[w_index], x)
  return deriv_loss(y_0) * output_slope * hidden_activation

# Show the untrained weights of both layers.
print("Beginning values:")
print("Hidden Layer Weights: ")
for i in range(hidden_neurons):
  print(f"Weight w{i}     : ", hidden_layer_weights[i])

# The output-layer section must read from out_weights; printing
# hidden_layer_weights here would only repeat the section above.
print("Outer Layer Weights: ")
for i in range(hidden_neurons):
  print(f"Weight w{i}     : ", out_weights[i])


# Show what the untrained network predicts for each training input.
for i in range(len(x_arr)):
  x = x_arr[i]
  print("Input (x)    : ", x, end = " --> ")
  print("Output (y_0) : ", f(z_list(x)))

print("\nStarting training...")

# Print a progress dot roughly once per 1% of the epochs rather than once per
# epoch, so the cell output is not flooded with tens of thousands of dots.
dot_interval = max(1, epochs // 100)

# Train on one (x, y) pair at a time, running `epochs` gradient steps on it
# before moving to the next pair.
for i in range(len(x_arr)):
  x = x_arr[i]
  y = y_arr[i]
  print("Training with (x,y) to be (" + str(x) + "," + str(y) + ").")
  print("Learning rate: ", η)

  print("Epoch ", end="")
  for j in range(epochs):
    if j % dot_interval == 0:
      print(".", end="")
    y_0 = f(z_list(x))

    # Evaluate every gradient at the current weights BEFORE changing any
    # weight. The derivative helpers read the global weight lists, so updating
    # the hidden layer first would feed new weights (and a stale y_0) into the
    # outer-layer gradients.
    hidden_grads = [deriv_hidden_weight(y_0, k) for k in range(hidden_neurons)]
    outer_grads = [deriv_outer_weight(y_0, k) for k in range(hidden_neurons)]

    # Gradient-descent step on both layers. A separate index name (k) avoids
    # clobbering the outer loop's i.
    for k in range(hidden_neurons):
      hidden_layer_weights[k] = hidden_layer_weights[k] - η * hidden_grads[k]
      out_weights[k] = out_weights[k] - η * outer_grads[k]

  print()


print("\nTraining complete!")

# Show the trained weights of both layers.
print("\nEnding values:")
print("Hidden Layer Weights: ")
for i in range(hidden_neurons):
  print(f"Weight w{i}     : ", hidden_layer_weights[i])

# The output-layer section must read from out_weights; printing
# hidden_layer_weights here would only repeat the section above.
print("Outer Layer Weights: ")
for i in range(hidden_neurons):
  print(f"Weight w{i}     : ", out_weights[i])

# Store and echo the trained network's prediction for every training input.
y0_arr2 = np.zeros(len(x_arr))
for i in range(len(x_arr)):
  x = x_arr[i]
  y0_arr2[i] = f(z_list(x))
  print("Input (x)    : ", x, end = " --> ")
  print("Output (y_0) : ", f(z_list(x)))


print()

# Probe the network on inputs 6..10, which are outside the training data.
print("This NN is NOT trained on the following inputs.")
for unseen_x in range(6, 11):
  print("Input (x)    : ", unseen_x, end = " --> ")
  print("Output (y_0) : ", f(z_list(unseen_x)))

# second_neural_network = y0_arr2

# print("----------------")
# print("Result of second NN:")
# print(str(x_arr) + " --> " + str(second_neural_network))
# print("----------------")

Beginning values:
Hidden Layer Weights: 
Weight w0     :  0.1
Weight w1     :  0.15000000000000002
Weight w2     :  0.2
Outer Layer Weights: 
Weight w0     :  0.1
Weight w1     :  0.15000000000000002
Weight w2     :  0.2
Input (x)    :  1 --> Output (y_0) :  0.07250000000000001
Input (x)    :  2 --> Output (y_0) :  0.14500000000000002
Input (x)    :  3 --> Output (y_0) :  0.21750000000000005
Input (x)    :  4 --> Output (y_0) :  0.29000000000000004
Input (x)    :  5 --> Output (y_0) :  0.36250000000000004

Starting training...
Training with (x,y) to be (1,5).
Learning rate:  0.1
Epoch ........................................................................................................................................................................................................................................................................................................................................................................................................................

  return deriv_loss(y_0) * Γ_deriv(sum(z_list(x))) * out_weights[w_index] * Γ_deriv(hidden_layer_weights[w_index]*x) * x
  hidden_layer_weights[i] = hidden_layer_weights[i] - η * deriv_hidden_weight(y_0, i)


........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................

In [None]:
  # The following is the program version of the described neural network above, abstracted in functions

def lecture_learning_training(hidden_neurons, inputs=None, targets=None, n_epochs=None, learning_rate=.1):
    """Train a one-hidden-layer leaky-ReLU network and print a report.

    The network has one input, ``hidden_neurons`` hidden nodes and one output,
    and every weight starts at 0.5. Training visits the (input, target) pairs
    one at a time and runs ``n_epochs`` gradient-descent steps on each pair
    before moving on. Weights and predictions are printed before and after
    training, followed by predictions for the inputs 6 through 10.

    Args:
        hidden_neurons: Number of nodes in the hidden layer.
        inputs: Indexable sequence of training inputs. Defaults to the
            notebook-level ``x_arr``.
        targets: Indexable sequence of training targets, aligned with
            ``inputs``. Defaults to the notebook-level ``y_arr``.
        n_epochs: Gradient steps per training pair. Defaults to the
            notebook-level ``epochs``.
        learning_rate: Step size η of the gradient-descent update.

    Returns:
        None. All results are printed.
    """
    # Fall back to the notebook-level data and settings, read at call time,
    # so that the one-argument call keeps working as before.
    if inputs is None:
        inputs = x_arr
    if targets is None:
        targets = y_arr
    if n_epochs is None:
        n_epochs = epochs

    η = learning_rate

    # For leaky relu function
    ε = 0.01

    # layer one / layer two weights (separate lists, one entry per hidden node)
    hidden_layer_weights = [.5] * hidden_neurons
    out_weights = [.5] * hidden_neurons

    def Γ(v):
        """Leaky ReLU: ε*v for negative v, v otherwise."""
        return ε*v if v < 0 else v

    def Γ_deriv(v):
        """Derivative of the leaky ReLU: ε for negative v, 1 otherwise."""
        return ε if v < 0 else 1

    def hidden_activations(x):
        """Activation of every hidden node for input x."""
        return [Γ(w*x) for w in hidden_layer_weights]

    def predict(x):
        """Forward pass: network output for input x."""
        return Γ(sum(z*w for z, w in zip(hidden_activations(x), out_weights)))

    def print_weights():
        """Print the weights of both layers."""
        print("Hidden Layer Weights: ")
        for i in range(hidden_neurons):
            print(f"Weight w{i}     : ", hidden_layer_weights[i])

        # This section reads from out_weights, not hidden_layer_weights.
        print("Outer Layer Weights: ")
        for i in range(hidden_neurons):
            print(f"Weight w{i}     : ", out_weights[i])

    def print_predictions(xs):
        """Print the network output for every input in xs."""
        for x in xs:
            print("Input (x)    : ", x, end = " --> ")
            print("Output (y_0) : ", predict(x))

    def train_step(x, y):
        """Apply one gradient-descent step for the single pair (x, y)."""
        zs = hidden_activations(x)
        pre_output = sum(z*w for z, w in zip(zs, out_weights))
        y_0 = Γ(pre_output)

        # Factor shared by every gradient: dL/dy_0 times the output slope.
        delta = (y_0 - y) * Γ_deriv(pre_output)

        # Compute all gradients from the same forward pass before touching any
        # weight, so no gradient is evaluated on partially updated weights.
        hidden_grads = [
            delta * out_weights[k] * Γ_deriv(hidden_layer_weights[k]*x) * x
            for k in range(hidden_neurons)
        ]
        outer_grads = [delta * zs[k] for k in range(hidden_neurons)]

        for k in range(hidden_neurons):
            hidden_layer_weights[k] -= η * hidden_grads[k]
            out_weights[k] -= η * outer_grads[k]

    print("Beginning values:")
    print_weights()
    print_predictions(inputs)

    print("\nStarting training...")

    # One progress dot roughly per 1% of the epochs instead of one per epoch,
    # to keep the cell output readable.
    dot_interval = max(1, n_epochs // 100)

    for i in range(len(inputs)):
        x = inputs[i]
        y = targets[i]
        print("Training with (x,y) to be (" + str(x) + "," + str(y) + ").")
        print("Learning rate: ", η)

        print("Epoch ", end="")
        for j in range(n_epochs):
            if j % dot_interval == 0:
                print(".", end="")
            train_step(x, y)

        print()

    print("\nTraining complete!")

    print("\nEnding values:")
    print_weights()
    print_predictions(inputs)

    print()

    print("This NN is NOT trained on the following inputs.")
    print_predictions(range(6, 11))

# Run the experiment with a hidden layer of 15 nodes.
lecture_learning_training(hidden_neurons=15)

Beginning values:
Hidden Layer Weights: 
Weight w0     :  0.5
Weight w1     :  0.5
Weight w2     :  0.5
Weight w3     :  0.5
Weight w4     :  0.5
Weight w5     :  0.5
Weight w6     :  0.5
Weight w7     :  0.5
Weight w8     :  0.5
Weight w9     :  0.5
Weight w10     :  0.5
Weight w11     :  0.5
Weight w12     :  0.5
Weight w13     :  0.5
Weight w14     :  0.5
Outer Layer Weights: 
Weight w0     :  0.5
Weight w1     :  0.5
Weight w2     :  0.5
Weight w3     :  0.5
Weight w4     :  0.5
Weight w5     :  0.5
Weight w6     :  0.5
Weight w7     :  0.5
Weight w8     :  0.5
Weight w9     :  0.5
Weight w10     :  0.5
Weight w11     :  0.5
Weight w12     :  0.5
Weight w13     :  0.5
Weight w14     :  0.5
Input (x)    :  1 --> Output (y_0) :  3.75
Input (x)    :  2 --> Output (y_0) :  7.5
Input (x)    :  3 --> Output (y_0) :  11.25
Input (x)    :  4 --> Output (y_0) :  15.0
Input (x)    :  5 --> Output (y_0) :  18.75

Starting training...
Training with (x,y) to be (1,5).
Learning rate:  0.1
Epoch 

  z_list.append(z(hidden_layer_weights[i],x) * out_weights[i])
  return deriv_loss(y_0) * Γ_deriv(sum(z_list(x))) * out_weights[w_index] * Γ_deriv(hidden_layer_weights[w_index]*x) * x
  hidden_layer_weights[i] = hidden_layer_weights[i] - η * deriv_hidden_weight(y_0, i)


........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................