$
\begin{pmatrix}
w_{11} & w_{21} \\
w_{12} & w_{22}
\end{pmatrix}
\cdot
\begin{pmatrix}
i_1 \\
i_2 
\end{pmatrix}
= X
$
and
$O = sigmoid(X)$

$
\begin{pmatrix}
w_{11} & w_{21} \\
w_{12} & w_{22}
\end{pmatrix}
^T
\cdot
\begin{pmatrix}
e_1 \\
e_2
\end{pmatrix}
= E
$

$
\begin{pmatrix}
E_1 \cdot S_1 \cdot (1 - S_1) \\
E_2 \cdot S_2 \cdot (1 - S_2) \\
\vdots \\
E_k \cdot S_k \cdot (1 - S_k)
\end{pmatrix}
\cdot
\begin{pmatrix}
O_1 & O_2 & \cdots & O_j
\end{pmatrix}
= \Delta{W}
$

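$\frac{dE}{dW_{jk}} = \frac{dE}{dO_k} \cdot \frac{dO_k}{dW_{jk}}$ $\implies$ $\frac{dE}{dW_{jk}} = -(t_k - O_k) \cdot O_k \cdot (1 - O_k) \cdot O_j$, where $O_k = sigmoid(X_k)$ and $E = \frac{1}{2} \sum_k (t_k - O_k)^2$ (for hidden layers the back-propagated error $E_k$ takes the place of $t_k - O_k$)

$ w_{jk}^{new} = w_{jk}^{old} - \alpha \cdot \frac{dE}{dw_{jk}} = w_{jk}^{old} + \alpha \cdot (t_k - O_k) \cdot O_k \cdot (1 - O_k) \cdot O_j$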

In [None]:
import numpy as np

In [None]:
class NeuralNetwork:
  """Fully connected network with two hidden layers and sigmoid activations.

  Each layer is a bias-free weight matrix of shape
  ``(nodes_in_this_layer, nodes_in_previous_layer)``, so a forward step is
  ``sigmoid(W @ column_vector)``. Training is plain per-call gradient descent
  on the error ``targets - outputs``.
  """

  def __init__(self, input_n, hidden_1_n, hidden_2_n, outputs_n, learning_rate):
    """Store the layer sizes and draw the initial weights.

    Args:
      input_n: number of input nodes.
      hidden_1_n: number of nodes in the first hidden layer.
      hidden_2_n: number of nodes in the second hidden layer.
      outputs_n: number of output nodes.
      learning_rate: step size multiplied into every weight update.
    """
    self.input_n = input_n
    self.hidden_1_n = hidden_1_n
    self.hidden_2_n = hidden_2_n
    self.outputs_n = outputs_n
    self.learning_rate = learning_rate

    # Weights are sampled from N(0, 1/sqrt(fan_in)); the draw order
    # (layer 1, 2, 3) is kept so seeded runs stay reproducible.
    self.weights_1 = np.random.normal(0.0, 1.0 / np.sqrt(self.input_n), (self.hidden_1_n, self.input_n))
    self.weights_2 = np.random.normal(0.0, 1.0 / np.sqrt(self.hidden_1_n), (self.hidden_2_n, self.hidden_1_n))
    self.weights_3 = np.random.normal(0.0, 1.0 / np.sqrt(self.hidden_2_n), (self.outputs_n, self.hidden_2_n))

  @staticmethod
  def sigmoid(x):
    """Return the logistic activation ``1 / (1 + exp(-x))`` element-wise.

    Defined on the class rather than as a per-instance lambda so that
    instances remain picklable.
    """
    return 1 / (1 + np.exp(-x))

  def _forward(self, inputs):
    """Run the forward pass on column-oriented ``inputs``.

    Returns:
      Tuple ``(hidden_1_outputs, hidden_2_outputs, final_outputs)`` holding
      the activations of every layer.
    """
    # I -> Hidden 1
    hidden_1_outputs = self.sigmoid(np.dot(self.weights_1, inputs))
    # Hidden 1 -> Hidden 2
    hidden_2_outputs = self.sigmoid(np.dot(self.weights_2, hidden_1_outputs))
    # Hidden 2 -> Outputs
    final_outputs = self.sigmoid(np.dot(self.weights_3, hidden_2_outputs))
    return hidden_1_outputs, hidden_2_outputs, final_outputs

  def train(self, inputs_list, targets_list):
    """Perform one gradient-descent update of all three weight matrices.

    Args:
      inputs_list: input values; converted to a 2-D array and transposed, so
        a flat sequence of ``input_n`` values becomes one column vector.
      targets_list: desired outputs, converted the same way.
    """
    inputs = np.array(inputs_list, ndmin=2).T
    targets = np.array(targets_list, ndmin=2).T

    hidden_1_outputs, hidden_2_outputs, final_outputs = self._forward(inputs)

    # Backward pass: the error is pushed back through the transposed weights.
    # All errors are computed BEFORE any matrix is modified so that every
    # layer sees the weights that produced this forward pass.
    output_errors = targets - final_outputs
    hidden_2_errors = np.dot(self.weights_3.T, output_errors)
    hidden_1_errors = np.dot(self.weights_2.T, hidden_2_errors)

    # Update: lr * (error * out * (1 - out)) . previous_layer_outputs^T,
    # where out * (1 - out) is the sigmoid derivative of the current layer.
    self.weights_3 += self.learning_rate * np.dot(output_errors * final_outputs * (1 - final_outputs), hidden_2_outputs.T)
    self.weights_2 += self.learning_rate * np.dot(hidden_2_errors * hidden_2_outputs * (1 - hidden_2_outputs), hidden_1_outputs.T)
    self.weights_1 += self.learning_rate * np.dot(hidden_1_errors * hidden_1_outputs * (1 - hidden_1_outputs), inputs.T)

  def predict(self, inputs_list):
    """Return the output-layer activations for ``inputs_list``.

    Args:
      inputs_list: input values; converted to a 2-D array and transposed, so
        a flat sequence of ``input_n`` values becomes one column vector.

    Returns:
      Array with ``outputs_n`` rows (one column per input column).
    """
    inputs = np.array(inputs_list, ndmin=2).T
    return self._forward(inputs)[-1]

