<a href="https://colab.research.google.com/github/omidkhalafbeigi/neural_network_from_scratch/blob/main/Neural_Network_from_Scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np 
from numpy.random import normal, rand 
from sklearn.metrics import accuracy_score 
from sklearn.datasets import load_iris  
from sklearn.model_selection import train_test_split 

In [None]:
# Fixed seed so that "Restart & Run All" reproduces the same train/test split
# and the same random weight initialisation (the weight init draws from
# numpy's global random state via numpy.random.normal).
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# Iris: feature matrix X and integer class labels y.
dataset = load_iris()
X, y = dataset.data, dataset.target

# Hold out 10% of the samples for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, shuffle=True, random_state=RANDOM_SEED)

In [None]:
# Network hyper-parameters are collected interactively.
# `hidden_nodes` ends up holding the width of every hidden layer followed by
# the width of the output layer; `activations` holds the matching activation
# names in the same order.
learning_rate = float(input('Enter learning rate: '))
hidden_num = int(input('Enter number of hidden layers: '))
hidden_nodes = []
activations = []

for h in range(hidden_num):
  nodes_num = int(input(f'Enter number of nodes in hidden layer {h + 1}: '))
  hidden_nodes += [nodes_num]
  hidden_act = input(f'Enter activation for hidden layer {h + 1} (sigmoid, tanh, relu, linear): ')
  activations += [hidden_act]

print('--------------------------------')
# The output layer is stored as the last entry of both lists.
output_nodes_num = int(input('Enter number of nodes in output layer: '))
hidden_nodes += [output_nodes_num]
output_activation = input('Enter activation output layer (sigmoid, linear): ')
activations += [output_activation]

Enter learning rate: 0.2
Enter number of hidden layers: 1
Enter number of nodes in hidden layer 1: 64
Enter activation for hidden layer 1 (sigmoid, tanh, relu, linear): sigmoid
--------------------------------
Enter number of nodes in output layer: 3
Enter activation output layer (sigmoid, linear): sigmoid


In [None]:
def sigmoid(x):
  """Logistic function 1 / (1 + e^-x), applied element-wise."""
  denominator = 1 + np.exp(-x)
  return 1 / denominator

def sigmoid_der(x):
  """Derivative of the sigmoid, expressed in terms of its output.

  `x` must already be a sigmoid activation value, not the pre-activation.
  """
  complement = 1 - x
  return x * complement

def tanh(x):
  """Hyperbolic tangent, applied element-wise.

  Delegates to `np.tanh` instead of the explicit
  (e^x - e^-x) / (e^x + e^-x) formula: the explicit form overflows to
  inf / inf = nan for large |x|, whereas `np.tanh` saturates at +/-1.
  """
  return np.tanh(x)

def tanh_der(x):
  """Derivative of tanh, expressed in terms of its output.

  `x` must already be a tanh activation value, not the pre-activation.
  """
  squared = np.square(x)
  return 1 - squared

def relu_apply(x):
  """Scalar ReLU: return `x` when it is positive, otherwise the integer 0."""
  return x if x > 0 else 0

def relu(x):
  """Element-wise ReLU: max(x, 0).

  Uses `np.maximum` rather than `np.vectorize` over a scalar helper.
  `np.vectorize` infers the output dtype from the first element it
  evaluates, so an input whose first entry is non-positive (helper returns
  the integer 0) produced an integer array and silently truncated every
  later float activation. It also raised on empty input. `np.maximum`
  preserves the input dtype and handles empty arrays.
  """
  return np.maximum(x, 0)

def relu_der_apply(x):
  """Scalar ReLU derivative: 1 for positive `x`, otherwise 0."""
  slope = 0
  if x > 0:
    slope = 1
  return slope

def relu_der(x):
  """Element-wise ReLU derivative: 1 where `x` > 0, otherwise 0.

  Implemented with `np.where` instead of `np.vectorize`, which runs a
  Python-level loop and raises on empty input.
  """
  return np.where(np.asarray(x) > 0, 1, 0)

def linear(x):
  """Identity activation: returns its input unchanged."""
  return x
def linear_der(x):
  """Derivative of the identity activation: 1 everywhere.

  Returns an array of float ones shaped like `x`. The previous `x / x`
  produced nan (or a ZeroDivisionError for plain Python numbers) wherever
  the activation was exactly 0.
  """
  return np.ones_like(x, dtype=float)

def mse(d, y):
  """Return the signed residual between target `d` and prediction `y`.

  Despite the name, nothing is squared or averaged here: the result is
  simply `d - y`, element-wise.
  """
  residual = d - y
  return residual

def get_activation(function_name):
  """Look up an activation function by name.

  Accepted names: 'sigmoid', 'tanh', 'relu', 'linear'.
  Any other name raises KeyError.
  """
  registry = dict(sigmoid=sigmoid, tanh=tanh, relu=relu, linear=linear)
  return registry[function_name]

def get_activation_der(function_name):
  """Look up the derivative of an activation function by name.

  Accepted names: 'sigmoid', 'tanh', 'relu', 'linear'.
  Any other name raises KeyError.
  """
  registry = dict(
      sigmoid=sigmoid_der,
      tanh=tanh_der,
      relu=relu_der,
      linear=linear_der,
  )
  return registry[function_name]

In [None]:
def weight_bias_init(input_dim, hidden_num):
  """Draw initial weights and biases for every layer from N(0, 1).

  Layer widths are read from the module-level `hidden_nodes` list (and from
  `output_nodes_num` when there is no hidden layer).

  Args:
    input_dim: number of input features.
    hidden_num: number of hidden layers; 0 builds a single output layer.

  Returns:
    (weights_list, bias_list): one (fan_in, fan_out) weight matrix and one
    (fan_out,) bias vector per layer, ordered from input to output.
  """
  # First work out the (fan_in, fan_out) shape of every layer...
  if hidden_num > 0:
    layer_shapes = [
        (input_dim if idx == 0 else hidden_nodes[idx - 1], hidden_nodes[idx])
        for idx in range(hidden_num)
    ]
    # Output layer: last two entries of hidden_nodes.
    layer_shapes.append((hidden_nodes[-2], hidden_nodes[-1]))
  else:
    layer_shapes = [(input_dim, output_nodes_num)]

  # ...then sample weight and bias for each layer, in that order.
  weights_list, bias_list = [], []
  for fan_in, fan_out in layer_shapes:
    weights_list.append(normal(loc=0, scale=1, size=(fan_in, fan_out)))
    bias_list.append(normal(loc=0, scale=1, size=fan_out))

  return weights_list, bias_list

In [None]:
def predict(X_train, w, b, hidden_num):
  """Run the trained model on `X_train` and round the outputs.

  Args:
    X_train: input samples to predict on.
    w, b: per-layer lists when `hidden_num` > 0; otherwise the single
      weight matrix and bias vector of the one-layer model.
    hidden_num: number of hidden layers in the model.

  Returns:
    The network outputs rounded with `np.round`.
  """
  if hidden_num <= 0:
    # Single-layer model: one affine map followed by the first activation.
    act_fn = get_activation(activations[0])
    return np.round(act_fn(np.dot(X_train, w) + b))

  raw_output, _ = forward_mlp(X_train, w, b)
  return np.round(raw_output)

In [None]:
def forward_mlp(X, weights_list, bias_list):
  """Forward pass through every layer.

  The activation of layer i is looked up by name from the module-level
  `activations` list.

  Returns:
    (output, outputs_list): the final layer's activation, and the list of
    every layer's activation in input-to-output order.
  """
  layer_outputs = []
  signal = X.copy()
  for idx, layer_w in enumerate(weights_list):
    layer_b = bias_list[idx]
    act_fn = get_activation(activations[idx])
    # Affine transform followed by the layer's non-linearity.
    signal = np.dot(signal, layer_w)
    signal += layer_b
    signal = act_fn(signal)
    layer_outputs.append(signal)
  return signal, layer_outputs

In [None]:
def train_mlp(X_train, y_train, hidden_num, epochs=10):
  """Train the multi-layer network with per-sample (online) backpropagation.

  Reads the module-level `learning_rate` and `activations`.

  Args:
    X_train: training samples, indexed by sample.
    y_train: target vector for each sample, same order as `X_train`.
    hidden_num: number of hidden layers (the network has hidden_num + 1
      weight layers).
    epochs: number of passes over the training set.

  Returns:
    (weights_list, bias_list) after training, ordered input to output.
  """
  weights_list, bias_list = weight_bias_init(X_train.shape[-1], hidden_num)
  layer_count = hidden_num + 1

  for epoch in range(epochs):
    epoch_error = list()
    for sample_idx in range(len(X_train)):
      sample = X_train[sample_idx]
      label = y_train[sample_idx]
      output, outputs_list = forward_mlp(sample, weights_list, bias_list)
      error = mse(label, output)
      epoch_error.append(np.abs(error))

      # Updated parameters go into fresh lists so that back-propagation
      # through layer i + 1 still sees that layer's pre-update weights.
      new_weights_list = [None] * layer_count
      new_bias_list = [None] * layer_count
      gradient = None

      # Walk from the output layer back to the first layer.
      for layer_idx in range(hidden_num, -1, -1):
        # Each layer uses the derivative of its OWN activation. The first
        # layer previously reused the derivative left over from layer 1,
        # which is wrong whenever the two activations differ.
        activation_der = get_activation_der(activations[layer_idx])

        if layer_idx == hidden_num:
          # Output layer: local gradient comes straight from the error.
          gradient = activation_der(output) * error
        else:
          # Hidden layer: propagate the next layer's gradient backwards
          # through that layer's (old) weights.
          gradient = activation_der(outputs_list[layer_idx]) * np.dot(weights_list[layer_idx + 1], gradient)

        # Input seen by this layer: the raw sample for the first layer,
        # otherwise the previous layer's activation.
        layer_input = sample if layer_idx == 0 else outputs_list[layer_idx - 1]

        step = learning_rate * gradient
        # w[i][j] += learning_rate * gradient[j] * layer_input[i]
        new_weights_list[layer_idx] = weights_list[layer_idx] + np.outer(layer_input, step)
        # b[j] += learning_rate * gradient[j]
        new_bias_list[layer_idx] = bias_list[layer_idx] + step

      weights_list = new_weights_list
      bias_list = new_bias_list

    print(f'Epoch: {epoch + 1} - Loss: {np.mean(epoch_error)}')

  return weights_list, bias_list

In [None]:
def train_perceptron(X_train, y_train, epochs): # There is no hidden layer
  """Train a single-layer network with per-sample (online) updates.

  Reads the module-level `learning_rate` and `activations` (only the first
  activation name is used).

  Args:
    X_train: training samples, indexed by sample.
    y_train: target vector for each sample, same order as `X_train`.
    epochs: number of passes over the training set.

  Returns:
    (new_w, new_b): the trained weight matrix and bias vector.
  """
  weights_list, bias_list = weight_bias_init(X_train.shape[-1], hidden_num=0)
  new_w = weights_list[0].copy()
  new_b = bias_list[0].copy()
  activation = get_activation(activations[0])
  activation_der = get_activation_der(activations[0])

  for epoch in range(epochs):
    epoch_error = list()
    for sample_idx in range(len(X_train)):
      sample = X_train[sample_idx]
      label = y_train[sample_idx]
      output = activation(np.dot(sample, new_w) + new_b)
      error = mse(label, output)
      gradient = activation_der(output) * error

      step = learning_rate * gradient
      # w[i][j] += learning_rate * gradient[j] * sample[i]
      new_w += np.outer(sample, step)
      # b[j] += learning_rate * gradient[j]. Each bias only receives its
      # own output's gradient; the previous loop added every output's
      # gradient to the whole bias vector.
      new_b += step

      epoch_error.append(np.abs(error))

    print(f'Epoch: {epoch + 1} - Loss: {np.mean(epoch_error)}')

  return new_w, new_b

In [None]:
# One-hot rows for the three class labels (0, 1, 2).
ONE_HOT_ROWS = {0: [1, 0, 0], 1: [0, 1, 0], 2: [0, 0, 1]}


def one_hot_encode(labels):
  """Convert integer class labels into a list of one-hot rows.

  Labels other than 0, 1 or 2 are skipped. Each row is a fresh list so
  rows never alias one another.
  """
  # The comprehension variable stays local, so the dataset's `y` target
  # array is no longer overwritten by a loop variable of the same name.
  return [list(ONE_HOT_ROWS[int(label)]) for label in labels if int(label) in ONE_HOT_ROWS]


y_train_mlp = one_hot_encode(y_train)
y_test_mlp = one_hot_encode(y_test)

In [None]:
epochs = 100

# Pick the trainer that matches the requested architecture: the MLP trainer
# when there is at least one hidden layer, the single-layer trainer otherwise.
if hidden_num > 0:
  w, b = train_mlp(X_train, y_train_mlp, hidden_num, epochs)
else:
  w, b = train_perceptron(X_train, y_train_mlp, epochs)

Epoch: 1 - Loss: 0.32991604575549516
Epoch: 2 - Loss: 0.2907834505198954
Epoch: 3 - Loss: 0.27212548018658256
Epoch: 4 - Loss: 0.2604081885043095
Epoch: 5 - Loss: 0.25060481303968185
Epoch: 6 - Loss: 0.24249673056975393
Epoch: 7 - Loss: 0.2358501614478481
Epoch: 8 - Loss: 0.23036708654420185
Epoch: 9 - Loss: 0.2257275049846474
Epoch: 10 - Loss: 0.22167503313162695
Epoch: 11 - Loss: 0.2180602751606187
Epoch: 12 - Loss: 0.21482874039448552
Epoch: 13 - Loss: 0.21196907313786312
Epoch: 14 - Loss: 0.2096314817903886
Epoch: 15 - Loss: 0.15515380500192952
Epoch: 16 - Loss: 0.11263440160815252
Epoch: 17 - Loss: 0.10696739262295961
Epoch: 18 - Loss: 0.10320548959757089
Epoch: 19 - Loss: 0.10020534645551045
Epoch: 20 - Loss: 0.09762347308438374
Epoch: 21 - Loss: 0.09531746178966606
Epoch: 22 - Loss: 0.09321392615387383
Epoch: 23 - Loss: 0.0912726198783355
Epoch: 24 - Loss: 0.08947082345919925
Epoch: 25 - Loss: 0.08779371850584018
Epoch: 26 - Loss: 0.08622746893879035
Epoch: 27 - Loss: 0.08475467

In [None]:
# Evaluate on the held-out split: rounded network outputs are compared
# against the one-hot test labels.
pred = predict(X_train=X_test, w=w, b=b, hidden_num=hidden_num)
nn_accuracy = accuracy_score(y_true=y_test_mlp, y_pred=pred)
print(f'NN Accuracy: {nn_accuracy}')

NN Accuracy: 1.0
