In [1]:
import numpy as np

<img src="imgs/neuron1.jpg" alt="Neuron1" style="width:400px;"/>


In [2]:
def sigmoid(x):
  return 1/(1+np.exp(-x))

def mse_loss(y_pred, y_true):
  return np.mean((y_pred-y_true)**2)

def sigmoid_derivative(x):
  return sigmoid(x)*(1-sigmoid(x))

class Neuron:
  def __init__(self, weights, bias, activation = 'sigmoid'):
    self.weights = weights
    self.bias = bias
    self.activation = activation

  def feedforward(self, inputs):
    #self.inputs = self.inputs
    z = np.dot(self.weights, inputs) + self.bias
    if self.activation=='sigmoid':
      h = sigmoid(z)
    else:
      h = z
    return h


weights = np.array([0, 1]) # w1 = 0, w2 = 1
bias = 4                   # b = 4
n = Neuron(weights, bias)

x = np.array([2, 3])       # x1 = 2, x2 = 3
print(n.feedforward(x))

0.9990889488055994


### Simple feedforward Neural Network for Regression
  - no activation at output neuron

<img src="imgs/neuron2.jpg" alt="Neuron2" style="width:400px;"/>


In [4]:
class ThreeNeuronsFeedForwardNN:
  def __init__(self, weights1, bias1, weights2, bias2, weights3, bias3):
    self.n1 = Neuron(weights1, bias1)
    self.n2 = Neuron(weights2, bias2)
    self.n3 = Neuron(weights3, bias3, activation='none')

  def feedforwardNN(self, inputs):
    h1 = self.n1.feedforward(inputs)
    h2 = self.n2.feedforward(inputs)

    inp_o = np.array([h1,h2]) # input to output neuron
    h3 = self.n3.feedforward(inp_o)
    return h3


In [5]:
weights1 = np.array([0, 1]); bias1 = 0
weights2 = np.array([0, 1]); bias2 = 0
weights3 = np.array([0, 1]); bias3 = 0

model = ThreeNeuronsFeedForwardNN( weights1, bias1, weights2, bias2, weights3, bias3)

x = np.array([2, 3])
print(model.feedforwardNN(x))

0.9525741268224334


In [28]:
##################################################################
#### Developing Neural Network from scratch for Regression #######
##################################################################

class ThreeNeuronsNNRegression:

  def __init__(self, weights1, bias1, weights2, bias2, weights3, bias3, print_loss=True):
    self.print_loss = print_loss
    self.weights1 = weights1
    self.weights2 = weights2
    self.weights3 = weights3
    self.bias1 = bias1
    self.bias2 = bias2
    self.bias3 = bias3
    self.n1 = Neuron(self.weights1, self.bias1)
    self.n2 = Neuron(self.weights2, self.bias2)
    self.n3 = Neuron(self.weights3, self.bias3, activation='none')


  def ForwardAndBackward(self, X,Y, epochs=10, lr=0.001):
    for epoch in range(epochs):
      loss=0
      for x,y in zip(X,Y):

      # Run feedforward part
        self.h1 = self.n1.feedforward(x)
        self.h2 = self.n2.feedforward(x)
        inp_o = np.array([self.h1,self.h2]) # input to output neuron
        self.h3 = self.n3.feedforward(inp_o)

      # after feedforward, we calculate loss
        loss += mse_loss(self.h3, y)

      # TO update parameters - we need gradients
      ## Let's calculate them now

        self.del_h1w1 =  self.h1 *(1-self.h1)*x[0]
        self.del_h1w2 =  self.h1 *(1-self.h1)*x[1]
        self.del_h1b1 =  self.h1 *(1-self.h1)

        self.del_h2w3 =  self.h2 *(1-self.h2)*x[0]
        self.del_h2w4 =  self.h2 *(1-self.h2)*x[1]
        self.del_h2b2 =  self.h2 *(1-self.h2)

        self.del_h3w5 =  self.h1
        self.del_h3w6 =  self.h2
        self.del_h3b3 =  np.ones(1)

        self.del_h3h1 =  self.weights3[0]
        self.del_h3h2 =  self.weights3[1]

        self.del_Lossh3 = 2*(self.h3-y)

      # Now we add backpropagation part (Chain rule)

        del_L1w1 = self.del_Lossh3 * self.del_h3h1 * self.del_h1w1
        del_L1w2 = self.del_Lossh3 * self.del_h3h1 * self.del_h1w2
        del_L1b1 = self.del_Lossh3 * self.del_h3h1 * self.del_h1b1

        del_L1w3 = self.del_Lossh3 * self.del_h3h2 * self.del_h2w3
        del_L1w4 = self.del_Lossh3 * self.del_h3h2 * self.del_h2w4
        del_L1b2 = self.del_Lossh3 * self.del_h3h2 * self.del_h2b2

        del_L1w5 = self.del_Lossh3 * self.del_h3w5
        del_L1w6 = self.del_Lossh3 * self.del_h3w6
        del_L1b3 = self.del_Lossh3 * self.del_h3b3

      # update parameters
        self.weights1[0] = self.weights1[0] - lr*(del_L1w1.item()) # w1
        self.weights1[1] = self.weights1[1] - lr*(del_L1w2.item()) # w2
        self.weights2[0] = self.weights2[0] - lr*(del_L1w3.item()) # w3
        self.weights2[1] = self.weights2[1] - lr*(del_L1w4.item()) # w4
        self.weights3[0] = self.weights3[0] - lr*(del_L1w5.item()) # w5
        self.weights3[1] = self.weights3[1] - lr*(del_L1w6.item()) # w6
        self.bias1[0] = self.bias1[0] - lr*(del_L1b1.item())       # b1
        self.bias2[0] = self.bias2[0] - lr*(del_L1b2.item())       # b2
        self.bias3[0] = self.bias3[0] - lr*(del_L1b3.item())       # b3

      # print loss after each epoch
      if self.print_loss and epoch%10==0:
        print(f'Epoch: {epoch}: {x}, {y},  MSE train loss: {loss}')


  def predict(self,x):
    h1 = self.n1.feedforward(x)
    h2 = self.n2.feedforward(x)
    inp_o = np.array([h1,h2]) # input to output neuron
    h3 = self.n3.feedforward(inp_o)
    return h3



In [24]:
# y = m1x1 + m2x2 + c
X1 = np.linspace(start=0,stop=1.1, num=20)#; print('X1.shape', X1.shape)
X2 = np.linspace(start=0,stop=2.5, num=20)#; print('X2.shape', X2.shape)
# X1 and X2 needs to be of same size

n = X1.shape[0]
m1 = 3
m2 = 4.5
b1 = 4
b2 = 6
noise = np.random.randn()*5

# x0 = np.ones_like(X1); print('x0.shape', x0.shape) # not required in neural networks because Neuron has in-built bias parameter

# Multiple Linear Regression
X = np.stack([X1, X2], axis=1); print('X.shape', X.shape)
Y = m1*X1 + b1 + m2*X2 + b2 + noise; print('Y.shape', Y.shape)

weights1 = np.random.randn(2)
weights2 = np.random.randn(2)
weights3 = np.random.randn(2)
bias1 = np.random.randn(1)
bias2 = np.random.randn(1)
bias3 = np.random.randn(1)

# Train our neural network!
model = ThreeNeuronsNNRegression(weights1, bias1, weights2, bias2, weights3, bias3)
model.ForwardAndBackward(X,Y, lr=0.01, epochs=100)

print('\n           Actual -----> Predictions: ')
for x,y in zip(X,Y):
  print(f'{y} ----> {model.predict(x).item()}')


X.shape (20, 2)
Y.shape (20,)
Epoch: 0: [1.1 2.5], 17.618832058640972,  MSE train loss: 1197.9120911577654
Epoch: 10: [1.1 2.5], 17.618832058640972,  MSE train loss: 111.24470533785582
Epoch: 20: [1.1 2.5], 17.618832058640972,  MSE train loss: 23.623820481313142
Epoch: 30: [1.1 2.5], 17.618832058640972,  MSE train loss: 11.497900551449263
Epoch: 40: [1.1 2.5], 17.618832058640972,  MSE train loss: 8.732020202105623
Epoch: 50: [1.1 2.5], 17.618832058640972,  MSE train loss: 7.293855480509466
Epoch: 60: [1.1 2.5], 17.618832058640972,  MSE train loss: 6.119170393205819
Epoch: 70: [1.1 2.5], 17.618832058640972,  MSE train loss: 5.084918468373129
Epoch: 80: [1.1 2.5], 17.618832058640972,  MSE train loss: 4.190902612893781
Epoch: 90: [1.1 2.5], 17.618832058640972,  MSE train loss: 3.437477955510783

           Actual -----> Predictions: 
3.0688320586409725 ----> 4.01580196466111
3.834621532325184 ----> 4.396668018633475
4.600411006009392 ----> 4.852101655977548
5.366200479693604 ----> 5.37426

__Congrats: We have succesfully developed our NN from scratch using just numpy.__

Let's also build a neural network from scratch for __classification__.
Notice that for classification - output neuron will have a sigmoid activation. So, the gradient calculation for h3 w.r.t w5, w6, b3, h1 & h2 will change.

In [27]:
def ce_loss(y_pred, y_true):
  return -np.mean(y_true*np.log(y_pred)+ (1-y)*np.log(1-y_pred))


##################################################################
#### Developing Neural Network from scratch for Classification #######
##################################################################

class ThreeNeuronsNNClassification:

  def __init__(self, weights1, bias1, weights2, bias2, weights3, bias3, print_loss=True):
    self.print_loss = print_loss
    self.weights1 = weights1
    self.weights2 = weights2
    self.weights3 = weights3
    self.bias1 = bias1
    self.bias2 = bias2
    self.bias3 = bias3
    self.n1 = Neuron(self.weights1, self.bias1)
    self.n2 = Neuron(self.weights2, self.bias2)
    self.n3 = Neuron(self.weights3, self.bias3, activation='sigmoid')


  def ForwardAndBackward(self, X,Y, epochs=10, lr=0.001):
    for epoch in range(epochs):
      loss=0
      for x,y in zip(X,Y):

      # Run feedforward part
        self.h1 = self.n1.feedforward(x)
        self.h2 = self.n2.feedforward(x)
        inp_o = np.array([self.h1,self.h2]) # input to output neuron
        self.h3 = self.n3.feedforward(inp_o)

      # after feedforward, we calculate loss
        loss += ce_loss(self.h3, y)

      # TO update parameters - we need gradients
      ## Let's calculate them now

        self.del_h1w1 =  self.h1 *(1-self.h1)*x[0]
        self.del_h1w2 =  self.h1 *(1-self.h1)*x[1]
        self.del_h1b1 =  self.h1 *(1-self.h1)

        self.del_h2w3 =  self.h2 *(1-self.h2)*x[0]
        self.del_h2w4 =  self.h2 *(1-self.h2)*x[1]
        self.del_h2b2 =  self.h2 *(1-self.h2)

        self.del_h3w5 =  self.h3 *(1-self.h3)*self.h1 # updated
        self.del_h3w6 =  self.h3 *(1-self.h3)*self.h2 # updated
        self.del_h3b3 =  self.h3 *(1-self.h3)         # updated

        self.del_h3h1 =  self.h3 *(1-self.h3)*self.weights3[0]  # updated
        self.del_h3h2 =  self.h3 *(1-self.h3)*self.weights3[1]  # updated


        # self.del_Lossh3 = 2*(self.h3-y) # we don't use mse loss for classification
        self.del_Lossh3 = (self.h3 - y)/(self.h3*(1-self.h3)) # derivative of ce_loss

      # Now we add backpropagation part (Chain rule)

        del_L1w1 = self.del_Lossh3 * self.del_h3h1 * self.del_h1w1
        del_L1w2 = self.del_Lossh3 * self.del_h3h1 * self.del_h1w2
        del_L1b1 = self.del_Lossh3 * self.del_h3h1 * self.del_h1b1

        del_L1w3 = self.del_Lossh3 * self.del_h3h2 * self.del_h2w3
        del_L1w4 = self.del_Lossh3 * self.del_h3h2 * self.del_h2w4
        del_L1b2 = self.del_Lossh3 * self.del_h3h2 * self.del_h2b2

        del_L1w5 = self.del_Lossh3 * self.del_h3w5
        del_L1w6 = self.del_Lossh3 * self.del_h3w6
        del_L1b3 = self.del_Lossh3 * self.del_h3b3


      # update parameters
        self.weights1[0] = self.weights1[0] - lr*(del_L1w1.item()) # w1
        self.weights1[1] = self.weights1[1] - lr*(del_L1w2.item()) # w2
        self.weights2[0] = self.weights2[0] - lr*(del_L1w3.item()) # w3
        self.weights2[1] = self.weights2[1] - lr*(del_L1w4.item()) # w4
        self.weights3[0] = self.weights3[0] - lr*(del_L1w5.item()) # w5
        self.weights3[1] = self.weights3[1] - lr*(del_L1w6.item()) # w6
        self.bias1[0] = self.bias1[0] - lr*(del_L1b1.item())       # b1
        self.bias2[0] = self.bias2[0] - lr*(del_L1b2.item())       # b2
        self.bias3[0] = self.bias3[0] - lr*(del_L1b3.item())       # b3

      # print loss after each epoch
      if self.print_loss and epoch%10==0:
        print(f'Epoch: {epoch}: {x}, {y},  CE train loss: {loss}')


  def predict(self,x):
    h1 = self.n1.feedforward(x)
    h2 = self.n2.feedforward(x)
    inp_o = np.array([h1,h2]) # input to output neuron
    h3 = self.n3.feedforward(inp_o)
    return h3



In [31]:
X1 = np.linspace(start=0,stop=1.1, num=20) # heights
X2 = np.linspace(start=0,stop=2.5, num=20) # weights
X = np.stack([X1, X2], axis=1); print('X.shape', X.shape)
Y = np.concatenate([np.array([0.]*10), np.array([1.]*10)]); print('Y.shape', Y.shape)

weights1 = np.random.randn(2)
weights2 = np.random.randn(2)
weights3 = np.random.randn(2)
bias1 = np.random.randn(1)
bias2 = np.random.randn(1)
bias3 = np.random.randn(1)

# Train our neural network!
model = ThreeNeuronsNNClassification(weights1, bias1, weights2, bias2, weights3, bias3)
model.ForwardAndBackward(X,Y, lr=0.1, epochs=100)

print('\nActual -----> Predictions: ')
for x,y in zip(X,Y):
  print(f'{y} ----> {model.predict(x).item()}')


X.shape (20, 2)
Y.shape (20,)
Epoch: 0: [1.1 2.5], 1.0,  CE train loss: 10.429837304830054
Epoch: 10: [1.1 2.5], 1.0,  CE train loss: 5.554405237065651
Epoch: 20: [1.1 2.5], 1.0,  CE train loss: 4.229370788285881
Epoch: 30: [1.1 2.5], 1.0,  CE train loss: 3.139879326225674
Epoch: 40: [1.1 2.5], 1.0,  CE train loss: 2.4533604576692385
Epoch: 50: [1.1 2.5], 1.0,  CE train loss: 2.017299121212304
Epoch: 60: [1.1 2.5], 1.0,  CE train loss: 1.7236988132294926
Epoch: 70: [1.1 2.5], 1.0,  CE train loss: 1.5144718504285584
Epoch: 80: [1.1 2.5], 1.0,  CE train loss: 1.3581824488732965
Epoch: 90: [1.1 2.5], 1.0,  CE train loss: 1.2369266816322135

Actual -----> Predictions: 
0.0 ----> 0.002825865184276334
0.0 ----> 0.003368389367993497
0.0 ----> 0.00432497243680419
0.0 ----> 0.006138285892632384
0.0 ----> 0.009907984687637199
0.0 ----> 0.018631298217389958
0.0 ----> 0.04097747380255652
0.0 ----> 0.10085978056325885
0.0 ----> 0.24470623631201874
0.0 ----> 0.48406394309174516
1.0 ----> 0.712041756

__Double Congrats: We have succesfully developed another NN from scratch using just numpy.__

In [32]:
# Another helpful resource
#### https://victorzhou.com/blog/intro-to-neural-networks/