In [2]:
import numpy as np

__We show why feature normalization is crucial for training Neural Networks with an experiemntal setup for Linear Regression in 2 variables__

### Simple Neural Network for Regression
  - no activation at output neuron

<img src="imgs/neuron1.jpg" alt="Neuron1" style="width:400px;"/>


<img src="imgs/neuron2.jpg" alt="Neuron2" style="width:400px;"/>


In [4]:

def sigmoid(x):
  return 1/(1+np.exp(-x))

def mse_loss(y_pred, y_true):
  return np.mean((y_pred-y_true)**2)

def sigmoid_derivative(x):
  return sigmoid(x)*(1-sigmoid(x))

##################################################################
#### Developing Neural Network from scratch for Regression #######
##################################################################


class Neuron:
  def __init__(self, weights, bias, activation = 'sigmoid'):
    self.weights = weights
    self.bias = bias
    self.activation = activation

  def feedforward(self, inputs):
    #self.inputs = self.inputs
    z = np.dot(self.weights, inputs) + self.bias
    if self.activation=='sigmoid':
      h = sigmoid(z)
    else:
      h = z
    return h


class ThreeNeuronsNNRegression:

  def __init__(self, weights1, bias1, weights2, bias2, weights3, bias3, print_loss=True):
    self.print_loss = print_loss
    self.weights1 = weights1
    self.weights2 = weights2
    self.weights3 = weights3
    self.bias1 = bias1
    self.bias2 = bias2
    self.bias3 = bias3
    self.n1 = Neuron(self.weights1, self.bias1)
    self.n2 = Neuron(self.weights2, self.bias2)
    self.n3 = Neuron(self.weights3, self.bias3, activation='none')


  def ForwardAndBackward(self, X,Y, epochs=10, lr=0.001):
    for epoch in range(epochs):
      loss=0
      for x,y in zip(X,Y):

      # Run feedforward part
        self.h1 = self.n1.feedforward(x)
        self.h2 = self.n2.feedforward(x)
        inp_o = np.array([self.h1,self.h2]) # input to output neuron
        self.h3 = self.n3.feedforward(inp_o)

      # after feedforward, we calculate loss
        loss += mse_loss(self.h3, y)

      # TO update parameters - we need gradients
      ## Let's calculate them now

        self.del_h1w1 =  self.h1 *(1-self.h1)*x[0]
        self.del_h1w2 =  self.h1 *(1-self.h1)*x[1]
        self.del_h1b1 =  self.h1 *(1-self.h1)

        self.del_h2w3 =  self.h2 *(1-self.h2)*x[0]
        self.del_h2w4 =  self.h2 *(1-self.h2)*x[1]
        self.del_h2b2 =  self.h2 *(1-self.h2)

        self.del_h3w5 =  self.h1
        self.del_h3w6 =  self.h2
        self.del_h3b3 =  np.ones(1)

        self.del_h3h1 =  self.weights3[0]
        self.del_h3h2 =  self.weights3[1]

        self.del_Lossh3 = 2*(self.h3-y)

      # Now we add backpropagation part (Chain rule)

        del_L1w1 = self.del_Lossh3 * self.del_h3h1 * self.del_h1w1
        del_L1w2 = self.del_Lossh3 * self.del_h3h1 * self.del_h1w2
        del_L1b1 = self.del_Lossh3 * self.del_h3h1 * self.del_h1b1

        del_L1w3 = self.del_Lossh3 * self.del_h3h2 * self.del_h2w3
        del_L1w4 = self.del_Lossh3 * self.del_h3h2 * self.del_h2w4
        del_L1b2 = self.del_Lossh3 * self.del_h3h2 * self.del_h2b2

        del_L1w5 = self.del_Lossh3 * self.del_h3w5
        del_L1w6 = self.del_Lossh3 * self.del_h3w6
        del_L1b3 = self.del_Lossh3 * self.del_h3b3

      # update parameters
        self.weights1[0] = self.weights1[0] - lr*(del_L1w1.item()) # w1
        self.weights1[1] = self.weights1[1] - lr*(del_L1w2.item()) # w2
        self.weights2[0] = self.weights2[0] - lr*(del_L1w3.item()) # w3
        self.weights2[1] = self.weights2[1] - lr*(del_L1w4.item()) # w4
        self.weights3[0] = self.weights3[0] - lr*(del_L1w5.item()) # w5
        self.weights3[1] = self.weights3[1] - lr*(del_L1w6.item()) # w6
        self.bias1[0] = self.bias1[0] - lr*(del_L1b1.item())       # b1
        self.bias2[0] = self.bias2[0] - lr*(del_L1b2.item())       # b2
        self.bias3[0] = self.bias3[0] - lr*(del_L1b3.item())       # b3

      # print loss after each epoch
      if self.print_loss and epoch%10==0:
        print(f'Epoch: {epoch}: {x}, {y},  MSE train loss: {loss}')


  def predict(self,x):
    h1 = self.n1.feedforward(x)
    h2 = self.n2.feedforward(x)
    inp_o = np.array([h1,h2]) # input to output neuron
    h3 = self.n3.feedforward(inp_o)
    return h3



## Case-1: Both X1 and X2 have about the same range and close to 0 - 1

In [8]:
# y = m1x1 + m2x2 + c
X1 = np.linspace(start=0,stop=1.1, num=20)#; print('X1.shape', X1.shape)
X2 = np.linspace(start=0,stop=2.5, num=20)#; print('X2.shape', X2.shape)
# X1 and X2 needs to be of same size

n = X1.shape[0]
m1 = 3
m2 = 4.5
b1 = 4
b2 = 6
noise = np.random.randn()*5

# x0 = np.ones_like(X1); print('x0.shape', x0.shape) # not required in neural networks because Neuron has in-built bias parameter

# Multiple Linear Regression
X = np.stack([X1, X2], axis=1); print('X.shape', X.shape)
Y = m1*X1 + b1 + m2*X2 + b2 + noise; print('Y.shape', Y.shape)

weights1 = np.random.randn(2)
weights2 = np.random.randn(2)
weights3 = np.random.randn(2)
bias1 = np.random.randn(1)
bias2 = np.random.randn(1)
bias3 = np.random.randn(1)

# Train our neural network!
model = ThreeNeuronsNNRegression(weights1, bias1, weights2, bias2, weights3, bias3)
model.ForwardAndBackward(X,Y, lr=0.01, epochs=100)

print('\n           Actual -----> Predictions: ')
for x,y in zip(X,Y):
  print(f'{y} ----> {model.predict(x).item()}')


X.shape (20, 2)
Y.shape (20,)
Epoch: 0: [1.1 2.5], 30.6580555957063,  MSE train loss: 6593.7024903588135
Epoch: 10: [1.1 2.5], 30.6580555957063,  MSE train loss: 28.033292616935647
Epoch: 20: [1.1 2.5], 30.6580555957063,  MSE train loss: 6.643828100887328
Epoch: 30: [1.1 2.5], 30.6580555957063,  MSE train loss: 3.69129849662327
Epoch: 40: [1.1 2.5], 30.6580555957063,  MSE train loss: 3.0714037618358416
Epoch: 50: [1.1 2.5], 30.6580555957063,  MSE train loss: 2.979279670857386
Epoch: 60: [1.1 2.5], 30.6580555957063,  MSE train loss: 3.0022128492885662
Epoch: 70: [1.1 2.5], 30.6580555957063,  MSE train loss: 3.0319872127089145
Epoch: 80: [1.1 2.5], 30.6580555957063,  MSE train loss: 3.042588676023509
Epoch: 90: [1.1 2.5], 30.6580555957063,  MSE train loss: 3.031479494544288

           Actual -----> Predictions: 
16.1080555957063 ----> 17.144008767070492
16.873845069390512 ----> 17.443150690741483
17.63963454307472 ----> 17.800451833342382
18.405424016758932 ----> 18.227718253878454
19.1

### We see that model converges well in Case-1

## Case-2: Both X1 and X2 have about the same range and >> 0 - 1

In [11]:
# y = m1x1 + m2x2 + c
X1 = np.linspace(start=10,stop=11.1, num=20)#; print('X1.shape', X1.shape)
X2 = np.linspace(start=10,stop=12.5, num=20)#; print('X2.shape', X2.shape)
# X1 and X2 needs to be of same size

n = X1.shape[0]
m1 = 3
m2 = 4.5
b1 = 4
b2 = 6
noise = np.random.randn()*5

# x0 = np.ones_like(X1); print('x0.shape', x0.shape) # not required in neural networks because Neuron has in-built bias parameter

# Multiple Linear Regression
X = np.stack([X1, X2], axis=1); print('X.shape', X.shape)
Y = m1*X1 + b1 + m2*X2 + b2 + noise; print('Y.shape', Y.shape)

weights1 = np.random.randn(2)
weights2 = np.random.randn(2)
weights3 = np.random.randn(2)
bias1 = np.random.randn(1)
bias2 = np.random.randn(1)
bias3 = np.random.randn(1)

# Train our neural network!
model = ThreeNeuronsNNRegression(weights1, bias1, weights2, bias2, weights3, bias3)
model.ForwardAndBackward(X,Y, lr=0.01, epochs=100)

print('\n           Actual -----> Predictions: ')
for x,y in zip(X,Y):
  print(f'{y} ----> {model.predict(x).item()}')


X.shape (20, 2)
Y.shape (20,)
Epoch: 0: [11.1 12.5], 102.88572777832591,  MSE train loss: 91971.89530364238
Epoch: 10: [11.1 12.5], 102.88572777832591,  MSE train loss: 400.8711939265631
Epoch: 20: [11.1 12.5], 102.88572777832591,  MSE train loss: 401.6620718395914
Epoch: 30: [11.1 12.5], 102.88572777832591,  MSE train loss: 401.6622781985129
Epoch: 40: [11.1 12.5], 102.88572777832591,  MSE train loss: 401.66225747501807
Epoch: 50: [11.1 12.5], 102.88572777832591,  MSE train loss: 401.662236784727
Epoch: 60: [11.1 12.5], 102.88572777832591,  MSE train loss: 401.6622161664455
Epoch: 70: [11.1 12.5], 102.88572777832591,  MSE train loss: 401.6621955946159
Epoch: 80: [11.1 12.5], 102.88572777832591,  MSE train loss: 401.66217504382035
Epoch: 90: [11.1 12.5], 102.88572777832591,  MSE train loss: 401.6621544886955

           Actual -----> Predictions: 
88.33572777832592 ----> 96.63876022337257
89.10151725201013 ----> 96.63876103738897
89.86730672569435 ----> 96.63876164050272
90.63309619937

#### We see that model error now saturates in relatively higher range of 404. Also all predictions are of same values ==> Not a good convergence.

## Case-3: X1 in range 0-1 and X2 inrange > > 0 - 1

In [14]:
# y = m1x1 + m2x2 + c
X1 = np.linspace(start=0,stop=1.1, num=20)#; print('X1.shape', X1.shape)
X2 = np.linspace(start=10,stop=12.5, num=20)#; print('X2.shape', X2.shape)
# X1 and X2 needs to be of same size

n = X1.shape[0]
m1 = 3
m2 = 4.5
b1 = 4
b2 = 6
noise = np.random.randn()*5

# x0 = np.ones_like(X1); print('x0.shape', x0.shape) # not required in neural networks because Neuron has in-built bias parameter

# Multiple Linear Regression
X = np.stack([X1, X2], axis=1); print('X.shape', X.shape)
Y = m1*X1 + b1 + m2*X2 + b2 + noise; print('Y.shape', Y.shape)

weights1 = np.random.randn(2)
weights2 = np.random.randn(2)
weights3 = np.random.randn(2)
bias1 = np.random.randn(1)
bias2 = np.random.randn(1)
bias3 = np.random.randn(1)

# Train our neural network!
model = ThreeNeuronsNNRegression(weights1, bias1, weights2, bias2, weights3, bias3)
model.ForwardAndBackward(X,Y, lr=0.01, epochs=100)

print('\n           Actual -----> Predictions: ')
for x,y in zip(X,Y):
  print(f'{y} ----> {model.predict(x).item()}')


X.shape (20, 2)
Y.shape (20,)
Epoch: 0: [ 1.1 12.5], 73.0336991110552,  MSE train loss: 55113.28697711192
Epoch: 10: [ 1.1 12.5], 73.0336991110552,  MSE train loss: 401.0067815050575
Epoch: 20: [ 1.1 12.5], 73.0336991110552,  MSE train loss: 401.6619647360742
Epoch: 30: [ 1.1 12.5], 73.0336991110552,  MSE train loss: 401.66215269394803
Epoch: 40: [ 1.1 12.5], 73.0336991110552,  MSE train loss: 401.66215274744155
Epoch: 50: [ 1.1 12.5], 73.0336991110552,  MSE train loss: 401.6621527474567
Epoch: 60: [ 1.1 12.5], 73.0336991110552,  MSE train loss: 401.6621527474567
Epoch: 70: [ 1.1 12.5], 73.0336991110552,  MSE train loss: 401.6621527474567
Epoch: 80: [ 1.1 12.5], 73.0336991110552,  MSE train loss: 401.6621527474567
Epoch: 90: [ 1.1 12.5], 73.0336991110552,  MSE train loss: 401.6621527474567

           Actual -----> Predictions: 
58.4836991110552 ----> 66.78673337688892
59.24948858473942 ----> 66.78673337689855
60.015278058423625 ----> 66.7867333769059
60.78106753210783 ----> 66.7867333

### Case-3 also suffers from same problem as Case-2 ==> Not a good fit ==> Normalisation is important