In [0]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Model

$$y = 1 / (1+e^{-{w^TX+b}}) $$ 

In [0]:
# we are given a dataset with 90 items with 5 columns for each
wt = np.random.rand(5,1)
X = np.random.rand(90,5)
Y = np.random.rand(90,1)
b = -0.5

def sigmoid_neuron(w,b,x):
  Y_pred = 1/(1+np.exp(-(np.dot(x,wt) + b)))
  return Y_pred

Y_pred = sigmoid_neuron(wt,b,X[0,:])

## Loss Function

$$ Loss = \sum_i^n (y_i^{pred}-y_i^{actual})^2$$

In [26]:
def loss_function(Y_pred,Y):
  return np.sum(np.power((Y_pred-Y),2))

loss_function(Y_pred,Y)

9.217312004907198

## Graient descent (Learning) - Parameter Update 


$$w_{t+1} = w_t - \eta\Delta w_t$$

$$b_{t+1} = b_t - \eta\Delta b_t$$

where

$$\Delta w_t = \frac{\partial L(w,b)} {\partial w},  \Delta b_t = \frac{\partial L(w,b)} {\partial b}, \eta = step$$

Derivatives are given by - 

$$\Delta w_t = \sum_i^n (f(x_i)-y_i)(f(x_i))(1-f(x_i))x_i $$
 $$\Delta b_t = \sum_i^n (f(x_i)-y_i)(f(x_i))(1-f(x_i)) $$



In [0]:
def grad_w(w,b,x,y):
  f_x =  sigmoid_neuron(w,b,x)
  return (f_x-y)*(f_x)*(1-f_x)*x

def grad_b(w,b,x,y):
  f_x =  sigmoid_neuron(w,b,x)
  return (f_x-y)*(f_x)*(1-f_x)*x

def do_gradient_descent(eta=0.1,max_epochs=100):
  w,b = 0,0
  dw,db = 0,0
  for x,y in zip(X,Y):
    dw += grad_w(w,b,x,y)
    db += grad_b(w,b,x,y)
  w = w - eta*dw
  b = b - eta*db