### Credit to http://cs231n.github.io/optimization-2/

### Imports

In [1]:
%matplotlib inline
from matplotlib import pyplot as plt
import numpy as np
import math
from IPython.display import Image, SVG
# Notebook-wide matplotlib defaults, applied in a single call.
plt.rcParams.update({
    'figure.figsize': (10.0, 8.0),     # default figure size
    'image.interpolation': 'nearest',  # interpolation used when showing images
    'image.cmap': 'gray',              # default colormap for images
})

### A simple neural network representing the sigmoid function (a.k.a. logistic regression)

![title](Neural-Network-1.png)

### The function that the above neural network represents

# $f(w,x) = \frac{1}{1+e^{-(w_0x_0 + w_1x_1 + w_2)}}$ #

### Forwardprop and Backprop through sigmoid

In [2]:
# Forward and backward pass through f(w, x) = 1 / (1 + exp(-(w0*x0 + w1*x1 + w2))),
# evaluated one elementary gate at a time.
W = [2, -3, -3]  # arbitrary example weights: w0, w1 and the bias w2
X = [-1, -2]     # arbitrary example inputs: x0, x1

# Forward pass: keep every intermediate value, the backward pass reuses them.
dot = W[0]*X[0] + W[1]*X[1]      # w0*x0 + w1*x1
dotplusw2 = dot + W[2]           # add the bias
minus_dot = (-1) * dotplusw2     # negate
exp_dot = math.exp(minus_dot)    # exponentiate
f = 1.0 / (1 + exp_dot)          # sigmoid output

# Backprop: walk the gates in reverse order, multiplying each local gradient
# by the gradient flowing in from the gate above (chain rule).
df = 1                             # gradient of the output with respect to itself
dplusone = (-1) * (f * f) * df     # 1/u gate, u = 1 + exp_dot: local gradient -1/u**2 == -f**2
dexp = dplusone                    # "+1" gate passes the gradient through unchanged
dminusone = exp_dot * dexp         # exp gate: gradient on minus_dot (local gradient is exp_dot)
ddotplusw2 = (-1) * dminusone      # "*(-1)" gate: gradient on dotplusw2
dw2 = ddotplusw2                   # add gate distributes the gradient to both inputs
ddot = ddotplusw2
dw0x0 = ddot                       # second add gate: gradient on each product term
dw1x1 = ddot
dw0 = X[0] * dw0x0                 # multiply gate: each input gets the other input's value
dx0 = W[0] * dw0x0
dw1 = X[1] * dw1x1
dx1 = W[1] * dw1x1

# A single parenthesised string argument runs on both Python 2 and Python 3;
# a bare `print a, b` statement is a SyntaxError on Python 3.
print(" ".join(map(str, (dplusone, dexp, dminusone, ddotplusw2, ddot, dw0x0, dw1x1))))

print(" ".join(map(str, (dw0, dw1, dx0, dx1, dw2))))

-0.534446645389 -0.534446645389 -0.196611933241 0.196611933241 0.196611933241 0.196611933241 0.196611933241
-0.196611933241 -0.393223866483 0.393223866483 -0.589835799724 0.196611933241


# $\sigma(x) = \frac{1}{1+e^{-x}}$#
# $\rightarrow \hspace{0.3in} \frac{d\sigma(x)}{dx} = \frac{e^{-x}}{(1+e^{-x})^2} = \left( \frac{1 + e^{-x} - 1}{1 + e^{-x}} \right) \left( \frac{1}{1+e^{-x}} \right) = \left( 1 - \sigma(x) \right) \sigma(x)$ #

So instead of backpropagating through each elementary gate of the sigmoid one at a time, one can treat the whole sigmoid as a single gate and use the closed-form derivative above.

In [3]:
# Closed-form sigmoid derivative sigma * (1 - sigma); expects f to still hold
# the sigmoid output computed in the forward pass above.
f * (1 - f)

0.19661193324148185

In [4]:
# Closed-form derivative minus the gate-by-gate gradient on the sigmoid input;
# any non-zero value should be floating-point round-off only.
f * (1 - f) - ddotplusw2

-2.7755575615628914e-17

### A slightly more complex function

# $f(x,y) = \frac{x + \sigma(y)}{\sigma(x) + (x+y)^2}$ #

### Neural network for the above function (excuse the drawing :D)

![title](Neural-Network-2.svg)

### Forward and backprop through the above neural network

In [5]:
# Staged forward and backward pass through
#     f(x, y) = (x + sigmoid(y)) / (sigmoid(x) + (x + y)**2)
# The "#(n)" tags pair each backward step with the forward step it differentiates.
# NOTE: this cell rebinds `f`; the sigmoid cells above also read `f`, so re-run
# the sigmoid forward pass before re-evaluating them.
x = 3   # example values
y = -4

# forward pass
sigy = 1.0 / (1 + math.exp(-y))   # sigmoid in numerator     #(1)
num = x + sigy                    # numerator                #(2)
sigx = 1.0 / (1 + math.exp(-x))   # sigmoid in denominator   #(3)
xpy = x + y                       #                          #(4)
xpysqr = xpy**2                   #                          #(5)
den = sigx + xpysqr               # denominator              #(6)
invden = 1.0 / den                #                          #(7)
f = num * invden                  # done!                    #(8)

# backprop f = num * invden
dnum = invden                        # gradient on numerator           #(8)
dinvden = num                        # gradient on 1/denominator       #(8)
# backprop invden = 1.0 / den
dden = (-1.0 / (den**2)) * dinvden   #                                 #(7)
# backprop den = sigx + xpysqr
dsigx = (1) * dden                   #                                 #(6)
dxpysqr = (1) * dden                 #                                 #(6)
# backprop xpysqr = xpy**2
dxpy = (2 * xpy) * dxpysqr           #                                 #(5)
# backprop xpy = x + y
dx = (1) * dxpy                      #                                 #(4)
dy = (1) * dxpy                      #                                 #(4)
# backprop sigx = 1.0 / (1 + math.exp(-x))
# x feeds several gates, so its gradient is accumulated with += (gradients add up at forks)
dx += ((1 - sigx) * sigx) * dsigx    #                                 #(3)
# backprop num = x + sigy
dx += (1) * dnum                     # second fork contribution to x   #(2)
dsigy = (1) * dnum                   #                                 #(2)
# backprop sigy = 1.0 / (1 + math.exp(-y))
dy += ((1 - sigy) * sigy) * dsigy    # fork contribution to y          #(1)

# A single parenthesised string argument runs on both Python 2 and Python 3;
# a bare `print dx, dy` statement is a SyntaxError on Python 3.
print("%s %s" % (dx, dy))

2.05956979557 1.59223275148
