In [34]:
import numpy as np
import numpy.matlib

In [35]:
# Generic Layer class 
#        +-------+
# z ---> | layer | -- f(z) -->
#        +-------+ <-------- (dz/dx)_i+1 ------
#         |    |
#    df(z)/dx_i  df(z)/dW_i

class layer(object):
    """Abstract interface shared by every layer in this notebook.

    Concrete layers override both methods. Each method receives a single
    dictionary ``param`` holding the named inputs it needs.
    """

    def forward(self, param):
        """Compute the layer output; must be overridden by subclasses."""
        raise NotImplementedError()

    def backward(self, param):
        """Compute the layer gradient(s); must be overridden by subclasses."""
        raise NotImplementedError()


In [36]:
# Inner (fully connected) layer class
# Equation:
#    z = W*x + b   (the code below omits the bias, i.e. b = 0)
#   dz/dx_i = W^T*(dz/dx_i+1)
#   dz/dw_i = (dz/dx_i+1)*x^T
#
class inner(layer):
    """Linear layer without a bias term: z = W . x."""

    def forward(self, param):
        """Return the product of param['w'] and param['x'] (via np.dot)."""
        weights = param['w']
        inputs = param['x']
        return np.dot(weights, inputs)

    def backward(self, param):
        """Back-propagate the upstream gradient param['dzdx'].

        Returns a pair ``(grad_input, grad_weights)``:
            grad_input   = W^T . dzdx
            grad_weights = dzdx . x^T
        """
        weights = param['w']
        upstream = param['dzdx']
        grad_input = np.dot(weights.T, upstream)
        grad_weights = np.dot(upstream, param['x'].T)
        return grad_input, grad_weights
    

In [104]:
# Sigmoid layer class
# Equation:
#   z = sigm(x) = 1/(1+exp(-x))
#   dz/dx = sigm(x)(1-sigm(x)) *(dz/dx_i+1) 
#
class sigm(layer):
    """Element-wise logistic sigmoid layer: z = 1 / (1 + exp(-x))."""

    def forward(self, param):
        """Return sigm(param['x']) applied element-wise."""
        return self._sigm(param['x'])

    def backward(self, param):
        """Return sigm(x) * (1 - sigm(x)) * param['dzdx'], element-wise.

        np.multiply is used instead of ``*`` because ``*`` between two
        np.matrix objects is a matrix product, which only coincides with the
        element-wise product for 1x1 values and fails with a shape error
        otherwise.
        """
        # Evaluate the activation once and reuse it for both factors.
        activation = self._sigm(param['x'])
        local_grad = np.multiply(activation, 1.0 - activation)
        return np.multiply(local_grad, param['dzdx'])

    def _sigm(self, x):
        """Logistic function 1 / (1 + exp(-x))."""
        return 1.0 / (1.0 + np.exp(-x))
    

In [109]:
# Loss layer class
# Equation:
#   z = 1/(2n) * sum((y-x)^2), with n = len(y)
#   dz/dx = (x-y)*(dz/dx_i+1)   (single-sample case, n = 1)
#   note: dz/dx_n = 1
#
class loss(layer):
    """Half mean-squared-error loss layer.

    forward:  z = 1/(2n) * sum((y - x)^2)
    backward: dz/dx = (x - y) / n * dzdx

    where n is len(y) (the number of rows for a 2-D value), or 1 when y is a
    scalar.
    """

    def forward(self, param):
        """Return the scalar loss between param['x'] and the target param['y']."""
        residual = param['y'] - param['x']
        # np.square is always element-wise; ``**2`` on an np.matrix is a
        # matrix power, which fails for non-square residuals.
        return (1.0 / (2.0 * self._count(param['y']))) * np.sum(np.square(residual))

    def backward(self, param):
        """Return the gradient of the loss with respect to param['x'].

        The result is scaled by 1/n so that it is the derivative of what
        forward() computes, then multiplied element-wise by the upstream
        gradient param['dzdx'].
        """
        count = float(self._count(param['y']))
        return np.multiply(param['x'] - param['y'], param['dzdx']) / count

    def _count(self, y):
        """Normalisation count used by forward and backward."""
        return len(y) if np.ndim(y) > 0 else 1


In [110]:
# Create cost function neural network
# Demo
#         +--------+    +--------+     +--------+              +--------+
#   ----> | inner  |--> | inner  |---> | sigm   | ---->(*)---> | loss   |
#         +--------+    +--------+     +--------+              +--------+

# Cost function
def costFunc( x, w1, w2):
    '''
    Forward pass of the demo network: inner -> inner -> sigm.

    Input:
        * x: network input (the demo passes a 3x1 np.matrix)
        * w1, w2: weights of the first and second inner layers
    Return:
        * output of the sigmoid layer

    Note: the inner layers use no bias (b=0)
    '''
    hidden = inner().forward({'x': x, 'w': w1})
    pre_activation = inner().forward({'x': hidden, 'w': w2})
    return sigm().forward({'x': pre_activation})




In [111]:
# Data: one input column (3x1) and its target value (1x1)
x = np.matrix([[1, 2, 3]], dtype=np.float64).T
y = np.matrix([5.0], dtype=np.float64)

# Initial weights: w1 is 2x3 (first inner layer), w2 is 1x2 (second inner layer)
w1 = np.matrix(
    [[0.1, 0.2, 0.3],
     [0.1, 0.2, 0.3]],
    dtype=np.float64,
)
w2 = np.matrix([0.8, 0.2], dtype=np.float64)

# Forward pass through the network, then the loss of the prediction
y_ = costFunc(x, w1, w2)
e = loss().forward({'x': y_, 'y': y})
print(e)



8.81083005274


In [22]:
# Gradient function
def gradCostFunc(x, y, w1, w2):
    '''
    Run the network forward, then back-propagate the loss.

    Returns the loss value E together with its derivatives with respect
    to w1 and w2, in the order (E, dEdW1, dEdW2).
    '''

    # forward --->  inner -> inner -> sigm -> loss
    hidden = inner().forward({'x': x, 'w': w1})
    pre_activation = inner().forward({'x': hidden, 'w': w2})
    prediction = sigm().forward({'x': pre_activation})
    E = loss().forward({'x': prediction, 'y': y})

    # <--- backward: start from dE/dE = 1 and walk the layers in reverse
    seed = 1
    grad_prediction = loss().backward({'x': prediction, 'y': y, 'dzdx': seed})
    grad_pre_activation = sigm().backward({'x': pre_activation, 'dzdx': grad_prediction})
    grad_hidden, dEdW2 = inner().backward(
        {'x': hidden, 'w': w2, 'dzdx': grad_pre_activation})
    # The gradient with respect to the network input is not needed.
    _, dEdW1 = inner().backward({'x': x, 'w': w1, 'dzdx': grad_hidden})

    return E, dEdW1, dEdW2

In [None]:
# Derivative of the loss with respect to both weight matrices
# grad J(x)
E, dEdW1, dEdW2 = gradCostFunc(x, y, w1, w2)


# Minimization with gradient descent (update step not implemented here):
# w^t = w^(t-1) - lr*gradJ

print(E)
print(dEdW1)
print(dEdW2)