## Neural Network and Convolutional Neural Network Practice

In step 3-2, we introduce the computational-graph concept into our NN and CNN programs. To do so, we rewrite every operation as a layer, which makes both forward and backward propagation easier to compute. 

In [4]:
import numpy as np

In [27]:
# helper functions

def softmax(x):
    """Return the softmax of ``x``.

    A 2-D input is treated as a batch and normalised row by row; any other
    input is normalised over all of its elements. The maximum is subtracted
    before exponentiating so that ``np.exp`` does not overflow.
    """
    if x.ndim != 2:
        # single sample (or anything that is not a 2-D batch): global softmax
        shifted = x - np.max(x)
        exps = np.exp(shifted)
        return exps / np.sum(exps)

    # Work on the transpose so every sample is a column; the per-sample
    # max and sum then broadcast naturally along axis 0.
    columns = x.T
    columns = columns - np.max(columns, axis=0)
    exps = np.exp(columns)
    probs = exps / np.sum(exps, axis=0)
    return probs.T  # back to one sample per row

# for one-hot-encoding label
def cross_entropy_error(y, t):
    """Return the mean cross-entropy of predictions ``y`` against labels ``t``.

    ``t`` is multiplied element-wise with ``log(y)``, so it is expected to be
    one-hot encoded with the same shape as ``y``. A 1-D ``y`` is treated as a
    batch containing a single sample.
    """
    eps = 1e-7  # small offset so np.log never receives exactly 0
    if y.ndim == 1:
        # promote the single sample to a batch of one row
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)

    n_samples = y.shape[0]
    log_likelihood = np.sum(t * np.log(y + eps))
    return -log_likelihood / n_samples

### Add and Multiply Layer

In [5]:
# df/dx = df/dz * dz/dx
# z = x + y, dz/dx = 1
class AddLayer:
    """Addition node ``z = x + y`` of a computational graph."""

    def __init__(self):
        # the node keeps no state: its local derivatives are constant
        pass

    def forward(self, x, y):
        """Return the sum of the two inputs."""
        total = x + y
        return total

    def backward(self, dfdz):
        """Return ``(df/dx, df/dy)`` given the upstream gradient ``dfdz``.

        Both local derivatives are 1, so the upstream gradient is passed on
        to each input.
        """
        grad_x = dfdz * 1
        grad_y = dfdz * 1
        return grad_x, grad_y

In [12]:
# z = x * y, df/dx = df/dz * dz/dx, dz/dx = y
class MulLayer:
    """Multiplication node ``z = x * y`` of a computational graph."""

    def __init__(self):
        # inputs of the latest forward pass, needed for the backward pass
        self.x = None
        self.y = None

    def forward(self, x, y):
        """Remember both inputs and return their product."""
        self.x, self.y = x, y
        return x * y

    def backward(self, dfdz):
        """Return ``(df/dx, df/dy)`` given the upstream gradient ``dfdz``.

        Since dz/dx = y and dz/dy = x, each input receives the upstream
        gradient scaled by the *other* input.
        """
        grad_x = dfdz * self.y
        grad_y = dfdz * self.x
        return grad_x, grad_y

### Test our classes

Test a simple case $f(x_1, y_1, x_2, y_2, r) = (x_1 y_1 + x_2 y_2) \cdot r$, then check that $df/dr = x_1 y_1 + x_2 y_2$ and $df/d(x_1 y_1 + x_2 y_2) = r$.

In [28]:
# input values of f = ( x1*y1 + x2*y2 ) * r
x1, y1 = 100, 2
x2, y2 = 150, 3
r = 1.1

# one layer object per node of the computational graph:
# two products, their sum, and the final product with r
x1y1_layer, x2y2_layer = MulLayer(), MulLayer()
x1y1_x2y2_layer = AddLayer()
xyr_layer = MulLayer()

# forward pass: evaluate the graph from the inputs towards the output
x1y1 = x1y1_layer.forward(x1, y1)
x2y2 = x2y2_layer.forward(x2, y2)
x1y1_x2y2 = x1y1_x2y2_layer.forward(x1y1, x2y2)
xyr = xyr_layer.forward(x1y1_x2y2, r)

print('Forward check: ')
print('( x1*y1 + x2*y2 ) * r =' + str(xyr))
print('')

# backward pass: start from df/df = 1 and walk the graph in reverse order
df = 1
dall, dfdr = xyr_layer.backward(df)
dx1y1, dx2y2 = x1y1_x2y2_layer.backward(dall)
dx1, dy1 = x1y1_layer.backward(dx1y1)
dx2, dy2 = x2y2_layer.backward(dx2y2)

# df/dr should equal the sum, and df/d(sum) should equal r
print('Backward check: ')
print('x1*y1 + x2*y2 = ' + str(x1y1_x2y2))
print('r = ' + str(r))
print('df/dr =' + str(dfdr))
print('df/dxy = ' + str(dall))

Forward check: 
( x1*y1 + x2*y2 ) * r =715.0000000000001

Backward check: 
x1*y1 + x2*y2 = 650
r = 1.1
df/dr =650
df/dxy = 1.1


### Relu and Sigmoid Layer

In [13]:
# z = x if x > 0, 0 if x <= 0, df/dx = df/dz * dz/dx, dz/dx = 1 if x > 0, 0 if x <= 0
class Relu:
    """ReLU activation layer: passes positive values, zeroes the rest."""

    def __init__(self):
        # boolean array marking the entries that were <= 0 in the last forward pass
        self.mask = None

    def _zero_masked(self, values):
        """Return a copy of ``values`` with the masked entries set to 0."""
        result = values.copy()
        result[self.mask] = 0
        return result

    def forward(self, x):
        """Record where ``x <= 0`` and return ``x`` with those entries zeroed.

        The input array itself is left untouched.
        """
        self.mask = (x <= 0)
        return self._zero_masked(x)

    def backward(self, dfdz):
        """Return the upstream gradient, blocked where the forward input was <= 0."""
        return self._zero_masked(dfdz)

In [14]:
# z = 1 / (1 + exp(-x) ), 
# dz/dx = z^2*exp(-x) = 1/(1+exp(-x))^2 * exp(-x) = 1/(1+exp(-x)) * exp(-x)/(1+exp(-x)) = z * (1-z)
class Sigmoid:
    """Sigmoid activation layer, ``z = 1 / (1 + exp(-x))``."""

    def __init__(self):
        # output of the latest forward pass; the derivative is expressed with it
        self.out = None

    def forward(self, x):
        """Compute the sigmoid of ``x``, cache it and return it."""
        denominator = 1 + np.exp(-x)
        self.out = 1 / denominator
        return self.out

    def backward(self, dfdz):
        """Return ``dfdz * (1 - z) * z`` using the cached forward output ``z``."""
        scaled = dfdz * (1 - self.out)
        return scaled * self.out

### Affine Layer

In the previous step, we processed the input values with a matrix operation: $ Y = X\cdot W + B $. We can rewrite these operations as a layer, just as we did above. This layer is usually called the Affine Layer in neural networks.

In [25]:
class Affine():
    """Fully connected (affine) layer computing ``y = x . W + b``.

    The layer keeps references to ``W`` and ``b`` (it does not copy them) and,
    after a backward pass, exposes the parameter gradients as ``dW`` and
    ``db``.
    """

    def __init__(self, W, b):
        self.W = W
        self.b = b
        self.x = None   # input of the latest forward pass
        self.dW = None  # gradient w.r.t. W, set by backward()
        self.db = None  # gradient w.r.t. b, set by backward()

    def forward(self, x):
        """Cache ``x`` and return ``np.dot(x, W) + b``."""
        self.x = x
        return np.dot(x, self.W) + self.b

    def backward(self, dfdy):
        """Back-propagate the upstream gradient ``dfdy`` through the layer.

        Stores ``dW = x.T . dfdy`` and ``db = sum(dfdy, axis=0)`` on the
        instance and returns the gradient with respect to the input,
        ``dfdy . W.T``.
        """
        dx = np.dot(dfdy, self.W.T)
        self.dW = np.dot(self.x.T, dfdy)
        # the bias is added to every sample, so its gradient sums over axis 0
        self.db = np.sum(dfdy, axis=0)

        return dx

    # The method used to be spelled ``backwrd``, which broke every caller that
    # treats layers uniformly via ``layer.backward(...)``. The old spelling is
    # kept as an alias so existing callers keep working.
    backwrd = backward

In [26]:
class SoftmaxWithError():
    """Output layer that applies softmax and then the cross-entropy error."""

    def __init__(self):
        self.error = None  # error value of the latest forward pass
        self.y_h = None    # softmax output (predicted probabilities)
        self.y = None      # labels passed to forward()

    def forward(self, x, y):
        """Return the cross-entropy error of ``softmax(x)`` against labels ``y``.

        The softmax output and the labels are cached for the backward pass.
        """
        self.y = y
        self.y_h = softmax(x)
        self.error = cross_entropy_error(self.y_h, self.y)

        return self.error

    def backward(self, dout=1):
        """Return the gradient of the error with respect to the forward input.

        ``dout`` is the upstream gradient. It defaults to 1 because this layer
        normally ends the graph; accepting it keeps the signature in line with
        the other layers, while a call without arguments still works.
        """
        # A 1-D label vector is one sample, not a batch of ``len(y)`` samples
        # (cross_entropy_error treats it the same way).
        batch_data_size = self.y.shape[0] if self.y.ndim > 1 else 1
        dx = (self.y_h - self.y) / batch_data_size

        return dx * dout

### Rebuild Two Layer Network Using Layers

In [29]:
from collections import OrderedDict

In [30]:
class TwoLayerNet:
    """Two-layer network built from layers: Affine -> Relu -> Affine -> softmax/error.

    Parameters live in ``self.params`` under the keys ``'W1'``, ``'b1'``,
    ``'W2'`` and ``'b2'``; the Affine layers hold references to those same
    arrays.
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std = 0.01):
        """Create randomly initialised weights, zero biases and the layer stack.

        Weights are drawn from a standard normal distribution and scaled by
        ``weight_init_std``.
        """
        self.params = {}

        # 1st layer size: from input to hidden size
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)

        # 2nd layer size: from hidden size to output size
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

        # Layers in forward order. Every key must be unique: assigning to an
        # existing key would silently replace that layer instead of adding one.
        self.layers = OrderedDict()
        self.layers['Affine_layer_1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu_layer_1'] = Relu()
        self.layers['Affine_layer_2'] = Affine(self.params['W2'], self.params['b2'])
        self.last_layer = SoftmaxWithError()

    def predict(self, x):
        """Run ``x`` through every layer except the final softmax/error layer."""
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def error(self, x, y):
        """Return the error of the network's prediction for ``x`` against labels ``y``."""
        y_h = self.predict(x)
        return self.last_layer.forward(y_h, y)

    def accuracy(self, x, y):
        """Return the fraction of rows whose arg-max prediction matches the arg-max of ``y``."""
        y_h = self.predict(x)
        y_h = np.argmax(y_h, axis = 1)
        y = np.argmax(y, axis = 1)
        return np.sum(y_h == y)/float(x.shape[0])

    def gradient(self, x, y):
        """Return the parameter gradients obtained by back-propagation.

        The result is a dict with the same keys as ``self.params``.
        """
        # forward pass, so that every layer caches what its backward pass needs
        self.error(x, y)

        # backward pass: the error layer starts the chain (upstream gradient 1),
        # then the remaining layers are visited in reverse order
        dout = self.last_layer.backward()
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        # collect the gradients stored on the two Affine layers
        affine_1 = self.layers['Affine_layer_1']
        affine_2 = self.layers['Affine_layer_2']
        grads = {}
        grads['W1'], grads['b1'] = affine_1.dW, affine_1.db
        grads['W2'], grads['b2'] = affine_2.dW, affine_2.db

        return grads