In [43]:
import numpy as np
from IPython.display import clear_output

import pyneuralnet

In [181]:
class Layer(object):
    def __init__(self, num_prev, num_neurons, activation_function='sigmoid'):
        '''
        Initialize weights randomly with He scaling.
        
        self.A -- [num_neurons, 1] Array of activations
        self.Z -- [num_neurons, 1] Array of linearly aggregated inputs
        self.W -- [num_neurons, num_prev] Array of weights applied to previous layer's activations A.
        self.b -- [num_neurons, 1] Array of biases
        ''' 
        self.n = num_neurons
        self.act = activation_function  
        
        self.A = np.zeros((num_neurons,1))
        self.Z = np.zeros((num_neurons,1))
        self.b = np.zeros((self.n,1))
        
        # He initialization.
        if self.act == 'sigmoid':
            self.W = np.random.randn(num_neurons, num_prev) * np.sqrt(2.0/num_prev)
        
    def forward(self, A_prev):
        '''
        Description:
        Perform forward pass by computing Z, then A.
        
        Parameters:
        A_prev -- [num_prev, m_samples] Array of activations from previous layer.
        
        Computes:
        self.A -- [self.n, m_samples] Array of activations for this layer.
        '''
        self.Z = self.W @ A_prev + self.b
        
        if self.act == 'linear':
            self.A = self.Z
            
        elif self.act == 'sigmoid':
            self.A = 1 / (1 + np.exp(-self.Z))
            
        return self.A
    
    def backward(self, W_next, dZ_next, A_prev):
        '''
        Description:
        Compute dA first, then dZ, then dW and db.
        
        Parameters:
        W_next -- [num_next, self.n] Array of weights between this layer and next layer.
        dZ_next -- [num_next, m_samples] Array of linear inputs to next layer.
        A_prev -- [num_prev,m_samples] Array of activations from previous layer.
        
        Computes:
        self.dA -- [self.n, m_samples] Gradient of cost w.r.t. activations.
        self.dZ -- [self.n, m_samples] Gradient of cost w.r.t. linear inputs.
        self.dW -- [self.n, num_prev] Gradient of cost w.r.t. weights.
        self.db -- [self.n, 1] Gradient of cost w.r.t. biases.
        '''        
        m = self.A.shape[1]
        self.dA = W_next.T @ dZ_next
        
        if self.act == 'linear':
            self.dZ = self.dA
            
        elif self.act == 'sigmoid':
            self.dZ = self.A * (1 - self.A) * self.dA
            
        self.dW = 1 / m * self.dZ @ A_prev.T
        self.db = 1 / m * np.sum(self.dZ, axis=1, keepdims=True)
        
    def update(self, learning_rate):
        self.W = self.W - learning_rate * self.dW
        self.b = self.b - learning_rate * self.db
    

def costMSE(H, Y):
    '''
    Parameters:
    H -- [1, m] Predicted values.
    Y -- [1, m] Target values.
    
    Returns:
    J -- (float) Mean squared error of dataset.
    grad -- [1, m] Gradient of cost w.r.t. predicted values.
    ''' 
    m = H.shape[1]
    J = 1 / (2 * m) * (H - Y) @ (H - Y).T
    grad = 1 / m * (H - Y)
    return J, grad


def costLogistic(H, Y):
    '''
    Parameters:
    H -- [n, m] Predicted values in range (0, 1).
    Y -- [n, m] Target values, either 0 or 1.
    
    Returns:
    J -- [n, 1] Logistic cost of dataset.
    grad -- [n, 1] Gradient of cost w.r.t. predicted values.
    
    '''
    m = H.shape[1]
    J = -1 / m * np.sum(Y * np.log(H) + (1 - Y) * np.log(1 - H), axis=1)
    grad = 1 / m * np.sum(np.divide(-Y, H) + np.divide(1 - Y, 1 - H), axis=1, keepdims=True)
    return J, grad

In [42]:
layout = ((3, 'sigmoid'),
          (2, 'sigmoid'), 
          (1, 'sigmoid'),
         ) 

network = buildNetwork(layout, 3)

Layer 1: (3, 'sigmoid')
Layer 2 : (2, 'sigmoid')
Layer 3 : (1, 'sigmoid')


## Gradient Checking Tool


In [180]:
def function(H, Y):
    m = H.shape[1]
    return -1 / m * np.sum(Y * np.log(H) + (1 - Y) * np.log(1 - H), axis=1, keepdims=True)

def gradient(H, Y):
    m = H.shape[1]
    return 1 / m * np.sum(np.divide(-Y, H) + np.divide(1 - Y, 1 - H), axis=1, keepdims=True)

np.random.seed(20)
Z = np.random.rand(2, 3)
print('Input\n', Z, '\n')

Y = np.array([[0, 1, 1],
              [1, 0, 0]])
print('Correct Answer\n', Y, '\n')

A = function(Z, Y)
print('Output\n', A, '\n')

dAdZ = gradient(Z, Y)
print('Gradient\n', dAdZ, '\n')

eps = 1e-5
dAdZ_check = np.zeros(Z.shape)

for i in range(Z.shape[0]): 
    for j in range(Z.shape[1]):
        cop = np.copy(Z)
        bp[i,j] = cop[i,j] + eps
        bn[i,j] = cop[i,j] - eps
        plus = function(bp, Y)
        minus = function(bn, Y)
        dAdZ_check[i,j] = (plus[i] - minus[i]) / (2 * eps)
print('Gradient Check\n', dAdZ_check)

Input
 [[0.5881308  0.89771373 0.89153073]
 [0.81583748 0.03588959 0.69175758]] 

Correct Answer
 [[0 1 1]
 [1 0 0]] 

Output
 [[0.36992296]
 [0.47231943]] 

Gradient
 [[0.06411603]
 [1.0185637 ]] 

Gradient Check
 [[0.06411603 0.06411603 0.06411603]
 [1.0185637  1.0185637  1.0185637 ]]
