In [21]:
import numpy as np
from IPython.display import clear_output

import pyneuralnet

In [41]:
class Layer(object):
    '''
    Fully connected layer that caches its forward values and backprop gradients.

    Supported activation functions are listed in Layer.ACTIVATIONS.
    '''
    # Activation names that forward() and backward() know how to handle.
    ACTIVATIONS = ('linear', 'sigmoid')

    def __init__(self, num_prev, num_neurons, activation_function='sigmoid'):
        '''
        Initialize weights randomly with He scaling and biases with zeros.

        Parameters:
        num_prev -- Number of units feeding into this layer.
        num_neurons -- Number of neurons in this layer.
        activation_function -- 'sigmoid' (default) or 'linear'.

        Sets:
        self.A -- [num_neurons, 1] Array of activations
        self.Z -- [num_neurons, 1] Array of linearly aggregated inputs
        self.W -- [num_neurons, num_prev] Array of weights applied to previous layer's activations A.
        self.b -- [num_neurons, 1] Array of biases

        Raises:
        ValueError -- if activation_function is not in Layer.ACTIVATIONS.
        '''
        # Fail at construction time instead of silently building a layer
        # whose forward pass would return stale activations.
        if activation_function not in self.ACTIVATIONS:
            raise ValueError(
                "Unsupported activation function {!r}; expected one of {}".format(
                    activation_function, self.ACTIVATIONS))

        self.n = num_neurons
        self.act = activation_function

        self.A = np.zeros((num_neurons, 1))
        self.Z = np.zeros((num_neurons, 1))
        self.b = np.zeros((self.n, 1))

        # He initialization. Applied to every supported activation so that
        # 'linear' layers also get a weight matrix (previously only
        # 'sigmoid' layers did, and 'linear' layers crashed in forward()).
        self.W = np.random.randn(num_neurons, num_prev) * np.sqrt(2.0 / num_prev)

    def forward(self, A_prev):
        '''
        Description:
        Perform forward pass by computing Z, then A.

        Parameters:
        A_prev -- [num_prev, m_samples] Array of activations from previous layer.

        Computes:
        self.Z -- [self.n, m_samples] Array of linear inputs for this layer.
        self.A -- [self.n, m_samples] Array of activations for this layer.

        Returns:
        self.A

        Raises:
        ValueError -- if self.act was changed to an unsupported name.
        '''
        self.Z = self.W @ A_prev + self.b

        if self.act == 'linear':
            self.A = self.Z

        elif self.act == 'sigmoid':
            self.A = 1 / (1 + np.exp(-self.Z))

        else:
            # Only reachable if self.act was reassigned after construction.
            raise ValueError("Unsupported activation function {!r}".format(self.act))

        return self.A

    def backward(self, W_next, dZ_next, A_prev):
        '''
        Description:
        Compute dA first, then dZ, then dW and db.
        Relies on self.A cached by the most recent forward() call.

        Parameters:
        W_next -- [num_next, self.n] Array of weights between this layer and next layer.
        dZ_next -- [num_next, m_samples] Gradient of cost w.r.t. linear inputs of next layer.
        A_prev -- [num_prev, m_samples] Array of activations from previous layer.

        Computes:
        self.dA -- [self.n, m_samples] Gradient of cost w.r.t. activations.
        self.dZ -- [self.n, m_samples] Gradient of cost w.r.t. linear inputs.
        self.dW -- [self.n, num_prev] Gradient of cost w.r.t. weights.
        self.db -- [self.n, 1] Gradient of cost w.r.t. biases.

        Raises:
        ValueError -- if self.act was changed to an unsupported name.
        '''
        # Take the batch size from the input actually used for dW rather
        # than from the cached activations.
        m = A_prev.shape[1]
        self.dA = W_next.T @ dZ_next

        if self.act == 'linear':
            self.dZ = self.dA

        elif self.act == 'sigmoid':
            # sigmoid'(Z) expressed through the cached activation A.
            self.dZ = self.A * (1 - self.A) * self.dA

        else:
            raise ValueError("Unsupported activation function {!r}".format(self.act))

        # Average the per-sample gradients over the batch.
        self.dW = (self.dZ @ A_prev.T) / m
        self.db = np.sum(self.dZ, axis=1, keepdims=True) / m

    def update(self, learning_rate):
        '''
        Description:
        Take one gradient-descent step using the gradients stored by backward().

        Parameters:
        learning_rate -- Step size multiplying dW and db.
        '''
        self.W = self.W - learning_rate * self.dW
        self.b = self.b - learning_rate * self.db
    

def buildNetwork(layout, num_features):
    '''
    Description:
    Build a network as a list of Layer objects, printing one summary line
    per layer as it is created.

    Arguments:
    layout -- (num_layers, 2) Tuple where first column contains number of neurons in each layer 
                and second column contains activation functions for each layer.
    num_features -- Number of input features; used as num_prev for the first layer.
                
    Returns:
    network -- (num_layers,:) List of layers, each with number of neurons specified in 'layout'.
                Empty list if 'layout' is empty.
    '''
    network = []

    # The first layer is fed by the input features; every later layer is fed
    # by the neurons of the layer before it.
    num_prev = num_features

    for index, spec in enumerate(layout, start=1):
        num_neurons = spec[0]
        activation_function = spec[1]
        network.append(Layer(num_prev, num_neurons, activation_function))
        # Format the label in one piece so every line reads "Layer N:"
        # (the old '\b' backspace trick is not rendered in notebook output).
        print('Layer {}:'.format(index), spec)
        num_prev = num_neurons

    return network

In [42]:
# Network architecture, one (num_neurons, activation) pair per layer.
layout = (
    (3, 'sigmoid'),
    (2, 'sigmoid'),
    (1, 'sigmoid'),
)

# Build the layers for an input with 3 features.
network = buildNetwork(layout=layout, num_features=3)

Layer 1: (3, 'sigmoid')
Layer 2 : (2, 'sigmoid')
Layer 3 : (1, 'sigmoid')


## Gradient Checking Tool


In [37]:
def activation(Z):
    '''Sigmoid activation, applied element-wise to Z.'''
    return 1 / (1 + np.exp(-Z))

def gradient(Z):
    '''
    Analytic derivative of the sigmoid with respect to Z, element-wise.

    Computed from the argument itself rather than from a global A, so the
    result always corresponds to the Z that was passed in.
    '''
    sig = activation(Z)
    return sig * (1 - sig)

Z = np.array([[3, 5, -2]])
print('Input\n', Z, '\n')

A = activation(Z)
print('Output\n', A, '\n')

dAdZ = gradient(Z)
print('Gradient\n', dAdZ, '\n')

# Numerical estimate via the symmetric (central) difference quotient.
eps = 1e-5
plus = activation(Z + eps)
minus = activation(Z - eps)
dAdZ_check = (plus - minus) / (2 * eps)
print('Gradient Check\n', dAdZ_check)

# Make the check fail loudly instead of relying on eyeballing the printout.
np.testing.assert_allclose(dAdZ, dAdZ_check, rtol=1e-6)

Input
 [[ 3  5 -2]] 

Output
 [[0.95257413 0.99330715 0.11920292]] 

Gradient
 [[0.04517666 0.00664806 0.10499359]] 

Gradient Check
 [[0.04517666 0.00664806 0.10499359]]
