In [1]:
import numpy as np

In [204]:
class Network:
    """
    Fully connected feed-forward classifier with a softmax output layer.

    Network class used to solve the second exercise of assignment 1b.
    Samples are stored row-wise: every forward-pass method maps an array of
    shape (n_samples, n_in) to an array of shape (n_samples, n_out).

    Arguments
    ---------
    sizes: list. First entry should be the # of variables in X, last entry
        should be the number of classes to predict. Entries in between are
        the widths of the hidden layers. At least two entries are required.
    activation_fun: str or [str]. Activation used in the hidden layers, one
        of "sigmoid", "relu" or "softmax". The output layer always uses
        softmax, regardless of this argument.
    """

    # Activation names understood by activation_forward; each one is also the
    # name of the method implementing it.
    _ACTIVATIONS = ("sigmoid", "relu", "softmax")

    def __init__(self, sizes, activation_fun):
        """
        num_layers : int, number of layers in the network, not including the input
        b : [np.array], one bias column vector of shape (n_out, 1) per layer
        W : [np.array], one weight matrix of shape (n_out, n_in) per layer
        activation_fun : str or [str], specifies the activation functions to be used
            in the hidden layers of the network. A single str is used for every
            hidden layer. A list (or tuple) must hold exactly one name per
            hidden layer, i.e. len(sizes) - 2 entries; AssertionError otherwise.

        Raises
        ------
        ValueError : if sizes has fewer than two entries.
        AssertionError : if a list of activations has the wrong length.
        TypeError : if activation_fun is neither a str nor a list/tuple.
        """
        if len(sizes) < 2:
            raise ValueError(
                "sizes needs at least an input and an output entry, got %r" % (sizes,))

        self.num_layers = len(sizes) - 1
        self.sizes = sizes
        # Biases are drawn before the weights so that a seeded run keeps
        # producing the same parameters.
        self.b = [np.random.rand(y, 1)*.1 for y in sizes[1:]]
        self.W = [np.random.randn(y, x)*.1 for x, y in zip(sizes[:-1], sizes[1:])]

        num_hidden = self.num_layers - 1
        if isinstance(activation_fun, str):
            # One entry per layer; the last one is never read because the
            # output layer is hard-wired to softmax in model_forward.
            self.activation_fun = [activation_fun] * self.num_layers
        elif isinstance(activation_fun, (list, tuple)):
            if len(activation_fun) != num_hidden:
                # Raised explicitly (rather than via `assert`) so the check
                # survives `python -O` while keeping the exception type.
                raise AssertionError(
                    "expected %d hidden-layer activation(s), got %d"
                    % (num_hidden, len(activation_fun)))
            self.activation_fun = list(activation_fun)
        elif activation_fun is None and num_hidden == 0:
            # No hidden layers, so there is nothing to configure.
            self.activation_fun = []
        else:
            raise TypeError(
                "activation_fun must be a str or a list of str, got %s"
                % type(activation_fun).__name__)

    ### Methods for the activation functions ###
    def sigmoid(self, z):
        """
        Element-wise logistic function 1 / (1 + exp(-z)).
        """
        return 1/(1+np.exp(-z))

    def relu(self, z):
        """
        Element-wise rectifier max(z, 0).
        """
        return np.maximum(z, 0)

    def softmax(self, z):
        """
        Row-wise softmax of a 2-D array (one sample per row).

        Uses stabilization by subtracting the max in the exponent
        """
        shifted = z - np.max(z, axis=1, keepdims=True)
        e_x = np.exp(shifted)
        return e_x / np.sum(e_x, axis=1, keepdims=True)

    ###  forward propagation ###
    def linear_forward(self, h_prev, W, b):
        """
        Computes the linear transformation at layer l

        Arguments
        ---------
        h_prev : np.array (n_samples, n_in), output of the previous layer
        W : np.array (n_out, n_in), weights of layer l
        b : np.array (n_out, 1), biases of layer l

        Returns
        -------
        np.array (n_samples, n_out), h_prev W^T + b^T
        """
        return h_prev.dot(W.T) + b.T

    def activation_forward(self, h_prev, W, b, activation_fun):
        """
        Applies one full layer: linear transformation followed by activation.

        Arguments
        ---------
        h_prev : np.array, output of the previous layer
        W : np.array, weights of the layer
        b : np.array, biases of the layer
        activation_fun : str, "sigmoid", "relu" or "softmax"

        Returns
        -------
        (h, Z) : the activated output and the pre-activation it came from

        Raises
        ------
        ValueError : if activation_fun is not a known activation name.
        """
        if activation_fun not in self._ACTIVATIONS:
            raise ValueError(
                "unknown activation function %r, expected one of %s"
                % (activation_fun, ", ".join(self._ACTIVATIONS)))

        Z = self.linear_forward(h_prev, W, b)
        h = getattr(self, activation_fun)(Z)
        return h, Z

    def model_forward(self, X):
        """
        Propagates X through every layer of the network.

        Arguments
        ---------
        X : np.array (n_samples, sizes[0]), input samples

        Returns
        -------
        activation_cache : [np.array], X followed by the output of each layer;
            the last entry holds the softmax class probabilities
        z_cache : [np.array], the pre-activation of each layer
        """
        activation_cache = [X]
        z_cache = []

        # hidden layers i = 1, ... , L-1
        for i in range(self.num_layers-1):
            h_new, z_new = self.activation_forward(
                activation_cache[i], self.W[i], self.b[i], self.activation_fun[i])
            activation_cache.append(h_new)
            z_cache.append(z_new)

        # output layer, always softmax
        h_L, z_L = self.activation_forward(
            activation_cache[-1], self.W[-1], self.b[-1], "softmax")
        activation_cache.append(h_L)
        z_cache.append(z_L)

        return activation_cache, z_cache

    ### Derivatives used ###
    def cost_derivative(self, h_L, y):
        """
        Based on the calculated derivative of the cross-entropy cost function
        with respect to the pre-activation of the softmax output layer.

        Arguments
        h_L : np.array, final element of activation_cache
        y : np.array, one-hot encoded classes
        """
        return h_L - y

    def sigmoid_grad(self, z):
        """
        Compute gradient of sigmoid function wrt to the input

        Arguments
        z : np.array, linear transformation of the previous layers output
        """
        h = self.sigmoid(z)
        return h * (1-h)

    def relu_grad(self, z):
        """
        Compute gradient of relu function wrt to the input

        The result is 1 where z > 0 and 0 elsewhere (including z == 0).

        Arguments
        z : np.array, linear transformation of the previous layers output
        """
        # relu(z) > 0 holds exactly where z > 0, so threshold z directly.
        return np.int64(np.asarray(z) > 0)

    def softmax_grad(self, z):
        """
        Element-wise derivative dh_i/dz_i of the softmax output.

        The full derivative of softmax is a Jacobian with entries
        h_i * (delta_ij - h_j); only its diagonal h_i * (1 - h_i) is returned
        here. For the softmax output layer combined with cross-entropy the
        Jacobian is not needed, since the combined derivative reduces to
        h_L - y (see cost_derivative).

        Arguments
        z : np.array, linear transformation of the previous layers output
        """
        h = self.softmax(z)
        return h * (1-h)
        

In [205]:
# Toy data set: four samples (rows) with three features (columns) each.
X = np.array([
    [1, 2, -2],
    [2, 3, 4],
    [5, 6, 2],
    [-2, -1, 2],
])
# One-hot class labels, one row per sample and one column per class.
y = np.array([
    [0, 1, 0],
    [1, 0, 0],
    [0, 0, 1],
    [0, 1, 0],
])

# Input width and class count are read off the data; the single hidden layer
# has two units and uses the sigmoid activation.
test = Network(sizes=[X.shape[1], 2, y.shape[1]], activation_fun="sigmoid")

In [206]:
# Forward pass on the toy data: `h` is the list of activations (starting with
# X itself) and `z` is the list of per-layer pre-activations.
h, z = test.model_forward(X)

In [212]:
# Last entry of the activation cache: the softmax output of the final layer.
h[-1]

array([[0.34330207, 0.33999464, 0.3167033 ],
       [0.34086611, 0.34533241, 0.31380148],
       [0.34181393, 0.34408128, 0.31410479],
       [0.34149434, 0.34306342, 0.31544224]])