In [52]:
import numpy as np
size = 100
x11 = np.random.uniform(low=0.0,high=1.0,size=size)
x12 = np.random.uniform(low=2.0,high=8.0,size=size)
x13 = np.random.uniform(low=6.0,high=8.0,size=size)

x21 = np.random.uniform(low=10.0,high=11.0,size=size)
x22 = np.random.uniform(low=12.0,high=18.0,size=size)
x23 = np.random.uniform(low=16.0,high=18.0,size=size)

x31 = np.random.uniform(low=20.0,high=21.0,size=size)
x32 = np.random.uniform(low=22.0,high=28.0,size=size)
x33 = np.random.uniform(low=26.0,high=28.0,size=size)

y1 = np.transpose(np.zeros(100))
y2 = np.transpose(np.ones(100))
y3 = np.transpose(np.ones(100) * 2)

X1 = np.transpose(np.array([x11, x12, x13]))
X2 = np.transpose(np.array([x21, x22, x23]))
X3 = np.transpose(np.array([x31, x32, x33]))
X = np.matrix((np.vstack((X1, X2, X3)))).T

y = np.matrix((np.hstack((y1, y2, y3)))).T


In [53]:
import numpy as np
import logging
formatter = logging.Formatter('%(message)s')


def activation_function(x, activation = 'sigmoid'):
    if (activation == 'sigmoid'):
        return 1 / (1 + np.exp(-x))
    elif (activation == 'tanh'):
        return (np.exp(x) - np.exp(-x)) / (np.exp(x) + np.exp(-x))
    elif activation == 'relu':
        x[np.where(x < 0)] = 0.0
        return x
    else:
        return x

def output_function(x, activation = 'softmax'):
    if (activation == 'softmax'):
        x = np.exp(x - np.max(x, axis = 0))  # Normalization for numerical stability, from CS231n notes
        return x / np.sum(x, axis=0)
    if (activation == 'sigmoid'):
        return 1 / (1 + np.exp(-x))
    else:
        return x

def loss_function(y_true, y_pred, loss = 'ce'):
    batch_size = y_true.shape[0]
    if loss == 'sq':
        e_y = np.zeros_like(y_pred)
        e_y[y_true, range(batch_size)] = 1
        return (1.0 / (2.0 * batch_size)) * np.sum((e_y - y_pred)**2)
    if loss == 'ce':
        return (-1.0 / batch_size) * np.log(y_pred[y_true, range(batch_size)]).sum()


def setup_logger(name, log_file, level=logging.INFO):
    """Function setup as many loggers as you want"""

    handler = logging.FileHandler(log_file, mode = 'w')        
    handler.setFormatter(formatter)

    logger = logging.getLogger(name)
    logger.setLevel(level)
    logger.addHandler(handler)

    return logger

In [54]:
import numpy as np

class Network:

    def __init__(self, num_hidden, sizes, activation_choice = 'softmax', output_choice = 'softmax', loss_choice = 'ce'):
        # L hidden layers, layer 0 is input, layer (L+1) is output
        self.sizes = sizes
        sizes = [300] + sizes + [3]
        self.L = num_hidden
        self.output_shape = 10
        # Parameter map from theta to Ws and bs
        self.param_map = {}
        start, end = 0, 0
        for i in range(1, self.L + 2):
            end = start + sizes[i - 1] * sizes[i]
            self.param_map['W{}'.format(i)] = (start, end)
            start = end
            end = start + sizes[i]
            self.param_map['b{}'.format(i)] = (start, end)
            start = end
        num_params = end
        # Parameter vector - theta
        self.theta = np.random.uniform(-1.0, 1.0, num_params)
        # Gradient vector - theta
        self.grad_theta = np.zeros_like(self.theta)
        # Map theta (grad_theta) to params (grad_params)
        self.params = {}
        self.grad_params = {}
        for i in range(1, self.L + 2):
            weight = 'W{}'.format(i)
            start, end = self.param_map[weight]
            self.params[weight] = self.theta[start : end].reshape((sizes[i], sizes[i - 1]))
            self.grad_params[weight] = self.grad_theta[start : end].reshape((sizes[i], sizes[i - 1]))
            bias = 'b{}'.format(i)
            start, end = self.param_map[bias]
            self.params[bias] = self.theta[start : end].reshape((sizes[i], 1))
            self.grad_params[bias] = self.grad_theta[start : end].reshape((sizes[i], 1))

        self.activation_choice = activation_choice
        self.output_choice = output_choice
        self.loss_choice = loss_choice

    # x is of shape (input_size, batch_size), y is of shape (batch_size)
    def forward(self, x, y):
        # a(i) = b(i) + W(i)*h(i-1)
        # h(i) = g(i-1)
        self.activations = {}
        self.activations['h0'] = x
        self.batch_size = x.shape[1]
        for i in range (1, self.L + 1):
            self.activations['a{}'.format(i)] = self.params['b{}'.format(i)] + np.matmul(self.params['W{}'.format(i)], self.activations['h{}'.format(i-1)])
            self.activations['h{}'.format(i)] = activation_function(self.activations['a{}'.format(i)], self.activation_choice)

        self.activations['a{}'.format(self.L + 1)] = self.params['b{}'.format(self.L + 1)] + np.matmul(self.params['W{}'.format(self.L+1)], self.activations['h{}'.format(self.L)])
        y_pred = output_function(self.activations['a{}'.format(self.L + 1)], self.output_choice)
        loss = loss_function(y, y_pred, self.loss_choice)
        return y_pred, loss

    def backward(self, y_true, y_pred):
        grad_activations = {}
        # Compute output gradient
        e_y = np.zeros_like(y_pred)
        e_y[y_true, range(self.batch_size)] = 1
        if self.loss_choice == 'ce':
            grad_activations['a{}'.format(self.L + 1)] = -(e_y - y_pred)
        elif self.loss_choice == 'sq':
            grad_activations['a{}'.format(self.L + 1)] = -(e_y - y_pred) * y_pred * (1 - y_pred)
        for k in range (self.L + 1, 0, -1):
            # Gradients wrt parameters
            self.grad_params['W{}'.format(k)][:, :] = (1.0 / self.batch_size) * np.matmul(grad_activations['a{}'.format(k)], self.activations['h{}'.format(k-1)].T)
            self.grad_params['b{}'.format(k)][:, :] = (1.0 / self.batch_size) * np.sum(grad_activations['a{}'.format(k)], axis = 1, keepdims = True)
            # Do not compute gradients with respect to the inputs
            if k == 1:
                break
            # Gradients wrt prev layer
            grad_activations['h{}'.format(k-1)] = np.matmul(self.params['W{}'.format(k)].T, grad_activations['a{}'.format(k)])
            # Gradients wrt prev preactivation
            if self.activation_choice == 'sigmoid':
                grad_activation_ = np.multiply(self.activations['h{}'.format(k - 1)], 1 - self.activations['h{}'.format(k - 1)])
            elif self.activation_choice == 'tanh':
                grad_activation_ = 1 - (self.activations['h{}'.format(k - 1)]) ** 2
            elif self.activation_choice == 'relu':
                grad_activation_ = np.zeros_like(self.activations['a{}'.format(k - 1)])
                grad_activation_[np.where(self.activations['a{}'.format(k - 1)] > 0)] = 1.0
            grad_activations['a{}'.format(k-1)] = np.multiply(grad_activations['h{}'.format(k-1)], grad_activation_)

    def performance(self, y_true, y_pred):
        y_pred = y_pred.argmax(axis = 0)
        return float(np.sum(y_pred != y_true)) /y_pred.shape[0] * 100

    def predict(self, x):
        y_pred, _ = self.forward(x, np.ones((x.shape[1]), dtype = np.int16))
        return y_pred.argmax(axis = 0)

    def save(self, path):
        np.save(path, self.theta)

    def load(self, theta = None, path = None):
        if path != None:
            theta = np.load(path)
        self.theta[:] = theta


In [55]:
a = Network(3, [3,3,3], activation_choice = 'softmax', output_choice = 'softmax', loss_choice = 'loss')
y_pred, loss = a.forward(X,y)
a.backward(y,y_pred)

ValueError: shapes (3,300) and (3,300) not aligned: 300 (dim 1) != 3 (dim 0)

In [56]:
def forward(self, x, y):
    # a(i) = b(i) + W(i)*h(i-1)
    # h(i) = g(i-1)
    activations = {}
    activations['h0'] = x
    batch_size = x.shape[1]
    activation_choice = 'softmax'
    
    for i in range (1, self.L + 1):
        activations['a{}'.format(i)] = params['b{}'.format(i)] + np.matmul(self.params['W{}'.format(i)], activations['h{}'.format(i-1)])
        activations['h{}'.format(i)] = activation_function(activations['a{}'.format(i)], activation_choice)



In [49]:
y.shape

(1, 300)