# Deep learning - project 1
## Creating and training a neural network from scratch

In [3]:
import numpy as np
from typing import List, Callable, Iterable
from abc import ABC, abstractmethod 

In [25]:
"""
Abstract base classes for loss and activation functions, and the Layer building block.
"""

class Loss(ABC):
    """
    Base class for a loss function of a network.

    Subclasses have to implement:
      * ``__call__(y_predicted, y_true)`` - the value of the loss,
      * ``derivative(y_predicted, y_true)`` - the derivative of the loss.
    """
    @abstractmethod
    def __call__(self, y_predicted: np.ndarray, y_true: np.ndarray) -> float:
        """Return the value of the loss for predictions ``y_predicted`` and targets ``y_true``."""

    @abstractmethod
    def derivative(self, y_predicted: np.ndarray, y_true: np.ndarray) -> np.ndarray:
        """
        Return the derivative of the loss.

        NOTE(review): presumably taken with respect to ``y_predicted``
        (that is what QuadraticLoss does) - confirm for new subclasses.
        """

class Activation(ABC):
    """
    Base class for an activation function.

    Subclasses have to implement the pointwise function itself (``__call__``)
    and its derivative (``derivative``).
    """
    @abstractmethod
    def __call__(self, x: np.ndarray) -> np.ndarray:
        """Activation (pointwise) function applied to ``x``."""

    @abstractmethod
    def derivative(self, x: np.ndarray) -> np.ndarray:
        """
        Derivative method of the activation function, evaluated at ``x``.

        The Sigmoid implementation in this file takes an n x K array and
        returns an n x K x K array holding one K x K Jacobian per row of ``x``.
        """

class Layer:
    """
    One layer of a neural network. Used only when creating neural networks.

    The layer maps an input state x (n x H) to activation(x.dot(W) + b),
    where H is the input size and K is the number of neurons.
    # attributes: n_neurons / output_size (K), input_size (H), W (H x K), b (1 x K)
    # implements: operator() which depends on activation function,
    #             derivative_weights(), derivative_state()
    """
    def __init__(self, n_neurons: int, activation: Activation, *, verbose=False):
        self.n_neurons = n_neurons
        self.output_size = n_neurons
        self.activation = activation
        self.verbose = verbose
        # Parameters only exist after initialize(); keeping explicit placeholders
        # lets __str__/__repr__ work on a layer that is not initialized yet.
        self.input_size = None
        self.W = None
        self.b = None

    def initialize(self, input_size, *, sd=0.1):
        """Draw weights and biases from N(0, sd^2) for the given input size."""
        self.input_size = input_size
        self.W = np.random.normal(0, sd, size=[input_size, self.output_size])  # weights
        self.b = np.random.normal(0, sd, size=[1, self.output_size])  # biases

    # Original (misspelled) name, kept so that existing callers keep working.
    initalize = initialize

    def verbose_print(self, s: str):
        """Print ``s`` only when the layer was created with verbose=True."""
        if self.verbose:
            print(s)

    def _pre_activation(self, state: np.ndarray) -> np.ndarray:
        """Affine part of the layer, state.dot(W) + b, exactly as used in the forward pass."""
        if self.W is None or self.b is None:
            raise RuntimeError("Layer has to be initialized before it is used")
        return state.dot(self.W) + self.b

    def __call__(self, input_vector: np.ndarray) -> np.ndarray:
        """Forward pass of the layer: activation(input_vector.dot(W) + b)."""
        if self.verbose:
            # Build the message only when needed and from the whole shape,
            # so that an input which is not 2-D does not break the formatting.
            shape = "x".join(str(dim) for dim in np.shape(input_vector))
            self.verbose_print("calculating next vector from sizes {} with matrix {}x{}".format(shape, self.input_size, self.output_size))
        return self.activation(self._pre_activation(input_vector))

    def __str__(self):
        return "Layer(input_size={}, output_size={})".format(self.input_size, self.output_size)

    def __repr__(self):
        return str(self)

    def derivative_weights(self, state: np.ndarray) -> np.ndarray:
        """
        Derivative of the layer output with respect to the weights W.

        Let H be the dimensionality of the input state, W be an H x K matrix
        and n the number of samples; ``state`` is n x H.

        Returns an n x K x H x K array ``out`` with
        ``out[s, k, h, j] = d output[s, k] / d W[h, j]``.
        """
        next_state = self._pre_activation(state)  # n x K
        # n x K x K; assumed to be indexed [sample, output, pre-activation].
        # For the diagonal Jacobians of pointwise activations the order of the
        # last two axes does not matter.
        outer_der = self.activation.derivative(next_state)
        # d next_state[m] / d W[h, j] = state[h] if m == j else 0, so the chain rule
        # sum over m collapses to outer_der[k, j] * state[h].
        return np.einsum("skj,sh->skhj", outer_der, state)

    def derivative_state(self, state: np.ndarray) -> np.ndarray:
        """
        Derivative of the layer output with respect to the input state.

        ``state`` is n x H. np.dot of W (H x K) with the n x K x K activation
        derivative D gives an H x n x K array:
        ``out[h, s, k] = sum_m W[h, m] * D[s, m, k]``.
        NOTE(review): the sample axis ends up in the middle - confirm that this
        layout is what the (not yet written) backward pass expects.
        """
        next_state = self._pre_activation(state)  # TODO: optimize: no need to calculate it in backward pass
        return self.W.dot(self.activation.derivative(next_state))

In [42]:
"""
Concrete implementations of specific loss functions and activation functions.
"""

class QuadraticLoss(Loss):
    """
    Loss for simple regression: mean squared error.

    For n samples the loss is sum((y_predicted - y_true)^2) / n, and
    ``derivative`` is its gradient with respect to ``y_predicted``.
    """
    @staticmethod
    def _check_lengths(y_predicted: np.ndarray, y_true: np.ndarray) -> int:
        """Return the common number of samples or raise IndexError on a mismatch."""
        if len(y_predicted) != len(y_true):
            raise IndexError("length of y_predicted ({}) has to be the same as lenght of y_true ({})".format(len(y_predicted), len(y_true)))
        return len(y_true)

    def __call__(self, y_predicted: np.ndarray, y_true: np.ndarray) -> float:
        """
        Return the mean squared error.

        Raises IndexError when the inputs differ in length.
        """
        n = self._check_lengths(y_predicted, y_true)
        # Squared error (not its square root), so that the value agrees with
        # derivative(), which is 2 / n * (y_predicted - y_true).
        return np.sum((y_predicted - y_true) ** 2) / n

    def derivative(self, y_predicted: np.ndarray, y_true: np.ndarray) -> np.ndarray:
        """
        Return the gradient of the loss with respect to ``y_predicted``.

        Raises IndexError when the inputs differ in length (an explicit check
        instead of ``assert``, which is skipped under ``python -O``).
        """
        n = self._check_lengths(y_predicted, y_true)
        return 2 / n * (y_predicted - y_true)
        
    
class BernLoss(Loss):
    """
    Loss for binary classification (negative Bernoulli log-likelihood),
    also known as cross-entropy between the empirical and model distribution.

    ``y_predicted`` holds the predicted probability of class 1 for each sample.
    A sample belongs to class 1 when its entry of ``y_true`` equals 1; any
    other value is treated as class 0.
    """
    @staticmethod
    def _flatten(y_predicted: np.ndarray, y_true: np.ndarray):
        """
        Return both inputs as 1-D arrays with one entry per sample.

        Flattening keeps an (n, 1) prediction from broadcasting against an
        (n,) target. Raises IndexError when the number of entries differs.
        """
        p = np.asarray(y_predicted, dtype=float).ravel()
        y = np.asarray(y_true).ravel()
        if p.size != y.size:
            raise IndexError("length of y_predicted ({}) has to be the same as length of y_true ({})".format(p.size, y.size))
        return p, y

    def __call__(self, y_predicted: np.ndarray, y_true: np.ndarray) -> float:
        """Return the mean negative log-likelihood of the true classes."""
        p, y = self._flatten(y_predicted, y_true)
        # Probability which the model assigns to the class that was observed.
        likelihood = np.where(y == 1, p, 1 - p)
        return -np.mean(np.log(likelihood))

    def derivative(self, y_predicted: np.ndarray, y_true: np.ndarray) -> np.ndarray:
        """
        Return the gradient of the loss with respect to ``y_predicted``.

        The result has the same shape as ``y_predicted``. For a class-1 sample
        the entry is -1 / (n * p), otherwise 1 / (n * (1 - p)).
        """
        p, y = self._flatten(y_predicted, y_true)
        is_positive = y == 1
        likelihood = np.where(is_positive, p, 1 - p)
        # d likelihood / d p is +1 for class 1 and -1 for class 0.
        sign = np.where(is_positive, 1.0, -1.0)
        gradient = -sign / (p.size * likelihood)
        return gradient.reshape(np.shape(y_predicted))

# TODO: should this be a class? There is no state in it
class Sigmoid(Activation):
    """Logistic sigmoid activation, 1 / (1 + exp(-x)), applied pointwise."""
    def __call__(self, x: np.ndarray) -> np.ndarray:
        """Return the sigmoid of every element of ``x``."""
        # logaddexp(0, -x) is log(1 + exp(-x)) computed without overflowing,
        # so large negative inputs give 0 without an overflow warning.
        return np.exp(-np.logaddexp(0, -x))

    def derivative(self, x: np.ndarray) -> np.ndarray:
        """
        Return one Jacobian per row of ``x``.

        ``x`` is n x K; the result is n x K x K, where each K x K matrix is
        diagonal with entries sigmoid(x) * (1 - sigmoid(x)).
        """
        value = self(x)  # evaluated once and reused
        der = value * (1 - value)  # n x K
        # TODO: this is sparse matrix: possible memory and time optimization
        return der[:, :, np.newaxis] * np.identity(der.shape[1])
    
class Identity(Activation):
    """Identity activation: returns its input unchanged."""
    def __call__(self, x: np.ndarray) -> np.ndarray:
        """Return ``x`` itself (no copy is made)."""
        return x

    def derivative(self, x: np.ndarray) -> np.ndarray:  # x is n x D
        """
        Return one D x D identity Jacobian per row of ``x``.

        ``x`` is n x D; the result is n x D x D.
        """
        dim = x.shape[1]
        # The size of the identity matrix is the row length, not the row itself.
        return np.array([np.identity(dim) for _ in x])


In [43]:
class NNet:
    """Feedforward (classical) neural network"""
    def __init__(self, input_size: int, layers: List[Layer], loss: Loss):
        """
        Initialize every layer and store the layers and the loss.

        The first layer is sized by ``input_size``; each following layer is
        sized by the ``output_size`` of the layer before it.
        """
        layers[0].initalize(input_size)
        for previous, current in zip(layers, layers[1:]):
            current.initalize(input_size=previous.output_size)
        self.loss = loss
        self.layers = layers

    def __call__(self, x: np.array):
        """Forward pass: feed ``x`` through the layers in order."""
        state = x
        for layer in self.layers:
            state = layer(state)
        return state
        

In [44]:
# Smallest possible network: 10 inputs feeding one linear (identity) neuron.
nnet = NNet(input_size=10, layers=[Layer(1, Identity())], loss=QuadraticLoss())

In [45]:
# 100 random samples with 10 features each, drawn from N(0, 100^2)
x = np.random.normal(loc=0, scale=100, size=(100, 10))

In [46]:
# Forward pass over all rows of x; show the network output for the first 5 rows
nnet(x)[:5]

array([[ 56.22139407],
       [  5.0670563 ],
       [ 38.64728734],
       [-36.19524707],
       [-15.67747743]])