In [2]:
from abc import ABC, abstractmethod
import numpy as np
from tqdm.auto import trange
from tensorflow.keras.datasets import mnist

### Data preprocessing

In [3]:
def flatten_images(data):
    """Collapse every sample of ``data`` into a single row.

    The leading axis is kept as the sample axis and all remaining axes are
    merged into one, so an array of shape ``(n, h, w)`` comes back with
    shape ``(n, h * w)``.
    """
    sample_count = data.shape[0]
    flat_shape = (sample_count, -1)  # -1 lets NumPy infer the merged width
    return data.reshape(flat_shape)

def one_hot_labels(data, num_classes=None):
    """One-hot encode a 1-D array of integer class labels.

    Parameters
    ----------
    data : array of int, shape ``(n,)``
        Class index of every sample.
    num_classes : int, optional
        Width of the encoding. When omitted it is inferred as
        ``data.max() + 1``. Pass it explicitly when several splits must share
        the same width even if one of them lacks the highest class.

    Returns
    -------
    numpy.ndarray of float, shape ``(n, num_classes)``
        Row ``i`` is all zeros except for a 1 in column ``data[i]``.

    Raises
    ------
    ValueError
        If ``num_classes`` is given and a label does not fit into it.
    """
    labels = np.asarray(data)
    sample_count = labels.shape[0]

    # An empty input has no maximum to infer the width from.
    if sample_count == 0:
        return np.zeros((0, 0 if num_classes is None else num_classes))

    highest_label = int(labels.max())
    if num_classes is None:
        num_classes = highest_label + 1
    elif highest_label >= num_classes:
        raise ValueError(
            "label {0} is out of range for num_classes={1}".format(highest_label, num_classes)
        )

    encoded = np.zeros((sample_count, num_classes))
    encoded[np.arange(sample_count), labels] = 1
    return encoded

In [4]:
# Load MNIST, flatten each image into one row and divide by 255
# (presumably mapping 8-bit pixel values into [0, 1]), then one-hot encode the labels.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = flatten_images(x_train) / 255
x_test = flatten_images(x_test) / 255
y_train = one_hot_labels(y_train)
y_test = one_hot_labels(y_test)
# Displayed as the cell output: the shapes of all four arrays.
tuple(split.shape for split in (x_train, y_train, x_test, y_test))

((60000, 784), (60000, 10), (10000, 784), (10000, 10))

### Abstract classes

In [5]:
class Layer(ABC):
    """Abstract interface every network layer has to implement.

    The signatures mirror how the layers in this notebook are driven:
    ``forward`` receives the layer's input, and ``backward`` receives that
    same input together with the gradient coming from the next layer.
    """

    @abstractmethod
    def __init__(self):
        """Set up the layer; concrete layers must provide their own constructor."""
        pass

    @abstractmethod
    def forward(self, layer_input):
        """Compute and return the layer output for ``layer_input``."""
        pass

    @abstractmethod
    def backward(self, layer_input, chain_grad):
        """Back-propagate through the layer.

        ``layer_input`` is the value that was fed to ``forward`` and
        ``chain_grad`` is the gradient of the loss with respect to this
        layer's output. Implementations return the gradient with respect to
        ``layer_input``.
        """
        pass
    
class Activation(Layer):
    """Base class for activation layers.

    Supplies a concrete, argument-free constructor so subclasses only have to
    implement ``forward`` and ``backward``.
    """

    def __init__(self):
        """Nothing to initialise."""
    
class Loss(ABC):
    """Abstract loss function paired with its output activation.

    ``Model`` passes the raw output of the last layer as ``z`` and the result
    of ``last_layer_activations(z)`` as ``a``.
    """

    @abstractmethod
    def value(self, y_true, z):
        """Return the loss for targets ``y_true`` given last-layer outputs ``z``."""

    @abstractmethod
    def gradient(self, y_true, a):
        """Return the gradient that starts back-propagation, given activations ``a``."""

    @abstractmethod
    def last_layer_activations(self, z):
        """Map last-layer outputs ``z`` to the activations the loss works with."""
    
class Metric(ABC):
    """Abstract metric that remembers its most recently calculated value."""

    def __init__(self):
        # No value exists until ``calculate`` has been called.
        self.value = None

    @abstractmethod
    def calculate(self, y, a):
        """Compute the metric from targets ``y`` and activations ``a``.

        Subclasses store the result in ``self.value``; this base
        implementation only resets it to ``None``.
        """
        self.value = None

    def __repr__(self):
        """Render as ``"<ClassName>: <value>"``."""
        metric_name = type(self).__name__
        return "{0}: {1}".format(metric_name, self.value)

### Abstract class implementations

In [6]:
class Dense(Layer):
    """Fully connected layer computing ``layer_input @ W + b``.

    The layer owns its parameters and updates them with plain gradient
    descent inside ``backward``; there is no separate optimiser step.

    Parameters
    ----------
    input_size : int
        Number of input features (rows of ``W``).
    layer_size : int
        Number of units in the layer (columns of ``W``, length of ``b``).
    learning_rate : float, optional
        Step size used for the parameter update. Defaults to 0.1.
    """

    def __init__(self, input_size, layer_size, learning_rate=0.1):
        self.learning_rate = learning_rate
        self.output_size = layer_size
        self.input_size = input_size
        self.W = self.__initialize_weights()
        self.b = self.__initialize_biases()

    def __initialize_biases(self):
        """Return a zero bias vector of length ``output_size``."""
        return np.zeros(self.output_size)

    def __initialize_weights(self):
        """Return standard-normal weights scaled by ``1 / input_size``."""
        W = np.random.randn(self.input_size, self.output_size) * (1/self.input_size)
        return W

    def __gradient_step(self, weights_grad, bias_grad):
        """Move ``W`` and ``b`` against their gradients, in place."""
        self.W -= self.learning_rate * weights_grad
        self.b -= self.learning_rate * bias_grad

    def forward(self, layer_input):
        """Return the affine transform ``layer_input @ W + b``."""
        layer_output = (layer_input @ self.W) + self.b
        return layer_output

    def backward(self, layer_input, chain_grad):
        """Update the parameters and return the gradient w.r.t. ``layer_input``.

        ``layer_input`` is the 2-D batch that was fed to ``forward`` and
        ``chain_grad`` is the gradient of the loss with respect to this
        layer's output.
        """
        # Computed first so it uses the weights from the forward pass,
        # not the freshly updated ones.
        new_chain_grad = chain_grad @ self.W.T
        weights_grad = layer_input.T @ chain_grad
        # The bias is shared by every sample, so its gradient sums over the batch.
        bias_grad = np.sum(chain_grad, axis=0)
        self.__gradient_step(weights_grad, bias_grad)
        return new_chain_grad

class ReLu(Activation):
    """Rectified linear unit: ``max(0, x)`` applied element-wise."""

    def forward(self, layer_input):
        """Return ``layer_input`` with everything below zero replaced by 0."""
        rectified = np.maximum(0, layer_input)
        return rectified

    def backward(self, layer_input, chain_grad):
        """Pass ``chain_grad`` through only where ``layer_input`` was positive."""
        is_active = np.greater(layer_input, 0)
        return np.multiply(chain_grad, is_active)
    
class NegativeLogLikelihood(Loss):
    """Softmax output combined with the negative log-likelihood loss.

    Both the loss value and the softmax subtract the row-wise maximum before
    exponentiating. This leaves the results mathematically unchanged while
    keeping ``np.exp`` from overflowing on large outputs.
    """

    def value(self, y_true, z):
        """Return the mean negative log-likelihood over the batch.

        ``y_true`` holds one-hot rows and ``z`` the raw last-layer outputs,
        both of shape ``(batch, classes)``.
        """
        # Output of the correct class for each sample (one entry per row for one-hot targets).
        z_true = z[y_true == 1.]
        # log(sum(exp(z))) computed as max + log(sum(exp(z - max))).
        row_max = np.max(z, axis=-1, keepdims=True)
        log_norm = np.log(np.sum(np.exp(z - row_max), axis=-1)) + row_max[..., 0]
        return np.mean(-z_true + log_norm)

    def gradient(self, y_true, a):
        """Return the gradient of the mean loss w.r.t. the last-layer outputs.

        ``a`` are the softmax probabilities; dividing by the batch size
        matches the mean taken in ``value``.
        """
        return (a - y_true) / y_true.shape[0]

    def last_layer_activations(self, z):
        """Return the row-wise softmax of ``z``."""
        shifted_exp = np.exp(z - np.max(z, axis=-1, keepdims=True))
        softmax = shifted_exp / np.sum(shifted_exp, axis=-1, keepdims=True)
        return softmax

class Accuracy(Metric):
    """Fraction of samples whose arg-max prediction equals the arg-max target."""

    def calculate(self, y, a):
        """Store the accuracy of activations ``a`` against targets ``y`` in ``self.value``."""
        true_classes = np.argmax(y, axis=1)
        predicted_classes = np.argmax(a, axis=1)
        hit_count = np.sum(true_classes == predicted_classes)
        self.value = hit_count / y.shape[0]
        
class Model:
    """Sequential network trained with mini-batch gradient descent.

    Parameters
    ----------
    layers : list
        Layers applied in order; each provides ``forward(layer_input)`` and
        ``backward(layer_input, chain_grad)``.
    loss_function : object
        Provides ``value``, ``gradient`` and ``last_layer_activations``.
    epochs : int
        Number of passes over the training data made by ``fit``.
    batch_size : int
        Number of samples per mini-batch.
    metrics : list, optional
        Objects with a ``calculate(y, a)`` method; recomputed on every batch.
    """

    def __init__(self, layers, loss_function, epochs, batch_size, metrics=None):
        self.layers = layers
        self.epochs = epochs
        self.batch_size = batch_size
        self.loss_function = loss_function
        # A fresh list per instance: a mutable default would be shared by every Model.
        self.metrics = [] if metrics is None else metrics
        self.loss = np.inf

    def __forward(self, _input):
        """Run ``_input`` through all layers and return every layer's output, in order."""
        activations = []
        for layer in self.layers:
            _input = layer.forward(_input)
            activations.append(_input)
        return activations

    def __backward(self, activations, chain_grad):
        """Back-propagate ``chain_grad`` from the last layer to the first.

        ``activations[k]`` must be the input that layer ``k`` received in the
        forward pass.
        """
        for layer, layer_input in reversed(list(zip(self.layers, activations))):
            chain_grad = layer.backward(layer_input, chain_grad)

    def __calculate_metrics(self, y, a):
        """Recompute every metric for targets ``y`` and activations ``a``."""
        for metric in self.metrics:
            metric.calculate(y, a)

    def __print_metrics(self):
        """Print the most recently computed loss and metrics on one line."""
        print(", ".join(["Loss: {0}".format(self.loss)] + [str(m) for m in self.metrics]))

    def __batch_step(self, X, y):
        """Do one forward/backward pass on a mini-batch, updating the layers."""
        activations = self.__forward(X)
        self.loss = self.loss_function.value(y, activations[-1])
        probabilities = self.loss_function.last_layer_activations(activations[-1])
        self.__calculate_metrics(y, probabilities)
        loss_grad = self.loss_function.gradient(y, probabilities)
        # Layer k's input is X for k == 0 and the output of layer k-1 otherwise.
        self.__backward([X] + activations[:-1], loss_grad)

    def predict(self, X):
        """Return the loss function's output activations for inputs ``X``."""
        last_layer_output = self.__forward(X)[-1]
        return self.loss_function.last_layer_activations(last_layer_output)

    def fit(self, X, y):
        """Train on ``X`` / ``y`` for ``self.epochs`` epochs.

        The data is consumed in order, without shuffling. The last batch of
        an epoch also takes the samples left over when the sample count is
        not a multiple of ``batch_size``. The line printed after each epoch
        reflects only that epoch's final batch.
        """
        sample_count = y.shape[0]
        batch_count = sample_count // self.batch_size
        # Fewer samples than one batch: train on all of them as a single batch
        # rather than silently doing nothing.
        if batch_count == 0 and sample_count > 0:
            batch_count = 1
        for i in range(self.epochs):
            for j in trange(batch_count, desc='Epoch {0}:'.format(i)):
                start = j * self.batch_size
                # None as the stop index lets the final batch absorb the remainder.
                stop = (j + 1) * self.batch_size if j < batch_count - 1 else None
                self.__batch_step(X[start:stop, :], y[start:stop, :])
            self.__print_metrics()

### Neural network definition and training

In [7]:
# Three Dense layers with ReLu in between; the input width is taken from the
# training data and the last layer has 10 units.
model = Model(
    layers=[
        Dense(x_train.shape[1], 50),
        ReLu(),
        Dense(50, 100),
        ReLu(),
        Dense(100, 10),
    ],
    loss_function=NegativeLogLikelihood(),
    epochs=20,
    batch_size=32,
    metrics=[Accuracy()],
)

In [8]:
# Trains the model in place. The line printed after each epoch shows the loss
# and metrics of that epoch's final mini-batch only, not an epoch average.
model.fit(X=x_train, y=y_train)

HBox(children=(IntProgress(value=0, description='Epoch 0:', max=1875, style=ProgressStyle(description_width='i…


Loss: 0.08619815230137498, Accuracy: 1.0


HBox(children=(IntProgress(value=0, description='Epoch 1:', max=1875, style=ProgressStyle(description_width='i…


Loss: 0.030582315467546084, Accuracy: 1.0


HBox(children=(IntProgress(value=0, description='Epoch 2:', max=1875, style=ProgressStyle(description_width='i…


Loss: 0.018670443321086916, Accuracy: 1.0


HBox(children=(IntProgress(value=0, description='Epoch 3:', max=1875, style=ProgressStyle(description_width='i…


Loss: 0.010038298033699933, Accuracy: 1.0


HBox(children=(IntProgress(value=0, description='Epoch 4:', max=1875, style=ProgressStyle(description_width='i…


Loss: 0.009810253052072893, Accuracy: 1.0


HBox(children=(IntProgress(value=0, description='Epoch 5:', max=1875, style=ProgressStyle(description_width='i…


Loss: 0.008358818808580137, Accuracy: 1.0


HBox(children=(IntProgress(value=0, description='Epoch 6:', max=1875, style=ProgressStyle(description_width='i…


Loss: 0.0047085668587854845, Accuracy: 1.0


HBox(children=(IntProgress(value=0, description='Epoch 7:', max=1875, style=ProgressStyle(description_width='i…


Loss: 0.004716619491512919, Accuracy: 1.0


HBox(children=(IntProgress(value=0, description='Epoch 8:', max=1875, style=ProgressStyle(description_width='i…


Loss: 0.009901375669279122, Accuracy: 1.0


HBox(children=(IntProgress(value=0, description='Epoch 9:', max=1875, style=ProgressStyle(description_width='i…


Loss: 0.006167553479467958, Accuracy: 1.0


HBox(children=(IntProgress(value=0, description='Epoch 10:', max=1875, style=ProgressStyle(description_width='…


Loss: 0.004218875362454799, Accuracy: 1.0


HBox(children=(IntProgress(value=0, description='Epoch 11:', max=1875, style=ProgressStyle(description_width='…


Loss: 0.010077667569022342, Accuracy: 1.0


HBox(children=(IntProgress(value=0, description='Epoch 12:', max=1875, style=ProgressStyle(description_width='…


Loss: 0.008506821927047659, Accuracy: 1.0


HBox(children=(IntProgress(value=0, description='Epoch 13:', max=1875, style=ProgressStyle(description_width='…


Loss: 0.004509310709411463, Accuracy: 1.0


HBox(children=(IntProgress(value=0, description='Epoch 14:', max=1875, style=ProgressStyle(description_width='…


Loss: 0.01433938568238069, Accuracy: 1.0


HBox(children=(IntProgress(value=0, description='Epoch 15:', max=1875, style=ProgressStyle(description_width='…


Loss: 0.009481220339245533, Accuracy: 1.0


HBox(children=(IntProgress(value=0, description='Epoch 16:', max=1875, style=ProgressStyle(description_width='…


Loss: 0.005262661482664466, Accuracy: 1.0


HBox(children=(IntProgress(value=0, description='Epoch 17:', max=1875, style=ProgressStyle(description_width='…


Loss: 0.0005814872519404601, Accuracy: 1.0


HBox(children=(IntProgress(value=0, description='Epoch 18:', max=1875, style=ProgressStyle(description_width='…


Loss: 0.004662518335625643, Accuracy: 1.0


HBox(children=(IntProgress(value=0, description='Epoch 19:', max=1875, style=ProgressStyle(description_width='…


Loss: 0.004113177611575924, Accuracy: 1.0


### Measuring model performance on the test set

In [9]:
# Predict class probabilities for the first 1000 test samples only.
y_pred = model.predict(x_test[:1000])

In [10]:
# Share of evaluated samples whose arg-max prediction equals the arg-max label.
# The number of samples comes from y_pred itself, so this stays correct if the
# evaluated slice changes.
acc = np.mean(np.argmax(y_pred, axis=1) == np.argmax(y_test[:y_pred.shape[0], :], axis=1))
print("Test set accuracy: {0}".format(acc))

Test set accuracy: 0.972
