In [86]:
import numpy as np
rand = np.random.default_rng(42)

In [87]:
class Layer:
    def __init__(self):
        pass 

    def __call__(self,x,train=False):
        pass 

    def backward(self,out_grad,learning_rate):
        pass 

In [88]:
class Linear(Layer):
    def __init__(self,fan_in,fan_out,seed=42):
        self.fan_in = fan_in 
        self.fan_out = fan_out

        #xavier init 
        limit = np.sqrt(6 / (fan_in + fan_out))
        self.weights = np.random.uniform(-limit, limit, size=(fan_out, fan_in))
        self.bias = np.ones((fan_out,1))

    def __call__(self,x,train=False):
        self.input = x
        return np.dot(self.weights,self.input) + self.bias
    

    def __repr__(self):
        pass 

    def backward(self, out_grad, learning_rate ):
        wgrad = np.dot(out_grad, self.input.T) / np.size(out_grad, axis=1) # mean  
        bgrad = np.mean(out_grad, axis=1, keepdims=True) 
        inputgrad = np.dot(self.weights.T, out_grad)

        self.weights -= learning_rate * wgrad 
        self.bias -= learning_rate * bgrad 

        # print(f"{self.wgrad}")
        # print(f"{self.bgrad}")

        return inputgrad 

    def reset_grad(self):
        self.wgrad = np.zeros((self.fan_in,self.fan_out))
        self.bgrad = np.zeros((1,self.fan_out)) 


In [89]:
class Softmax(Layer):
    def __call__(self, input, train=False):
        self.input = input
        tmp = input - np.max(input, axis=0)  
        tmp = np.exp(tmp)
        self.output = tmp / np.sum(tmp, axis=0, keepdims=True)
        return self.output 
    def backward(self, out_grad, learning_rate):
        n = np.size(self.output, axis=0) 
        grad = np.hstack([ np.dot( (np.identity(n) - input.T )*input, out_grad) for input in self.input.T  ])
        return grad #np.dot((np.identity(n)-self.output.T) * self.output, out_grad)  

In [90]:
class Activation(Layer):
    def __init__(self,activation,activation_prime):
        self.activation = activation 
        self.activation_grad = activation_prime

    def __call__(self, input, train=False):
        self.input = input 
        return self.activation(self.input) 
    
    def backward(self, out_grad, learning_rate):
        return np.multiply(out_grad, self.activation_grad(self.input))

In [91]:
class Tanh(Activation):
    def __init__(self):
        def tanh(x):
            return np.tanh(x)
        def tanh_grad(x):
            return 1-np.tanh(x)**2 
        super().__init__(tanh,tanh_grad)

In [92]:
class Sigmoid(Activation):
    def __init__(self):
        def sigmoid(x):
            return 1.0 / (1.0 + np.exp(-x))
        
        def sigmoid_grad(x):
            return sigmoid(x) * (1.0 - sigmoid(x)) 
        super().__init__(sigmoid,sigmoid_grad) 
            

In [93]:
class ReLU(Activation):
    def __init__(self):
        def relu(x):
            return np.maximum(0,x)
        def relu_grad(x):
            return np.where(x > 0, 1, np.where(x < 0, 0, 0.5))
        super().__init__(relu,relu_grad)

In [94]:
class Dropout(Layer):
    def __init__(self, dropout_rate):
        self.dropout_rate = dropout_rate
        self.mask = None

    def __call__(self, x, train=False):
        if train:
            self.mask = (np.random.rand(*x.shape) < (1 - self.dropout_rate)) / (1 - self.dropout_rate)
            # print(self.mask)
            return x * self.mask
        else:
            return x

    def backward(self, grad, learning_rate):
        return grad * self.mask if self.mask is not None else grad

In [95]:
def mse(y_true, y_pred):
    return np.mean(np.power(y_true - y_pred, 2))

def mse_grad(y_true, y_pred):
    return 2 * (y_pred - y_true) / np.size(y_true)

def binary_cross_entropy(y_true, y_pred):
    return np.mean(-y_true * np.log(y_pred) - (1 - y_true) * np.log(1 - y_pred))

def binary_cross_entropy_grad(y_true, y_pred): # wrt y_pred
    return ((1 - y_true) / (1 - y_pred) - y_true / y_pred) / np.size(y_true)

def cross_entropy(y_true,y_pred,epsilon=1e-15):
    y_pred = np.clip( y_pred, epsilon, 1.0-epsilon)
    return np.mean(-y_true*np.log(y_pred)) 

def cross_entropy_grad(y_true,y_pred,epsilon=1e-15):
    y_pred = np.clip( y_pred, epsilon, 1.0-epsilon)
    return (-y_true/y_pred) / len(y_true)  

In [96]:
class NN:
    def __init__(self, *layers):
        self.layers : Layer = [] 
        for layer in layers:
            self.layers += [layer]
        
    def __call__(self,input,train=False):
        for layer in self.layers:
            input = layer(input,train=train)
        return input 
    def backward(self, ):
        pass 
    
    def save(self,filename):
        pass 
    def load(self,filename):
        pass
    
    def train_minibatch(self,):
        pass 
    
    def train(self, loss, loss_grad, X, y, epochs = 1000, batch_size = 8, learning_rate = 0.001, verbose=True):
        shuffled_indices = np.arange(len(X))
        np.random.shuffle(shuffled_indices) 

        X = X[shuffled_indices]
        y = y[shuffled_indices] 

        for epoch in range(epochs):
            error = 0 

            for i in range((len(X)+batch_size-1)//batch_size):
                X_batch = X[i:i+batch_size].T
                y_batch = y[i:i+batch_size].T

                y_pred = self.__call__(X_batch,train=True)
                error += loss(y_batch,y_pred)

                grad = loss_grad(y_batch,y_pred)

                for layer in reversed(self.layers):
                    grad = layer.backward(grad, learning_rate)
            error /= len(X)
            if verbose:
                print(f"{epoch=}, {error=}")

    def eval(self, X,y):
        corr = 0 
        for x,y in zip(X,y):
            y_pred = self.__call__(x)
            corr += 1 if np.argmax(y_pred) == np.argmax(y) else 0 
        
        return corr/len(X) 
    


Import Dataset

In [97]:
from torchvision import datasets, transforms 

train_validation_dataset = datasets.EMNIST(root='./data', 
                                           split='letters',
                                           train=True,
                                           transform=transforms.ToTensor(),
                                           download=True,
                                        )


independent_test_dataset = datasets.EMNIST(
                             root='./data',
                             split='letters',
                             train=False,
                             transform=transforms.ToTensor(),
                             )


In [98]:
type(train_validation_dataset)
n_classes = len(train_validation_dataset.classes)
n_datapoints = len(train_validation_dataset.targets)

X_train = np.array(train_validation_dataset.data.reshape(-1,28*28))
# y_train = np.zeros((len(train_validation_dataset.targets),len(train_validation_dataset.classes)))

y_train = np.array([[1 if temp == i else 0 for i in range(n_classes)] for temp in train_validation_dataset.targets])
y_train[3]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0])

In [99]:
X_test = np.array(independent_test_dataset.data.reshape(-1,28*28))
y_test = np.array([[1 if temp == i else 0 for i in range(n_classes)] for temp in independent_test_dataset.targets])

In [100]:
import pandas as pd 

df = pd.DataFrame(train_validation_dataset.data.reshape(-1,28*28))

In [101]:
model = NN(Linear(28*28,1024), ReLU(), Dropout(0.2), Linear(1024,n_classes), ReLU(), Dropout(0.3), Sigmoid())

In [103]:
model.train(cross_entropy, cross_entropy_grad, X_train, y_train, batch_size=1000, epochs=10, learning_rate=0.01)

epoch=0, error=1.5341182674297338e-05
epoch=1, error=1.3935674039037677e-05
epoch=2, error=1.2982227036972222e-05
epoch=3, error=1.2300383413063474e-05
epoch=4, error=1.1698609802370086e-05
epoch=5, error=1.107696364799779e-05
epoch=6, error=1.062510406754492e-05
epoch=7, error=1.0207003265852734e-05
epoch=8, error=9.885472741422722e-06
epoch=9, error=9.658039603395563e-06


In [104]:
model.eval(X_test,y_test)

In [None]:
model(X_train[:4].T)

(784, 4)

In [None]:
y_train[:4].T

array([[0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [1],
       [0],
       [0],
       [0]])