In [293]:
import numpy as np
rand = np.random.default_rng(42)

In [294]:
class Layer:
    def __init__(self):
        pass 

    def __call__(self,x,train=False):
        pass 

    def backward(self,out_grad,learning_rate):
        pass 

In [295]:
class Linear(Layer):
    def __init__(self,fan_in,fan_out,seed=42):
        self.fan_in = fan_in 
        self.fan_out = fan_out

        limit = np.sqrt(6 / (fan_in + fan_out))
        self.weights = np.random.uniform(-limit, limit, size=(fan_out, fan_in))
        self.bias = np.ones((fan_out,1))

    def __call__(self,x,train=False):
        self.input = x.reshape(self.fan_in,-1)
        return np.dot(self.weights,self.input) + self.bias
    

    def __repr__(self):
        pass 

    def backward(self, out_grad, learning_rate ):
        wgrad = np.dot(out_grad, self.input.T) 
        bgrad = out_grad 
        inputgrad = np.dot(self.weights.T, out_grad)

        self.weights -= learning_rate * wgrad 
        self.bias -= learning_rate * bgrad 

        # print(f"{self.wgrad}")
        # print(f"{self.bgrad}")

        return inputgrad 

    def reset_grad(self):
        self.wgrad = np.zeros((self.fan_in,self.fan_out))
        self.bgrad = np.zeros((1,self.fan_out)) 


In [296]:
class Softmax(Layer):
    def __call__(self, input, train=False):
        self.input = input
        tmp = input - max(input)  
        tmp = np.exp(tmp)
        self.output = tmp / np.sum(tmp)
        return self.output 
    def backward(self, out_grad, learning_rate):
        n = np.size(self.output) 
        return np.dot((np.identity(n)-self.output.T) * self.output, out_grad)  

In [297]:
class Activation(Layer):
    def __init__(self,activation,activation_prime):
        self.activation = activation 
        self.activation_grad = activation_prime

    def __call__(self, input, train=False):
        self.input = input 
        return self.activation(self.input) 
    
    def backward(self, out_grad, learning_rate):
        return np.multiply(out_grad, self.activation_grad(self.input))

In [298]:
class Tanh(Activation):
    def __init__(self):
        def tanh(x):
            return np.tanh(x)
        def tanh_grad(x):
            return 1-np.tanh(x)**2 
        super().__init__(tanh,tanh_grad)

In [299]:
class Sigmoid(Activation):
    def __init__(self):
        def sigmoid(x):
            return 1.0 / (1.0 + np.exp(-x))
        
        def sigmoid_grad(x):
            return sigmoid(x) * (1.0 - sigmoid(x)) 
        super().__init__(sigmoid,sigmoid_grad) 
            

In [300]:
class ReLU(Activation):
    def __init__(self):
        def relu(x):
            return np.maximum(0,x)
        def relu_grad(x):
            return np.where(x > 0, 1, np.where(x < 0, 0, 0.5))
        super().__init__(relu,relu_grad)

In [301]:
class Dropout(Layer):
    def __init__(self, dropout_rate):
        self.dropout_rate = dropout_rate
        self.mask = None

    def __call__(self, x, train=False):
        if train:
            self.mask = (np.random.rand(*x.shape) < (1 - self.dropout_rate)) / (1 - self.dropout_rate)
            # print(self.mask)
            return x * self.mask
        else:
            return x

    def backward(self, grad, learning_rate):
        return grad * self.mask if self.mask is not None else grad

In [302]:
def mse(y_true, y_pred):
    return np.mean(np.power(y_true - y_pred, 2))

def mse_grad(y_true, y_pred):
    return 2 * (y_pred - y_true) / np.size(y_true)

def binary_cross_entropy(y_true, y_pred):
    return np.mean(-y_true * np.log(y_pred) - (1 - y_true) * np.log(1 - y_pred))

def binary_cross_entropy_grad(y_true, y_pred): # wrt y_pred
    return ((1 - y_true) / (1 - y_pred) - y_true / y_pred) / np.size(y_true)

def cross_entropy(y_true,y_pred,epsilon=1e-15):
    y_pred = np.clip( y_pred, epsilon, 1.0-epsilon)
    return np.mean(-y_true*np.log(y_pred)) 

def cross_entropy_grad(y_true,y_pred,epsilon=1e-15):
    y_pred = np.clip( y_pred, epsilon, 1.0-epsilon)
    return (-y_true/y_pred) / len(y_true)  

In [303]:
class NN:
    def __init__(self, *layers):
        self.layers : Layer = [] 
        for layer in layers:
            self.layers += [layer]
        
    def __call__(self,input,train=False):
        for layer in self.layers:
            input = layer(input,train=train)
        return input 
    def backward(self, ):
        pass 
    
    def save(self,filename):
        pass 
    def load(self,filename):
        pass
    
    def train_minibatch(self,):
        pass 
    
    def train_batch(self, loss, loss_grad, X, y, epochs = 1000, learning_rate = 0.001, verbose=True):
        for epoch in range(epochs):
            error = 0 
            for x, y_ in zip(X,y):
                output = self.__call__(x,train=True)
                
                y__ = y_.reshape(-1,1)
                error += loss(y__,output)

                grad = loss_grad(y__,output)

                for layer in reversed(self.layers):
                    grad = layer.backward(grad, learning_rate)
            error /= len(X)
            if verbose:
                print(f"{epoch=}, {error=}")

    def eval(self, loss, X,y):
        error = 0 
        for x,y in zip(X,y):
            output = self.__call__(x)
            error += loss(y,output)
        error /= len(X)
        return error 
    


Import Dataset

In [304]:
from torchvision import datasets, transforms 

train_validation_dataset = datasets.EMNIST(root='./data', 
                                           split='letters',
                                           train=True,
                                           transform=transforms.ToTensor(),
                                           download=True,
                                        )


independent_test_dataset = datasets.EMNIST(
                             root='./data',
                             split='letters',
                             train=False,
                             transform=transforms.ToTensor(),
                             )


In [305]:
type(train_validation_dataset)
n_classes = len(train_validation_dataset.classes)
n_datapoints = len(train_validation_dataset.targets)

X_train = np.array(train_validation_dataset.data.reshape(-1,28*28))
# y_train = np.zeros((len(train_validation_dataset.targets),len(train_validation_dataset.classes)))

y_train = np.array([[1 if temp == i else 0 for i in range(n_classes)] for temp in train_validation_dataset.targets])
y_train[3]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0])

In [306]:
X_test = np.array(independent_test_dataset.data.reshape(-1,28*28))
y_test = np.array([[1 if temp == i else 0 for i in range(n_classes)] for temp in independent_test_dataset.targets])

In [307]:
import pandas as pd 

df = pd.DataFrame(train_validation_dataset.data.reshape(-1,28*28))

In [308]:
df[:5]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,774,775,776,777,778,779,780,781,782,783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [309]:
model = NN(Linear(28, 26), Dropout(0.5))

In [310]:
model = NN(Linear(28*28,1024), ReLU(), Dropout(0.2), Linear(1024,n_classes), Softmax(), Dropout(0.3), Sigmoid())

In [311]:
model.train_batch(cross_entropy, cross_entropy_grad, X_train, y_train, 3, learning_rate=0.01)

In [None]:
model(X_test[8])

array([[1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.]])

In [None]:
y_test[8].reshape

array([0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0])