In [1]:
import numpy as np
# Notebook-wide seeded generator so random draws are repeatable after a kernel restart.
rand = np.random.default_rng(seed=42)

In [2]:
class Layer:
    """Common interface shared by every building block of the network.

    The base class holds no state and does no computation: each method is a
    placeholder that returns ``None``.
    """

    def __init__(self):
        """Create the layer; the base class stores nothing."""

    def __call__(self, x):
        """Forward-pass placeholder: accepts ``x`` and returns ``None``."""
        return None

    def backward(self, out_grad, learning_rate):
        """Backward-pass placeholder: accepts both arguments and returns ``None``."""
        return None

In [3]:
class Linear(Layer):
    """Fully connected layer computing ``weights @ x + bias``.

    Inputs are treated as columns: the forward pass reshapes ``x`` to
    ``(fan_in, -1)``, so each column is one sample.

    Attributes:
        fan_in: Number of input features.
        fan_out: Number of output features.
        weights: Array of shape ``(fan_out, fan_in)``.
        bias: Array of shape ``(fan_out, 1)``, initialised to ones.
        input: The reshaped input of the most recent forward pass.
    """

    def __init__(self, fan_in, fan_out, seed=42):
        """Initialise the weights with Glorot/Xavier-uniform values.

        Args:
            fan_in: Number of input features.
            fan_out: Number of output features.
            seed: Seed for the generator that draws the initial weights.
                Layers built with the same seed and shape start with the
                same weights; pass distinct seeds (or ``None`` for fresh
                entropy) to decorrelate them.
        """
        self.fan_in = fan_in
        self.fan_out = fan_out

        limit = np.sqrt(6 / (fan_in + fan_out))
        # Draw from a generator built from `seed` so the argument is honoured
        # instead of silently falling back to NumPy's global random state.
        rng = np.random.default_rng(seed)
        self.weights = rng.uniform(-limit, limit, size=(fan_out, fan_in))
        self.bias = np.ones((fan_out, 1))

    def __call__(self, x):
        """Run the forward pass.

        Args:
            x: Array whose elements can be reshaped to ``(fan_in, -1)``.

        Returns:
            Array of shape ``(fan_out, n_columns)``.
        """
        # Cache the *reshaped* input: backward() needs exactly the matrix that
        # was multiplied here, not whatever shape the caller passed in.
        self.input = np.asarray(x).reshape(self.fan_in, -1)
        return np.dot(self.weights, self.input) + self.bias

    def __repr__(self):
        """Return a short description of the layer dimensions."""
        return f"Linear(fan_in={self.fan_in}, fan_out={self.fan_out})"

    def backward(self, out_grad, learning_rate):
        """Apply one gradient-descent step and propagate the gradient.

        Args:
            out_grad: Gradient of the loss with respect to this layer's
                output, shape ``(fan_out, n_columns)``.
            learning_rate: Step size of the parameter update.

        Returns:
            Gradient of the loss with respect to the layer input, shape
            ``(fan_in, n_columns)``.
        """
        wgrad = np.dot(out_grad, self.input.T)
        # Sum over the sample axis so the result matches the (fan_out, 1) bias
        # even when several columns are processed at once.
        bgrad = np.sum(out_grad, axis=1, keepdims=True)
        # Must be computed before the weights are modified below.
        inputgrad = np.dot(self.weights.T, out_grad)

        self.weights -= learning_rate * wgrad
        self.bias -= learning_rate * bgrad

        return inputgrad

    def reset_grad(self):
        """Zero the gradient accumulators ``wgrad`` and ``bgrad``.

        The accumulators have the same shapes as ``weights`` and ``bias``.
        """
        self.wgrad = np.zeros_like(self.weights)
        self.bgrad = np.zeros_like(self.bias)


In [4]:
class Softmax(Layer):
    """Softmax activation that normalises over all elements of its input."""

    def __call__(self, input):
        """Return ``exp(input) / sum(exp(input))``.

        Args:
            input: Array of scores; one sample per call.

        Returns:
            Array with the same shape as ``input`` whose elements sum to 1.
        """
        self.input = input
        # Subtract the global maximum for numerical stability. np.max is used
        # because the builtin max() compares rows and raises on arrays with
        # more than one column.
        exps = np.exp(input - np.max(input))
        self.output = exps / np.sum(exps)
        return self.output

    def backward(self, out_grad, learning_rate):
        """Propagate the gradient through the softmax Jacobian.

        Args:
            out_grad: Gradient of the loss with respect to the softmax
                output; must have as many elements as the output.
            learning_rate: Unused; the layer has no parameters.

        Returns:
            Gradient with respect to the softmax input, shaped like
            ``out_grad``.
        """
        probs = np.reshape(self.output, (-1, 1))
        # jacobian[i, j] = probs[i] * (delta_ij - probs[j]); it is symmetric.
        jacobian = (np.identity(probs.size) - probs.T) * probs
        upstream = np.asarray(out_grad)
        # Work on a column and restore the caller's shape afterwards so row
        # vectors and 1-D gradients are handled like column vectors.
        return np.dot(jacobian, upstream.reshape(-1, 1)).reshape(upstream.shape)

In [5]:
class Activation(Layer):
    """Element-wise activation layer built from a function and its derivative."""

    def __init__(self, activation, activation_prime):
        """Store the activation function and the function computing its derivative."""
        self.activation, self.activation_grad = activation, activation_prime

    def __call__(self, input):
        """Remember ``input`` for the backward pass and return ``activation(input)``."""
        self.input = input
        return self.activation(input)

    def backward(self, out_grad, learning_rate):
        """Scale ``out_grad`` element-wise by the derivative at the cached input.

        ``learning_rate`` is not used by this method.
        """
        local_slope = self.activation_grad(self.input)
        return np.multiply(out_grad, local_slope)

In [6]:
class Tanh(Activation):
    """Hyperbolic-tangent activation layer."""

    def __init__(self):
        """Register ``tanh`` together with its derivative ``1 - tanh(x)**2``."""
        super().__init__(np.tanh, lambda x: 1 - np.tanh(x) ** 2)

In [7]:
class Sigmoid(Activation):
    """Logistic sigmoid activation layer."""

    def __init__(self):
        """Register a numerically stable sigmoid and its derivative."""

        def sigmoid(x):
            # exp(-|x|) lies in (0, 1], so it can never overflow; the two
            # branches are the same function written for x >= 0 and x < 0.
            x = np.asarray(x)
            decay = np.exp(-np.abs(x))
            return np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

        def sigmoid_grad(x):
            # Evaluate the sigmoid once and reuse it: s'(x) = s(x) * (1 - s(x)).
            s = sigmoid(x)
            return s * (1.0 - s)

        super().__init__(sigmoid, sigmoid_grad)
            

In [8]:
class ReLU(Activation):
    """Rectified linear unit activation layer."""

    def __init__(self):
        """Register ``max(0, x)`` and its derivative (0.5 where ``x`` is not positive or negative)."""

        def relu(x):
            return np.maximum(0, x)

        def relu_grad(x):
            # Slope is 1 for positive inputs, 0 for negative ones and 0.5 otherwise.
            not_positive = np.where(x < 0, 0, 0.5)
            return np.where(x > 0, 1, not_positive)

        super().__init__(relu, relu_grad)

In [45]:
class Dropout(Layer):
    """Dropout layer that rescales the kept activations by ``1 / (1 - rate)``."""

    def __init__(self, dropout_rate):
        """Store the drop probability.

        Args:
            dropout_rate: Probability of zeroing an element, in ``[0, 1)``.

        Raises:
            ValueError: If ``dropout_rate`` is outside ``[0, 1)``; a rate of
                1 would divide by zero when the mask is rescaled.
        """
        if not 0 <= dropout_rate < 1:
            raise ValueError(f"dropout_rate must be in [0, 1), got {dropout_rate}")
        self.dropout_rate = dropout_rate
        self.mask = None

    def __call__(self, x, training=False):
        """Apply dropout when ``training`` is true, otherwise pass ``x`` through.

        Args:
            x: Input array.
            training: When true a new random mask is drawn and applied.

        Returns:
            ``x * mask`` in training mode, ``x`` unchanged otherwise.
        """
        if not training:
            # Forget any earlier mask so backward() matches this forward pass.
            self.mask = None
            return x

        keep_prob = 1 - self.dropout_rate
        # Kept elements are scaled by 1 / keep_prob, dropped ones become 0.
        self.mask = (np.random.rand(*x.shape) < keep_prob) / keep_prob
        return x * self.mask

    def backward(self, grad, learning_rate=None):
        """Propagate ``grad`` through the mask of the last forward pass.

        Args:
            grad: Gradient of the loss with respect to this layer's output.
            learning_rate: Unused; accepted so the signature matches
                ``Layer.backward`` and the layer can be driven by ``NN``.

        Returns:
            ``grad * mask`` if a mask is stored, otherwise ``grad``.
        """
        return grad * self.mask if self.mask is not None else grad

In [10]:
def mse(y_true, y_pred):
    """Mean of the squared element-wise differences between targets and predictions."""
    residual = y_true - y_pred
    return np.mean(np.square(residual))

def mse_grad(y_true, y_pred):
    """Gradient of ``mse`` with respect to ``y_pred``: ``2 * (y_pred - y_true) / size``."""
    residual = y_pred - y_true
    n_elements = np.size(y_true)
    return (2 * residual) / n_elements

def binary_cross_entropy(y_true, y_pred, eps=1e-12):
    """Mean binary cross-entropy between targets and predicted probabilities.

    Args:
        y_true: Target values.
        y_pred: Predicted probabilities.
        eps: Predictions are clipped to ``[eps, 1 - eps]`` so that saturated
            predictions (exactly 0 or 1) give a finite loss instead of
            NaN/inf from ``log(0)``.

    Returns:
        The mean loss over all elements.
    """
    # float64 keeps ``1 - eps`` distinguishable from 1 for the default eps.
    y_pred = np.clip(np.asarray(y_pred, dtype=np.float64), eps, 1 - eps)
    return np.mean(-y_true * np.log(y_pred) - (1 - y_true) * np.log(1 - y_pred))

def binary_cross_entropy_grad(y_true, y_pred, eps=1e-12): # wrt y_pred
    """Gradient of ``binary_cross_entropy`` with respect to ``y_pred``.

    Args:
        y_true: Target values.
        y_pred: Predicted probabilities.
        eps: Predictions are clipped to ``[eps, 1 - eps]`` so that saturated
            predictions (exactly 0 or 1) do not cause a division by zero.

    Returns:
        Array of per-element gradients, already divided by the number of
        elements to match the mean in the loss.
    """
    # float64 keeps ``1 - eps`` distinguishable from 1 for the default eps.
    y_pred = np.clip(np.asarray(y_pred, dtype=np.float64), eps, 1 - eps)
    return ((1 - y_true) / (1 - y_pred) - y_true / y_pred) / np.size(y_true)

def cross_entropy(y_true, y_pred, eps=1e-12):
    """Mean of ``-y_true * log(y_pred)`` over all elements.

    Args:
        y_true: Target distribution (for example one-hot labels).
        y_pred: Predicted probabilities.
        eps: Lower bound applied to ``y_pred`` so that a predicted
            probability of exactly 0 yields a finite loss instead of NaN
            (``0 * log(0)``) or inf.

    Returns:
        The mean loss over all elements.
    """
    y_pred = np.maximum(np.asarray(y_pred, dtype=np.float64), eps)
    return np.mean(-y_true * np.log(y_pred))

def cross_entropy_grad(y_true, y_pred, eps=1e-12):
    """Gradient of ``cross_entropy`` with respect to ``y_pred``.

    The loss is ``mean(-y_true * log(y_pred))``, so its derivative with
    respect to each prediction is ``-y_true / y_pred / size``. The result is
    an array with one entry per prediction, not a single averaged number.

    Args:
        y_true: Target distribution (for example one-hot labels).
        y_pred: Predicted probabilities.
        eps: Lower bound applied to ``y_pred`` to avoid division by zero.

    Returns:
        Array of per-element gradients with the broadcast shape of the inputs.
    """
    y_pred = np.maximum(np.asarray(y_pred, dtype=np.float64), eps)
    return -np.asarray(y_true) / y_pred / np.size(y_true)

In [11]:
class NN:
    """Sequential container that chains layers for forward and backward passes."""

    def __init__(self, *layers):
        """Store the given layers in the order they are applied."""
        self.layers = list(layers)

    def __call__(self, input):
        """Feed ``input`` through every layer in order and return the result."""
        for layer in self.layers:
            input = layer(input)
        return input

    def save(self, filename):
        """Placeholder: not implemented, does nothing."""
        pass

    def load(self, filename):
        """Placeholder: not implemented, does nothing."""
        pass

    def train_minibatch(self,):
        """Placeholder: not implemented, does nothing."""
        pass

    def train_batch(self, loss, loss_grad, X, y, epochs = 1000, learning_rate = 0.001, verbose=True):
        """Train by updating the layers after every individual sample.

        Args:
            loss: Callable ``loss(target, output)`` returning a number.
            loss_grad: Callable ``loss_grad(target, output)`` returning the
                gradient of the loss with respect to ``output``.
            X: Sequence of input samples.
            y: Sequence of targets, paired with ``X`` by position.
            epochs: Number of passes over the data.
            learning_rate: Step size handed to every layer's ``backward``.
            verbose: When true, print the mean loss after each epoch.
        """
        for epoch in range(epochs):
            error = 0
            # Distinct loop names: reusing ``y`` here would overwrite the label
            # sequence and corrupt every epoch after the first.
            for sample, target in zip(X, y):
                output = self(sample)
                error += loss(target, output)

                grad = loss_grad(target, output)

                for layer in reversed(self.layers):
                    grad = layer.backward(grad, learning_rate)
            error /= len(X)
            if verbose:
                print(f"{epoch=}, {error=}")

    def eval(self, loss, X,y):
        """Return the mean of ``loss(target, output)`` over the samples.

        Args:
            loss: Callable ``loss(target, output)`` returning a number.
            X: Sequence of input samples.
            y: Sequence of targets, paired with ``X`` by position.

        Returns:
            Summed loss divided by ``len(X)``.
        """
        error = 0
        for sample, target in zip(X, y):
            output = self(sample)
            error += loss(target, output)
        error /= len(X)
        return error
    


In [12]:
# Smoke test: build one layer of each kind and chain them in a single model.
lin = Linear(10,5)
relu = ReLU() 
sigmoid = Sigmoid()
softmax = Softmax()

# Layers are applied in the order given: linear, sigmoid, ReLU, softmax.
model = NN(lin, sigmoid, relu, softmax) 

# Ten draws from the seeded generator `rand`, reshaped to a (10, 1) column.
x = rand.random(10).reshape(-1,1)

print(x)
# Last expression of the cell, so the model output is displayed.
model(x)

[[0.77395605]
 [0.43887844]
 [0.85859792]
 [0.69736803]
 [0.09417735]
 [0.97562235]
 [0.7611397 ]
 [0.78606431]
 [0.12811363]
 [0.45038594]]


array([[0.1491558 ],
       [0.22285238],
       [0.25821857],
       [0.26527804],
       [0.10449521]])

In [13]:
# Forward-pass check: 32 inputs -> 5 hidden units -> 10 softmax outputs.
model = NN(Linear(32, 5), ReLU(), Linear(5, 10), Softmax())

# Draw the input from the notebook's seeded generator `rand` (not the global
# NumPy random state) so this cell sees the same input after a restart.
x = rand.standard_normal((1, 32))

print(x)
result = model(x)
# The softmax outputs should sum to 1 (up to floating-point rounding).
print(result, np.sum(result))


[[ 0.5306864   1.59314176  2.00005323  0.32641984  1.1931023   0.5481565
   0.429037   -0.33640329  0.91954891  0.40844161  0.16845908 -1.34799869
   1.00846121  0.573484   -1.50532525  0.82257466  0.31869984 -0.35581409
  -2.02165578  0.11128437 -1.33786852  0.01399539 -0.36155983 -0.82571222
   0.84308082  0.15730206 -1.63736037 -1.06320412  0.30038575 -1.56543225
   0.01121587 -0.34955071]]
[[9.99883685e-09]
 [1.74438509e-04]
 [8.64152200e-10]
 [6.40567144e-05]
 [3.02823232e-08]
 [1.47757057e-08]
 [9.91997588e-16]
 [9.99018038e-01]
 [7.43362000e-04]
 [4.85393448e-08]] 1.0000000000000002


Import Dataset

In [14]:
from torchvision import datasets, transforms 

# Arguments shared by both EMNIST "letters" splits.
emnist_kwargs = dict(
    root='./data',
    split='letters',
    transform=transforms.ToTensor(),
)

# Training/validation split; download=True is only passed for this split.
train_validation_dataset = datasets.EMNIST(train=True, download=True, **emnist_kwargs)

# Held-out test split, read from the same root directory.
# NOTE(review): no download flag here; presumably relies on the files fetched
# by the call above; confirm on a clean ./data directory.
independent_test_dataset = datasets.EMNIST(train=False, **emnist_kwargs)


In [15]:
# Preview the raw data tensor reshaped to rows of 28 * 28 = 784 values.
train_validation_dataset.data.reshape(-1, 28 * 28)

tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], dtype=torch.uint8)

In [16]:
import pandas as pd 

# Tabular view of the training data: each row holds 28 * 28 = 784 values.
# Assumes `.data` stores 28x28 images, one per sample (TODO confirm).
df = pd.DataFrame(train_validation_dataset.data.reshape(-1,28*28))

In [17]:
# Show the first five rows.
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,774,775,776,777,778,779,780,781,782,783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [36]:
# Two-layer model: a 28 -> 26 linear layer followed by dropout with rate 0.5.
model = NN(Linear(28, 26), Dropout(0.5))

In [56]:
# Column of 28 standard-normal samples drawn from the notebook's seeded
# generator `rand`, so the input is the same after a kernel restart.
x = rand.standard_normal((28, 1))
# Dropout layer that zeroes each element with probability 0.8.
drop = Dropout(0.8)

In [84]:
# Call the layer in training mode so a random mask is drawn and applied to x.
drop(x, training=True)

array([[ 0.        ],
       [-0.        ],
       [-0.        ],
       [-0.        ],
       [ 0.        ],
       [ 0.        ],
       [-0.        ],
       [-0.        ],
       [-2.47025895],
       [ 0.        ],
       [-0.        ],
       [-0.        ],
       [-5.12283294],
       [ 0.        ],
       [-0.        ],
       [-0.        ],
       [-0.        ],
       [ 0.        ],
       [ 0.        ],
       [-0.        ],
       [-0.        ],
       [ 0.        ],
       [-0.        ],
       [ 0.        ],
       [ 0.        ],
       [-0.        ],
       [-3.1709267 ],
       [-0.        ]])