In [1]:
import numpy as np 
from torchvision import datasets,transforms
from PIL import Image 
import matplotlib.pyplot as plt
import torch
from tqdm import tqdm 
from torch.utils.data import DataLoader, Subset

In [2]:
# MNIST splits, downloaded into ../data on first use; every image is converted to a tensor.
train = datasets.MNIST(
    root="../data",
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
test = datasets.MNIST(
    root="../data",
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)

# Only the first 320 training samples are kept.
train = Subset(train, list(range(320)))

In [3]:
class Softmax:
    """Softmax activation over a 1-D vector of logits.

    ``forward`` caches its input so that ``backward`` can rebuild the
    probabilities and apply the softmax Jacobian to an incoming gradient.
    """

    def __init__(self):
        pass

    def forward(self,input):
        """Return the softmax probabilities of ``input``.

        The maximum logit is subtracted before exponentiating; this does not
        change the result but keeps ``np.exp`` from overflowing.
        """
        self.last_input = input
        exp_x = np.exp(input - np.max(input))
        return exp_x/np.sum(exp_x, axis = 0, keepdims = True)

    def backward(self,loss):
        """Propagate ``loss`` (gradient w.r.t. the probabilities) to the logits.

        With ``p = softmax(t)`` the Jacobian is ``dp_i/dt_j = p_i * (delta_ij - p_j)``,
        so the propagated vector is ``p * (loss - sum(loss * p))``. Every entry of
        ``loss`` contributes; an all-zero ``loss`` yields an all-zero result.
        """
        logits = np.asarray(self.last_input, dtype=np.float64)
        upstream = np.asarray(loss, dtype=np.float64)

        # Same max-shift as in forward, so large logits do not produce inf/nan.
        exp = np.exp(logits - np.max(logits))
        probs = exp / np.sum(exp)

        return probs * (upstream - np.sum(upstream * probs))

class Linear():
    """Fully connected layer: ``forward(input) = input . weights + bias``.

    ``backward`` ADDS ``learning_rate`` times the propagated signal to the
    parameters, so callers pass the direction in which the output should move
    (for example ``target - prediction``).
    """

    def __init__(self,input_size,output_size):
        self.input_size, self.output_size = input_size, output_size
        # Uniform values in [-0.5, 0.5); the weights are drawn before the bias.
        self.weights = np.random.rand(input_size, output_size) - 0.5
        self.bias = np.random.rand(output_size) - 0.5

    def forward(self,input):
        """Cache ``input`` and return the affine projection."""
        self.last_input = input
        projected = np.dot(input, self.weights)
        return projected + self.bias

    def backward(self,loss, learning_rate):
        """Update the parameters in place and return the signal for the input.

        ``loss`` is the signal with respect to this layer's output. The return
        value has the shape of the input cached by ``forward``.
        """
        cached = self.last_input

        # Taken before the weights change so it reflects the forward-pass weights.
        dl_dinputs = np.dot(self.weights, loss)

        # Column vector of inputs times the output signal, one entry per weight.
        dl_dw = cached[np.newaxis].T * loss

        self.weights += learning_rate * dl_dw
        self.bias += learning_rate * loss

        return dl_dinputs.reshape(cached.shape)

class MaxPool():
    """Max pooling over the two trailing axes of a ``(channels, h, w)`` array.

    Only windows that fit entirely inside the input are evaluated, so trailing
    rows/columns that do not fill a window are ignored. Height and width are
    handled independently, so non-square inputs are supported.
    """

    def __init__(self,Kernel_shape , stride):
        self.kernel_shape = Kernel_shape
        self.stride = stride

    def _output_size(self, size):
        """Number of complete windows along an axis of length ``size``."""
        return max(0, (size - self.kernel_shape)//self.stride + 1)

    def forward(self,input):
        """Return the per-window maxima with shape ``(channels, out_h, out_w)``."""
        self.last_input = input
        num_filters, h, w = input.shape
        k, s = self.kernel_shape, self.stride
        out_h, out_w = self._output_size(h), self._output_size(w)

        output = np.zeros((num_filters, out_h, out_w))
        # Iterate over output positions so every index is valid by construction.
        for oi in range(out_h):
            i = oi*s
            for oj in range(out_w):
                j = oj*s
                output[:, oi, oj] = np.max(input[:, i:i+k, j:j+k], axis=(1, 2))

        return output

    def backward(self,loss):
        """Route ``loss`` back to the positions that produced each maximum.

        ``loss`` has the shape returned by ``forward``. Every position equal to
        its window's maximum receives that window's value (ties all receive
        it). Contributions from overlapping windows are summed.
        """
        loss = np.asarray(loss)
        cached = self.last_input
        _, h, w = cached.shape
        k, s = self.kernel_shape, self.stride
        out_h, out_w = self._output_size(h), self._output_size(w)

        output = np.zeros(cached.shape)
        for oi in range(out_h):
            i = oi*s
            for oj in range(out_w):
                j = oj*s
                window = cached[:, i:i+k, j:j+k]
                is_max = window == np.max(window, axis=(1, 2), keepdims=True)
                output[:, i:i+k, j:j+k] += is_max*loss[:, oi, oj][:, np.newaxis, np.newaxis]

        return output

class Conv:
    """2-D convolution (cross-correlation) over a ``(channels, h, w)`` array.

    Parameters
    ----------
    input_channels, output_channels : int
        Channel counts of the input and of the produced feature maps.
    kernel_shape : int
        Side length of the square kernel.
    stride : int
        Step between neighbouring windows.
    padding : int or bool
        Number of zero pixels added on every side of the two spatial axes
        (``False`` means 0, ``True`` means 1).

    ``backward`` ADDS ``learn_rate`` times the propagated signal to the
    parameters, so callers pass the direction in which the output should move.
    """

    def __init__(self,input_channels ,output_channels,kernel_shape = 3, stride = 1, padding = False):
        self.output_channels = output_channels
        self.kernel_shape = kernel_shape
        self.stride = stride
        self.padding = padding
        self.input_channels = input_channels
        # Uniform values in [-0.5, 0.5); the weights are drawn before the bias.
        self.weight = np.random.rand(self.output_channels,self.input_channels,self.kernel_shape,self.kernel_shape) - 0.5
        self.bias = np.random.rand(self.output_channels) - 0.5

    def _pad(self, input):
        """Return ``input`` zero-padded on its two spatial axes."""
        pad = int(self.padding)
        if pad == 0:
            return input
        return np.pad(input, ((0, 0), (pad, pad), (pad, pad)), mode="constant")

    def _output_dims(self, padded):
        """Number of complete windows along the height and width of ``padded``."""
        _, h, w = padded.shape
        out_h = (h - self.kernel_shape)//self.stride + 1
        out_w = (w - self.kernel_shape)//self.stride + 1
        return out_h, out_w

    def forward(self,input):
        """Cache ``input`` and return feature maps ``(output_channels, out_h, out_w)``."""
        self.last_input = input
        padded = self._pad(input)
        out_h, out_w = self._output_dims(padded)
        # Kept for backward compatibility: this attribute held the output height.
        self.output_shape = out_h
        k, s = self.kernel_shape, self.stride

        output = np.zeros((self.output_channels, out_h, out_w))
        for num in range(self.output_channels):
            for oi in range(out_h):
                i = oi*s
                for oj in range(out_w):
                    j = oj*s
                    output[num, oi, oj] = np.sum(padded[:, i:i+k, j:j+k]*self.weight[num]) + self.bias[num]
        return output

    def backward(self, loss, learn_rate):
        """Update kernels and bias in place; return the signal for the input.

        ``loss`` has the shape returned by ``forward``. The returned array has
        the shape of the (unpadded) input cached by ``forward``.
        """
        padded = self._pad(self.last_input)
        out_h, out_w = self._output_dims(padded)
        k, s = self.kernel_shape, self.stride

        dl_dfilters = np.zeros(self.weight.shape)
        dl_dpadded = np.zeros(padded.shape)
        for num in range(self.output_channels):
            for oi in range(out_h):
                i = oi*s
                for oj in range(out_w):
                    j = oj*s
                    signal = loss[num, oi, oj]
                    # Uses the forward-pass weights: the update happens after the loops.
                    dl_dpadded[:, i:i+k, j:j+k] += signal*self.weight[num]
                    dl_dfilters[num] += signal*padded[:, i:i+k, j:j+k]

        self.weight += learn_rate*dl_dfilters
        self.bias += learn_rate*np.sum(loss, axis = (1,2))

        pad = int(self.padding)
        if pad == 0:
            return dl_dpadded
        # Drop the border that corresponds to the artificial zero padding.
        return dl_dpadded[:, pad:-pad, pad:-pad]

class ReLU:
    """Rectified linear unit: negative entries become zero."""

    def __init__(self):
        pass

    def forward(self,input):
        """Cache ``input`` and return its element-wise maximum with zero."""
        self.last_input = input
        rectified = np.maximum(0, input)
        return rectified

    def backward(self, loss):
        """Let ``loss`` through only where the cached input was strictly positive."""
        passes = self.last_input > 0
        return loss * passes
        
class Sigmoid:
    """Logistic sigmoid activation, ``1 / (1 + exp(-x))``."""

    def __init__(self):
        pass

    def forward(self,input):
        """Cache both the input and the activation, then return the activation."""
        self.last_input = input
        self.output = 1 / (1 + np.exp(-1 * input))
        return self.output

    def backward(self,loss):
        """Scale ``loss`` by the sigmoid derivative ``output * (1 - output)``."""
        activation = self.output
        complement = 1 - activation
        return loss * activation * complement

class Dropout:
    """Dropout layer that keeps each element with probability ``keep_prob``.

    Training output is ``input * mask`` with a 0/1 mask and no rescaling.
    Because of that, inference should scale activations by ``keep_prob`` so
    their expected magnitude matches training; ``forward(x, training=False)``
    does exactly that.
    """

    def __init__(self,keep_prob):
        self.keep_prob = keep_prob

    def forward(self,input, training = True):
        """Apply dropout to ``input`` and return a new array.

        Parameters
        ----------
        input : numpy.ndarray
            Activations to mask. The array itself is left unmodified.
        training : bool
            When true (default) a fresh Bernoulli mask is sampled and stored in
            ``self.mask``. When false no mask is sampled and the input is
            scaled by ``keep_prob`` instead.
        """
        if not training:
            return input * self.keep_prob
        self.mask = np.random.binomial(1, self.keep_prob, size=input.shape)
        # Out-of-place multiply: the caller's array must not be altered.
        return input * self.mask

    def backward(self,loss):
        """Zero the entries of ``loss`` that were dropped in the last training forward."""
        return self.mask*loss
        

In [75]:
# A single 10 -> 10 dense layer and a fixed 10-element input vector, used by the
# sanity-check loop in the following cell.
l = Linear(input_size=10, output_size=10)
input = np.array([1, 4, 3, 2, 5, 6, 4, 7, 9, 2], dtype=np.float64)

In [76]:
# Sanity check for Linear: repeatedly nudge the layer so that l.forward(input)
# approaches np.square(input).
# `loss` here is the signed residual (target - prediction), not a scalar loss.
# Linear.backward ADDS learning_rate * (cached input, as a column, times this residual)
# to the weights, so passing the residual moves the prediction toward the target.
for i in range(50):
    x = l.forward(input)
    loss = np.square(input) - x
    # Prints the mean of the signed residuals; positive and negative errors can cancel.
    print('loss',np.sum(loss)/len(loss))
    grad = l.backward(loss,0.001)

loss 23.065650095889072
loss 17.483762772683917
loss 13.25269218169441
loss 10.045540673724362
loss 7.614519830683067
loss 5.771806031657763
loss 4.375028971996587
loss 3.3162719607734106
loss 2.513734146266246
loss 1.9054104828698164
loss 1.4443011460153181
loss 1.0947802686796126
loss 0.8298434436591441
loss 0.6290213302936332
loss 0.4767981683625736
loss 0.3614130116188308
loss 0.27395106280707315
loss 0.20765490560775982
loss 0.15740241845068417
loss 0.11931103318561735
loss 0.09043776315469688
loss 0.06855182447126207
loss 0.051962282949215025
loss 0.039387410475506104
loss 0.0298556571404343
loss 0.02263058811244988
loss 0.017153985789235483
loss 0.013002721228242642
loss 0.009856062691006228
loss 0.007470895519780895
loss 0.005662938803995221
loss 0.004292507613427743
loss 0.0032537207709798953
loss 0.0024663203444019064
loss 0.0018694708210570954
loss 0.001417058882360278
loss 0.001074130632832404
loss 0.0008141910196842739
loss 0.0006171567929210808
loss 0.0004678048490346032


In [6]:
# Hyperparameters. The training cell assigns num_epochs and lr again before using them.
num_epochs, lr = 100, 0.001

# Layer stack. For the 1x28x28 arrays built in the training cell, the two unpadded
# 3x3 stride-1 convolutions give 26x26 and then 24x24 maps; the 3x3 stride-3 pool
# gives 8x8, so 64 * 8 * 8 = 4096 values reach linear1.
conv1 = Conv(input_channels=1, output_channels=32, kernel_shape=3)
relu1 = ReLU()
conv2 = Conv(input_channels=32, output_channels=64, kernel_shape=3)
relu2 = ReLU()
pool1 = MaxPool(Kernel_shape=3, stride=3)
drop1 = Dropout(keep_prob=0.5)
linear1 = Linear(input_size=4096, output_size=250)
# sigmoid1 is created here but is not part of the training forward pass (relu3 is).
sigmoid1 = Sigmoid()
relu3 = ReLU()
linear2 = Linear(input_size=250, output_size=10)
softmax1 = Softmax()

In [None]:
# Training hyperparameters (these replace the values assigned in the model-setup cell).
num_epochs = 100
lr = 0.01
# Upper bound on the number of samples processed in one epoch.
max_samples_per_epoch = 3200

# Per-epoch mean cross-entropy loss and accuracy.
epoch_losses = []
epoch_accuracies = []

for epoch in range(num_epochs):
    epoch_loss = 0
    correct_count = 0
    total_samples = 0
    progress_bar = tqdm(train, desc=f'Epoch {epoch + 1}/{num_epochs}', leave=False)
    for image, label in progress_bar:
        # Shift pixel values by -0.5, then shape as (channels, h, w).
        input = np.array(image, dtype=np.float64).reshape(1, 28, 28) - 0.5

        # Forward pass
        x = relu1.forward(conv1.forward(input))
        x = relu2.forward(conv2.forward(x))
        x = pool1.forward(x)
        pooled_shape = x.shape  # needed to undo the flattening in the backward pass
        x = drop1.forward(x)
        x = relu3.forward(linear1.forward(x.reshape(-1)))
        x = softmax1.forward(linear2.forward(x))

        # Keep probabilities away from 0 so the log and the division stay finite.
        x = np.clip(x, 1e-15, 1 - 1e-15)

        # Cross-entropy loss and accuracy bookkeeping.
        epoch_loss += -np.log(x[label])
        prediction = np.argmax(x)
        correct_count += 1 if prediction == label else 0
        total_samples += 1

        # The layers update with `param += lr * signal`, so they must receive the
        # NEGATIVE gradient of the loss. For L = -log(p[label]) that is +1 / p[label]
        # at the true class and 0 elsewhere.
        grad = np.zeros(10)
        grad[label] = 1 / x[label]

        # Backward pass, mirroring the forward pass in reverse order.
        grad = softmax1.backward(grad)
        grad = linear2.backward(grad, lr)
        grad = relu3.backward(grad)
        grad = linear1.backward(grad, lr)
        grad = drop1.backward(grad.reshape(pooled_shape))
        grad = pool1.backward(grad)
        grad = relu2.backward(grad)
        grad = conv2.backward(grad, lr)
        grad = relu1.backward(grad)
        grad = conv1.backward(grad, lr)

        if total_samples == max_samples_per_epoch:
            break

    progress_bar.close()

    # Average over the samples actually processed (guards against an empty epoch).
    processed = max(total_samples, 1)
    epoch_loss /= processed
    epoch_accuracy = correct_count / processed
    epoch_losses.append(epoch_loss)
    epoch_accuracies.append(epoch_accuracy)
    print(f'Epoch {epoch + 1}/{num_epochs}: loss={epoch_loss:.4f} accuracy={epoch_accuracy:.4f}')

In [None]:
correct_count = 0
total_samples = len(test)

# Progress bar setup
progress_bar = tqdm(test, desc='Evaluating', leave=False)

for seen, (image, label) in enumerate(progress_bar, start=1):
    # Same preprocessing as the training cell: shift pixels by -0.5.
    input = np.array(image, dtype=np.float64).reshape(1, 28, 28) - 0.5

    # Forward pass: the same layers as in training, in the same order.
    x = relu1.forward(conv1.forward(input))
    x = relu2.forward(conv2.forward(x))
    x = pool1.forward(x)
    # No units are dropped at test time. Dropout.forward masks without rescaling,
    # so the activations are scaled by keep_prob here to match their expected
    # magnitude during training.
    x = x * drop1.keep_prob
    x = relu3.forward(linear1.forward(x.reshape(-1)))
    x = softmax1.forward(linear2.forward(x))

    # Calculate accuracy
    prediction = np.argmax(x)
    correct_count += 1 if prediction == label else 0

    # Running accuracy over the samples evaluated so far.
    progress_bar.set_postfix(accuracy=f'{correct_count / seen:.4f}')

# Close progress bar
progress_bar.close()

# Calculate and print overall accuracy
accuracy = correct_count / total_samples
print(f'Test Accuracy: {accuracy:.4f}')