### All imports handled here

In [25]:
import torch
from torch import nn
import torchvision
from torchvision import datasets
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt

### Data Downloaded

In [26]:
train_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor(),
    target_transform=None
)
test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor()
)

In [162]:
class_names = train_data.classes
class_names
torch.manual_seed(42)

<torch._C.Generator at 0x7780b6f13570>

In [202]:
class conv(nn.Module):
    def __init__(self,kernel_size:int,output_channels:int):
        super().__init__()
        
        self.kernel_size = kernel_size
        self.output_channels = output_channels
        self.kernel = torch.rand(output_channels,kernel_size,kernel_size)/(kernel_size)**2
    def forward(self, input_img:torch.Tensor ,padding : int , stride:int):
        self.stride = stride
        input_channels , width, height = input_img.shape
        self.img = input_img
        self.input_channels = input_channels
        self.width = width
        self.height = height
        w = self.width
        h= self.height
        k_w = self.kernel_size
        k_h = self.kernel_size
        input_img = self.img
        kernel = self.kernel
        pad = padding
        padding//=2
        pad_tensor_vert = torch.zeros(self.output_channels,padding,self.width)
        pad_tensor_horiz = torch.zeros(self.output_channels,self.height+2*padding,padding)

        input_img = torch.cat((pad_tensor_vert, input_img,pad_tensor_vert),dim=1)
        input_img = torch.cat((pad_tensor_horiz, input_img,pad_tensor_horiz),dim=2)
        self.input_img = input_img
        input_channels , width, height = input_img.shape
        self.img = input_img
        self.input_channels = input_channels
        self.width = width
        self.height = height
        w = self.width
        h= self.height
        # for i,img in enumerate(input_img):
        #     img = torch.cat((pad_tensor_vert,img,pad_tensor_vert),dim=0)
        #     img = torch.cat((pad_tensor_horiz,img,pad_tensor_horiz),dim=1)
        #     input_img[i] = img
        # print(input_img.shape)
        
        feature_map = torch.zeros(self.output_channels,(w-k_w)//stride+1 , (h-k_h)//stride+1)
        for out_channel in range(self.output_channels):
            for in_channel in range(self.input_channels):
                for i in range(0,input_img.shape[1],stride):
                    for j in range(0,input_img.shape[2],stride):
                        feature_map[out_channel,i//stride,j//stride] += torch.sum(input_img[in_channel,i:i+k_w,j:j+k_h]*kernel[out_channel])
        # print(feature_map.shape)
        return feature_map
    def backward(self,d_L_d_Y:torch.Tensor,learning_rate:float):
        d_L_d_W = torch.zeros_like(self.kernel)
        # print(d_L_d_W.shape)
        for out_channel in range(self.output_channels):
            for i in range(0,d_L_d_Y.shape[1],self.stride):
                for j in range(0,d_L_d_Y.shape[2],self.stride):
                    # for i1 in range(i,i+k_w):
                    #     for j1 in range(j,j+k_h):
                    #         feature_map[i/stride,j/stride] += padded_img[i1,j1]*kernel[i1-i,j1-j]
                    # d_L_d_W[out_channel] += torch.sum(d_L_d_Y[in_channel,i:i+self.kernel_size,j:j+self.kernel_size]*self.kernel[out_channel])
                    d_L_d_W[out_channel] += d_L_d_Y[out_channel,i,j]*self.input_img[out_channel,i:i+self.kernel_size,j:j+self.kernel_size]
        self.kernel -= learning_rate * d_L_d_W

In [251]:
class MaxPool(nn.Module):
    # Assumed the kernel to be a square matrix and the stride to be the same as the kernel side size
    def __init__(self,kernel_width : int):
        
        self.kernel_width = kernel_width
        self.stride = kernel_width
    def forward(self,img : torch.Tensor):
        input_channel, height, width = img.shape
        self.img = img
        self.input_channel = input_channel
        self.height = height
        self.width = width
        feature_map = torch.zeros(self.input_channel,(self.width-self.kernel_width)//self.kernel_width +1, (self.height-self.kernel_width)//self.kernel_width +1)
        # print(self.width," ",self.kernel_width)
        for channel in range(self.input_channel):
            for i in range(0,self.width,self.stride):
                for j in range(0,self.height,self.stride):
                    

                    feature_map[channel,(i-self.kernel_width)//self.kernel_width+1, (j-self.kernel_width)//self.kernel_width+1] = torch.amax(self.img[channel,i:i+self.kernel_width,j:j+self.kernel_width])
        # print(feature_map.shape)

        return feature_map

    def backward(self, d_L_d_out):
        d_L_d_input = torch.zeros(self.img.shape)
        for channel in range(self.input_channel):
            for i in range(0,self.width,self.stride):
                for j in range(0,self.height,self.stride):
                    sample_space = self.img[channel,i:i+self.kernel_width,j:j+self.kernel_width]
                    answer = torch.max(sample_space)
                    index = torch.nonzero(answer == sample_space, as_tuple=False)
                    d_L_d_input[channel,index[0][0],index[0][1]] = d_L_d_out[channel,i//self.stride,j//self.stride]
        return d_L_d_input

In [273]:
class softmax(nn.Module):
  def __init__(self,input_len, nodes):
    self.weights = torch.rand(input_len,nodes)/input_len
    self.biases = torch.zeros(nodes)
    self.input_len = input_len
    self.nodes = nodes

  def forward(self,img:torch.Tensor):
    self.last_input_shape= img.shape
    img = img.flatten()
    self.input = img
    img = torch.reshape(img,(1,img.shape[0]))
    total = torch.matmul(img,self.weights)
    # print(total.shape)
    # print(total)
    total = torch.squeeze(total)
    total+=self.biases
    # print(total.shape)
    # print(total)

    self.total = total
    exp = torch.exp(total)
    return exp/torch.sum(exp,dim=0)

  def backward(self,d_L_d_out,learning_rate):
    for i,gradient in enumerate(d_L_d_out):
      if gradient ==0:
        continue
      t_exp = torch.exp(self.total)
      S= torch.sum(t_exp)

      d_out_d_t = -t_exp[i]*t_exp/(S**2)
      d_out_d_t[i] = t_exp[i]*(S-t_exp[i])/(S**2)

      d_t_d_w = self.input
      d_t_d_input = self.weights

      d_L_d_t= gradient*d_out_d_t

      d_L_d_w = torch.unsqueeze(d_t_d_w,0).T @ torch.unsqueeze(d_L_d_t,0)
      # print(d_L_d_w.shape)
      d_L_d_b = d_L_d_t
      d_L_d_input = d_t_d_input @  d_L_d_t
      print(d_L_d_input.shape)
      self.weights -= learning_rate*d_L_d_w
      self.biases -= learning_rate*d_L_d_b
    return d_L_d_input.reshape(self.last_input_shape)

In [274]:
# Converts 1x28x28 to 1x30x30(padding) finally to 1x10x10
Conv = conv(3,1) #kernel size = 3, output channels = 1
# Converts 1x10x10 to 1x5x5
pool = MaxPool(2)
# Converts 1x5x5 to 25 to finally 10
final_layer = softmax(25,10)

print(final_layer.weights)

tensor([[3.5634e-02, 3.1862e-02, 2.5685e-02, 1.9103e-02, 2.4996e-02, 3.9296e-02,
         2.6332e-02, 1.0025e-02, 2.7737e-02, 1.2063e-02],
        [1.9290e-02, 7.1185e-03, 1.9415e-02, 2.1043e-02, 7.8704e-03, 3.2858e-02,
         3.8472e-02, 3.2713e-03, 2.5795e-02, 6.1839e-03],
        [1.6025e-02, 2.9580e-02, 1.9649e-02, 2.6061e-02, 1.3520e-02, 3.9086e-02,
         1.0610e-02, 2.3743e-02, 2.2053e-02, 3.3624e-02],
        [2.2042e-02, 3.2058e-02, 2.5153e-02, 3.2752e-02, 2.0253e-02, 2.6674e-02,
         3.9728e-02, 1.3484e-02, 1.1861e-02, 3.4518e-02],
        [2.7648e-02, 1.1218e-03, 2.7113e-02, 3.8491e-04, 9.1415e-03, 2.7827e-02,
         3.0536e-02, 1.5484e-02, 3.6929e-02, 3.9876e-02],
        [3.5595e-02, 1.6309e-03, 1.2725e-02, 2.1217e-02, 3.6255e-02, 1.6645e-02,
         2.4178e-02, 3.0527e-02, 9.5360e-03, 7.8881e-03],
        [3.9779e-02, 1.9554e-02, 3.4152e-02, 4.9816e-04, 1.2366e-02, 3.4034e-02,
         1.8064e-02, 2.6737e-02, 3.2456e-03, 3.9467e-02],
        [1.0018e-02, 1.8989

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [277]:
image ,label= train_data[0]
image/=255
image-=0.5
image
# out = Conv.forward(image,2,3)

tensor([[[-0.5000, -0.5000, -0.5000, -0.5000, -0.5000, -0.5000, -0.5000,
          -0.5000, -0.5000, -0.5000, -0.5000, -0.5000, -0.5000, -0.5000,
          -0.5000, -0.5000, -0.5000, -0.5000, -0.5000, -0.5000, -0.5000,
          -0.5000, -0.5000, -0.5000, -0.5000, -0.5000, -0.5000, -0.5000],
         [-0.5000, -0.5000, -0.5000, -0.5000, -0.5000, -0.5000, -0.5000,
          -0.5000, -0.5000, -0.5000, -0.5000, -0.5000, -0.5000, -0.5000,
          -0.5000, -0.5000, -0.5000, -0.5000, -0.5000, -0.5000, -0.5000,
          -0.5000, -0.5000, -0.5000, -0.5000, -0.5000, -0.5000, -0.5000],
         [-0.5000, -0.5000, -0.5000, -0.5000, -0.5000, -0.5000, -0.5000,
          -0.5000, -0.5000, -0.5000, -0.5000, -0.5000, -0.5000, -0.5000,
          -0.5000, -0.5000, -0.5000, -0.5000, -0.5000, -0.5000, -0.5000,
          -0.5000, -0.5000, -0.5000, -0.5000, -0.5000, -0.5000, -0.5000],
         [-0.5000, -0.5000, -0.5000, -0.5000, -0.5000, -0.5000, -0.5000,
          -0.5000, -0.5000, -0.5000, -0.5000, -0

In [278]:
out = Conv.forward(image,2,3)
out.shape
for i in range(0,10,2):
    for j in range(0,10,2):
        subgrid = out[0,i:i+2,j:j+2]
        subgrid = subgrid.reshape(-1,2,2)
        val = torch.amax(subgrid,dim=(1,2))
        print(val)

tensor([-0.1143])
tensor([-0.1387])
tensor([-0.1387])
tensor([-0.1387])
tensor([-0.0844])
tensor([-0.1674])
tensor([-0.2049])
tensor([-0.2035])
tensor([-0.2036])
tensor([-0.1187])
tensor([-0.1674])
tensor([-0.2044])
tensor([-0.2036])
tensor([-0.2036])
tensor([-0.1187])
tensor([-0.1668])
tensor([-0.2036])
tensor([-0.2036])
tensor([-0.2036])
tensor([-0.1185])
tensor([-0.1353])
tensor([-0.1557])
tensor([-0.1557])
tensor([-0.1557])
tensor([-0.0843])


In [279]:
out = pool.forward(out)
out

tensor([[[-0.1143, -0.1387, -0.1387, -0.1387, -0.0844],
         [-0.1674, -0.2049, -0.2035, -0.2036, -0.1187],
         [-0.1674, -0.2044, -0.2036, -0.2036, -0.1187],
         [-0.1668, -0.2036, -0.2036, -0.2036, -0.1185],
         [-0.1353, -0.1557, -0.1557, -0.1557, -0.0843]]])

In [280]:
out = final_layer.forward(out)


In [281]:
gradient = torch.zeros(10)
gradient[label] =-1/out[label]

In [282]:
gradient = final_layer.backward(gradient,learning_rate=0.005)

torch.Size([25])


In [270]:
def forward(image:torch.Tensor, label: int):
  out = Conv.forward((image/255)-0.5,2,3)
  out = pool.forward(out)
  # print(out.shape)
  out = final_layer.forward(out)

  loss = -torch.log(out[label])
  acc =1 if torch.argmax(out) == label else 0
  return out,loss,acc

def train(image,label,lr=0.005):
  out,loss,acc = forward(image,label)

  gradient = torch.zeros(10)
  gradient[label] =-1/out[label]

  gradient = final_layer.backward(gradient,lr)
  gradient= pool.backward(gradient)
  gradient = Conv.backward(gradient,lr)

  return loss,acc


In [272]:
for epoch in range(1):
  loss =0
  correct=0
  for i ,(image,label) in enumerate(train_data):
    if i>0 and i%2000 == 1999:
      print('[Step {:5d}] Past 2000 steps: Average Loss {:.3f} | Accuracy: {:2.2f}%'
                    .format(i + 1, loss / 2000, correct / 20))
      break

      loss = 0
      correct = 0
    l,acc= train(image,label)
    loss+=l
    correct+=acc

tensor([[-0.1286, -0.1095, -0.0606, -0.0909, -0.1097, -0.0822, -0.0157, -0.0747,
         -0.0540, -0.0613]])
torch.Size([10])
tensor([-0.1900, -0.1513, -0.0258, -0.1143, -0.1382, -0.1063,  0.0467, -0.0546,
        -0.0199, -0.0336])
tensor([[-0.1289, -0.1099, -0.0610, -0.0912, -0.1100, -0.0825, -0.0161, -0.0750,
         -0.0544, -0.0579]])
torch.Size([10])
tensor([-0.1908, -0.1521, -0.0267, -0.1151, -0.1389, -0.1071,  0.0458, -0.0554,
        -0.0209, -0.0257])
tensor([[-0.1258, -0.1105, -0.0616, -0.0918, -0.1107, -0.0831, -0.0166, -0.0756,
         -0.0549, -0.0585]])
torch.Size([10])
tensor([-0.1831, -0.1532, -0.0278, -0.1161, -0.1401, -0.1082,  0.0448, -0.0565,
        -0.0219, -0.0268])
tensor([[-0.1223, -0.1107, -0.0619, -0.0921, -0.1109, -0.0834, -0.0170, -0.0759,
         -0.0553, -0.0588]])
torch.Size([10])
tensor([-0.1750, -0.1539, -0.0286, -0.1169, -0.1408, -0.1090,  0.0438, -0.0574,
        -0.0228, -0.0277])
tensor([[-0.1224, -0.1110, -0.0622, -0.0886, -0.1111, -0.0837, -