In [70]:
import numpy as np
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader,Subset
import pandas as pd

In [6]:
# Per-image preprocessing pipeline handed to the MNIST datasets:
#   1. ToTensor converts the loaded image into a torch tensor.
#   2. The Lambda step collapses that tensor into a single 1-D vector, which is
#      the layout the linear model's weight matrix (10 x 784) multiplies against.
transform_img=transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Lambda(lambda img: img.view(-1)),
    ]
)

In [7]:
# Train / test splits of MNIST, both run through the same preprocessing pipeline.
# download=True fetches the files into MNIST_ROOT on the first run and is a no-op
# once they are present, so this cell works on a fresh checkout as well.
MNIST_ROOT='./mnist_data'
mnist_train=datasets.MNIST(root=MNIST_ROOT, train=True, transform=transform_img, download=True)
mnist_test=datasets.MNIST(root=MNIST_ROOT, train=False, transform=transform_img, download=True)

In [60]:
class SoftmaxRegression:
    """Multinomial logistic (softmax) regression with hand-written gradient descent.

    The parameters are plain tensors (autograd is not used):
      * ``w`` -- weight matrix of shape (num_classes, num_features)
      * ``b`` -- bias vector of shape (num_classes,)

    Both are moved to CUDA when a GPU is available. Inputs passed to the methods
    must live on the same device as ``self.w``.
    """

    def __init__(self, num_features=784, num_classes=10):
        """Initialise small random weights and zero biases.

        Args:
            num_features: length of one flattened input sample (784 by default).
            num_classes: number of target classes (10 by default).
        """
        self.w=torch.randn(num_classes,num_features)*0.01
        self.b=torch.zeros(num_classes,)
        if torch.cuda.is_available():
            self.w=self.w.to('cuda'); self.b=self.b.to('cuda')

    @staticmethod
    def _as_batch(X):
        """Promote a single 1-D sample to a batch of one; leave 2-D input untouched."""
        return X.unsqueeze(0) if X.dim()==1 else X

    def _one_hot(self,y):
        """Return the one-hot encoding of integer labels ``y``.

        The class indices [0, ..., num_classes-1] are laid out as a row and compared
        against the labels laid out as a column; broadcasting yields a
        (n_samples, num_classes) mask with a 1 at each sample's true class.
        The mask is created on ``y``'s device so it also works for CUDA labels.
        """
        classes=torch.arange(self.w.shape[0], device=y.device).unsqueeze(0)
        return (classes==y.reshape(-1,1)).to(self.w.dtype)

    def softmax_function(self,X_train):
        """Return class probabilities of shape (n_samples, num_classes).

        Args:
            X_train: (n_samples, num_features) batch, or one (num_features,) sample.
        """
        X_train=self._as_batch(X_train)
        z=X_train@self.w.T+self.b
        # Subtract each row's maximum before exponentiating to avoid overflow;
        # softmax is invariant to a per-row shift.
        z=z-z.max(dim=1, keepdim=True).values
        exp_z=torch.exp(z)
        return exp_z/exp_z.sum(dim=1, keepdim=True)

    def loss_function(self,X_train,y_train):
        """Return the mean cross-entropy loss of the current parameters (0-dim tensor).

        Args:
            X_train: (n_samples, num_features) batch, or one (num_features,) sample.
            y_train: integer class labels, one per sample.
        """
        y_tg=self._one_hot(y_train)
        y_pred=self.softmax_function(X_train)
        # Keep probabilities away from 0 and 1 so log() stays finite.
        y_pred=torch.clamp(y_pred, min=1e-7, max=1-1e-7)
        CE_loss=-(y_tg*torch.log(y_pred)).sum(dim=1)
        return CE_loss.mean()

    def gradient_descent(self,X_train,y_train,lr):
        """Apply one gradient-descent step in place and return ``(w, b)``.

        For softmax with cross-entropy the gradient with respect to the logits is
        ``(probabilities - one_hot) / n_samples``.

        Args:
            X_train: (n_samples, num_features) batch, or one (num_features,) sample.
            y_train: integer class labels, one per sample.
            lr: learning rate (step size).
        """
        X_train=self._as_batch(X_train)
        y_tg=self._one_hot(y_train)
        y_pred=self.softmax_function(X_train)
        # Divide by the number of samples (rows), not by the feature count.
        error=(y_pred-y_tg)/X_train.shape[0]
        w_grad=error.T@X_train
        b_grad=error.sum(dim=0)
        self.w-=w_grad*lr
        self.b-=b_grad*lr
        return self.w,self.b

    def predict(self,X_test):
        """Return the most probable class index per sample, shape (n_samples,)."""
        y_pred=self.softmax_function(X_test)
        pred_class=torch.argmax(y_pred, dim=1)
        return pred_class
        
        
        
    

In [None]:
# Mini-batch iterator over the training split; shuffle=True reorders the
# samples each time the loader is iterated.
BATCH_SIZE=128
train_loader=DataLoader(dataset=mnist_train, batch_size=BATCH_SIZE, shuffle=True)

In [76]:
# Train the softmax classifier with mini-batch gradient descent.
epochs=100
lr=0.01

# Seed torch so the random weight initialisation and the loader's shuffling
# are repeatable across Restart & Run All.
torch.manual_seed(0)

model=SoftmaxRegression()
# The model chooses CPU or CUDA when it is constructed; always move each batch to
# that device so the loop also runs (instead of raising NameError) without a GPU.
device=model.w.device
for _ in range(epochs):
    for images,labels in train_loader:
        X_train=images.to(device)
        y_train=labels.to(device)
        model.gradient_descent(X_train,y_train,lr=lr)
        
    
    

In [None]:
# Evaluate on the held-out test split.
# Iterating the Dataset directly yields one (image, int label) pair at a time,
# so the labels have no .tolist(); a DataLoader yields batched tensors instead.
test_loader=DataLoader(mnist_test, batch_size=1000, shuffle=False)
device=model.w.device  # inputs must sit on the same device as the weights

count=0
predicted_classes=[]
actual_classes=[]
for images,labels in test_loader:
    X_test=images.to(device)
    pred=model.predict(X_test).cpu()  # predict once per batch and reuse it
    count+=(pred==labels).sum().item()
    predicted_classes.extend(pred.tolist())
    actual_classes.extend(labels.tolist())

# Build the comparison table once, after every batch has been scored.
df=pd.DataFrame({
    "predicted class": predicted_classes,
    "actual class": actual_classes
    })
print(df.head(50))

In [78]:
# Report test-set performance: raw hit count first, then accuracy in percent.
n_test=len(mnist_test)
accuracy_pct=(count/n_test)*100
print(f" the model correctly predicted {count} / {n_test}")
print(f" the percentage accuracy is {accuracy_pct}")

 the model correctly predicted 9204 / 10000
 the percentage accuracy is 92.04
