In [12]:
import numpy as np
import random
import matplotlib.pyplot as plt
import torch
from torchvision import datasets,transforms
from torch.utils.data import DataLoader,Subset
import pickle

#### transforming each image into a tensor of shape [C,H,W]

In [2]:
transform_image=transforms.Compose([
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x.view(-1))])

#### loading the dataset and applying the transformation

In [None]:
mnist_train=datasets.MNIST(root='./mnist_data',train=True,download=True,transform=transform_image)
mnist_test=datasets.MNIST(root='./mnist_data', train=False,download=True,transform=transform_image)

#### visualizing the mnist dataset, using subplots

#### selecting the images with labels 0 and 1

In [4]:
indices_train=[i for i,(image,label) in enumerate(mnist_train) if label in (0,1)]
mnist_train_binary=Subset(mnist_train,indices_train)
indices_test=[i for i,(image,label) in enumerate(mnist_test) if label in (0,1)]
mnist_test_binary=Subset(mnist_test,indices_test)

#### using the dataloader functionality to create batches of size 64

In [5]:
train_loader=DataLoader(mnist_train_binary, batch_size=64, shuffle=True)

In [6]:
class LogisticRegression:
    def __init__(self):
        self.w=torch.randn(784)*0.01
        self.b=torch.tensor(0.0)
        if torch.cuda.is_available():
            self.w=self.w.to('cuda')
            self.b=self.b.to('cuda')
            
    def sigmoid(self,z):
        return 1/(1+torch.exp(-z))
        
    def BCE_loss(self,X_train,y_train):
        z=(X_train@self.w)+self.b
        y_pred=self.sigmoid(z)
        y_pred=torch.clamp(y_pred,min=1e-7,max=1-1e-7)
        BCEloss=-(y_train*torch.log(y_pred)+(1-y_train)*torch.log(1-(y_pred)))
        loss=BCEloss.mean()
        return loss

    def gradient_descent(self,X_train,y_train,alpha):
        z=(X_train@self.w)+self.b
        y_pred=self.sigmoid(z)
        y_pred=torch.clamp(y_pred,min=1e-7,max=1-1e-7)
        w_grad=(X_train.T)@(y_pred-y_train)
        b_grad=sum(y_pred-y_train)
        self.w-=w_grad*alpha*len(X_train)
        self.b-=b_grad*alpha*len(X_train)
        return self.w,self.b
    
    def predict(self,X_test):
        z=X_test@self.w+self.b
        p=self.sigmoid(z)
        if p>0.5:
            y_pred=1
        else:
            y_pred=0
        return y_pred

In [7]:
epochs=100
for _ in range(epochs):
    for images_batch,labels_batch in train_loader:
        X_train=images_batch
        y_train=labels_batch
        if torch.cuda.is_available():
            X_train=X_train.to('cuda')
            y_train=y_train.to('cuda')
        model=LogisticRegression()
        model.gradient_descent(X_train,y_train,alpha=0.01)
print(model.BCE_loss(X_train,y_train))

tensor(0.2944, device='cuda:0')


In [None]:
count=0
for image,label in mnist_test_binary:
    X_test=image
    if torch.cuda.is_available:
        X_test=X_test.to('cuda')
    print(f' actual value= {label}, predicted value= {model.predict(X_test)}')
    if model.predict(X_test)==label:
        count+=1

In [9]:
Accuracy=count/len(mnist_test_binary)
print(f" percentage accuracy of my model= {Accuracy*100}")

 percentage accuracy of my model= 99.76359338061465


In [13]:
with open('trained_model.pkl', 'wb') as f:
    pickle.dump(model, f)