<a href="https://colab.research.google.com/github/taizun-jj202/PyTorch_Basics/blob/NN/MNIST_Pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#importing libraries
import torch
import torch.nn as nn
import torch.optim as optim # contains SGD, Adam
import torch.nn.functional as F #Contains activation funcs , relu,tanh etc
from torch.utils.data import DataLoader
import torchvision.datasets as ds
import torchvision.transforms as transforms

In [2]:
# Creating the NN

class NN(nn.Module):
    """Fully connected classifier with one hidden ReLU layer.

    Args:
        input_size: Number of features in the last dimension of the input.
        num_classes: Number of output scores produced per sample.
        hidden_size: Width of the hidden layer. Defaults to 50, the value
            that used to be hard-coded, so existing two-argument calls
            build exactly the same architecture.
    """

    def __init__(self, input_size, num_classes, hidden_size=50):
        super().__init__()
        self.f1 = nn.Linear(input_size, hidden_size)
        self.f2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the raw (un-normalised) class scores for ``x``.

        No softmax is applied here: the second linear layer's output is
        returned as-is.
        """
        x = F.relu(self.f1(x))
        return self.f2(x)

In [3]:
# Smoke test: push one random batch of 64 flattened samples through a fresh
# network and print the size of what comes out.
m = NN(input_size=784, num_classes=10)
X = torch.rand(64, 784)
print(m(X).size())

torch.Size([64, 10])


In [4]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

device(type='cuda')

In [5]:
#Setting the Hyperparameters
# Seed PyTorch's global RNG first so that everything executed after this cell
# (weight initialisation, DataLoader shuffling) starts from a known state.
seed = 42
torch.manual_seed(seed)

input_size = 784       # features per sample after flattening (28 * 28)
num_classes = 10       # number of output scores per sample
learning_rate = 0.001  # step size passed to the optimizer
batch_size = 64        # samples per DataLoader batch
num_epochs = 5         # full passes over the training loader

In [6]:
#Loading the data
# Both splits share the same on-disk location and the same image-to-tensor
# transform, so define each once instead of repeating the literals.
data_root = 'dataset/'
to_tensor = transforms.ToTensor()

# Training split: reshuffled every epoch.
train_dataset = ds.MNIST(root=data_root, train=True, transform=to_tensor, download=True)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

# Test split: only used for evaluation, where sample order does not affect
# the metric, so keep a fixed order for reproducible evaluation runs.
test_dataset = ds.MNIST(root=data_root, train=False, transform=to_tensor, download=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to dataset/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 326858215.10it/s]

Extracting dataset/MNIST/raw/train-images-idx3-ubyte.gz to dataset/MNIST/raw






Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to dataset/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 129834612.89it/s]


Extracting dataset/MNIST/raw/train-labels-idx1-ubyte.gz to dataset/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to dataset/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 182969770.80it/s]

Extracting dataset/MNIST/raw/t10k-images-idx3-ubyte.gz to dataset/MNIST/raw






Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to dataset/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 3625909.55it/s]


Extracting dataset/MNIST/raw/t10k-labels-idx1-ubyte.gz to dataset/MNIST/raw



In [7]:
# Build the network from the configured sizes, then place it on the
# selected device.
model = NN(input_size=input_size, num_classes=num_classes)
model = model.to(device)

In [8]:
# Objective: cross-entropy between the network's scores and the labels.
criterion = nn.CrossEntropyLoss()
# Optimiser: Adam over every parameter of the model, using the configured
# learning rate.
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

In [9]:
#Training the network
model.train()  # be explicit about the mode instead of relying on the default

for epoch in range(num_epochs):
    running_loss = 0.0  # sum of per-sample losses seen in this epoch
    num_seen = 0        # number of samples seen in this epoch

    for data, targets in train_loader:
        # Move the batch to the same device as the model
        data = data.to(device=device)
        targets = targets.to(device=device)

        # Flatten every sample to a single feature vector
        data = data.reshape(data.shape[0], -1)

        # Forward pass
        scores = model(data)
        loss = criterion(scores, targets)

        # Backward pass: clear gradients left over from the previous batch,
        # then compute fresh ones for this batch
        optimizer.zero_grad()
        loss.backward()

        # Apply the gradients computed by loss.backward() to the parameters
        optimizer.step()

        # criterion returns a per-batch mean (default reduction), so weight it
        # by the batch size to get a correct per-sample average over the epoch
        running_loss += loss.item() * data.shape[0]
        num_seen += data.shape[0]

    # max(..., 1) guards against an empty loader
    print(f"Epoch {epoch + 1}/{num_epochs} - mean training loss: {running_loss / max(num_seen, 1):.4f}")

        

In [10]:
#Check accuracy of the model to see how good it is

def accuracy(loader, model):
    """Print and return the classification accuracy of ``model`` on ``loader``.

    Each batch is moved to the device the model's parameters live on,
    flattened to ``(batch, -1)`` and scored; the prediction is the index of
    the highest score along dimension 1.

    Args:
        loader: Iterable of ``(inputs, labels)`` batches exposing a
            ``dataset`` attribute. If the dataset has a boolean ``train``
            attribute it is used to label the printed header.
        model: The ``nn.Module`` to evaluate.

    Returns:
        Accuracy as a percentage in ``[0, 100]``; ``0.0`` when the loader
        yields no samples.

    The model is switched to eval mode for the duration of the call and is
    put back into whatever mode it was in before, even if evaluation raises.
    """
    # Not every dataset has a `train` flag, so do not assume it is there.
    is_train = getattr(loader.dataset, "train", None)
    if is_train is None:
        print("Checking accuracy")
    elif is_train:
        print("Checking accuracy on train data")
    else:
        print("Checking accuracy on test data")

    # Follow the model's own device rather than depending on a global.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    num_correct = 0
    num_samples = 0
    was_training = model.training
    model.eval()

    try:
        with torch.no_grad():
            for x, y in loader:
                x = x.to(device=model_device)
                y = y.to(device=model_device)
                x = x.reshape(x.shape[0], -1)

                scores = model(x)
                _, predictions = scores.max(1)  # index of the highest score along dim 1
                num_correct += (predictions == y).sum().item()
                num_samples += predictions.size(0)
    finally:
        # Restore the caller's mode instead of forcing train mode.
        model.train(was_training)

    # Avoid dividing by zero when the loader produced nothing.
    percent = num_correct / num_samples * 100 if num_samples else 0.0
    print(f"Got {num_correct} / {num_samples} with accuracy {percent:.2f}")
    return percent


# Report accuracy on the training loader first, then on the test loader.
for loader in (train_loader, test_loader):
    accuracy(loader, model)

Checking accuracy on train data
Got 58229 / 60000 with accuracy 97.05
Checking accuracy on test data
Got 9631 / 10000 with accuracy 96.31
