<a href="https://colab.research.google.com/github/sarthag/MNIST-Dataset/blob/main/MNIST_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
import torchvision.transforms as transforms
from torch.utils.data.dataset import TensorDataset
import time

In [2]:
class CNN(nn.Module):
    """Small two-convolution classifier for 28x28 images.

    Layout: conv(3x3, 8 maps) -> ReLU -> 2x2 max-pool -> conv(3x3, 16 maps)
    -> 2x2 max-pool -> fully connected layer producing ``num_classes`` scores.

    Both convolutions use stride 1 and padding 1, so they keep the spatial
    size; each pooling step halves it. The linear layer is sized for a 7x7
    feature map, i.e. for 28x28 inputs.

    Args:
        in_channel: Number of channels in the input images.
        num_classes: Number of output scores (one per class).
    """

    def __init__(self, in_channel=1, num_classes=10):
        super().__init__()
        # Honour the constructor argument instead of a hard-coded channel count.
        self.conv1 = nn.Conv2d(
            in_channels=in_channel,
            out_channels=8,
            kernel_size=(3, 3),
            stride=(1, 1),
            padding=(1, 1),
        )
        self.pool = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        self.conv2 = nn.Conv2d(
            in_channels=8,
            out_channels=16,
            kernel_size=(3, 3),
            stride=(1, 1),
            padding=(1, 1),
        )
        # 16 feature maps of 7x7 remain after two 2x2 poolings of a 28x28 input.
        self.fc1 = nn.Linear(16 * 7 * 7, num_classes)

    def forward(self, x):
        """Return raw class scores of shape ``(batch, num_classes)`` for ``x``."""
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        # NOTE(review): unlike conv1, no activation follows conv2 -- confirm
        # this is intentional before changing it (it would alter training).
        x = self.conv2(x)
        x = self.pool(x)
        # Flatten everything except the batch dimension for the linear layer.
        x = x.reshape(x.shape[0], -1)
        x = self.fc1(x)
        return x

In [3]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [4]:
# Basic test: push one random batch of 64 single-channel 28x28 images through
# an untrained network and print the output shape.
model = CNN()
x = torch.randn(64, 1, 28, 28)
model = model.to(device)
x = x.to(device)
# perf_counter() measures elapsed time; process_time() reports CPU time summed
# over all threads of the process, which is not how long the call took.
start = time.perf_counter()
with torch.no_grad():  # shape check only, so no autograd graph is needed
    print(model(x).shape)
end = time.perf_counter()
print("time: ", end - start)

torch.Size([64, 10])
time:  0.013935892999999755


In [5]:
# Hyperparameters
# -- model shape
in_channel, num_classes = 1, 10
# -- optimisation
learning_rate = 0.001
batch_size, num_epochs = 64, 25

In [6]:
# Load the data
# as_frame=False requests plain NumPy arrays. Without it, current scikit-learn
# returns a pandas DataFrame for this dataset, and the reshape below fails.
X, y = fetch_openml("mnist_784", version=1, return_X_y=True, as_frame=False)

X = X.astype(np.float32)
# Labels are converted to 64-bit integers explicitly, independent of platform.
y = y.astype(np.int64)
# Each 784-value row becomes a single-channel 28x28 image. Pixel values are
# left in their raw range; no scaling is applied here.
X = X.reshape(X.shape[0], 1, 28, 28)
print(X.shape, y.shape)

(70000, 1, 28, 28) (70000,)


In [7]:
# Wrap the NumPy arrays as tensors; labels are cast to LongTensor.
X_tensor = torch.from_numpy(X)
y_tensor = torch.from_numpy(y).type(torch.LongTensor)

# Hold out one seventh of the samples for evaluation, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X_tensor,
    y_tensor,
    test_size=(1/7),
    random_state=42,
)

In [8]:
# Pair inputs with labels and batch them.
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)

# Training batches are reshuffled every epoch.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# Accuracy does not depend on sample order, so the evaluation loader is not
# shuffled; this also avoids drawing from the global RNG during evaluation.
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [9]:
# Initialise network
# Build the model from the hyperparameters rather than the class defaults, and
# place it on the target device before the optimizer is constructed.
model = CNN(in_channel=in_channel, num_classes=num_classes).to(device)
loss_fun = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [10]:
def chk_accuracy(loader, model):
    """Return the fraction of samples in ``loader`` that ``model`` gets right.

    The predicted class of each sample is the index of its highest score
    along dimension 1 of the model output.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` tensor batches.
        model: ``nn.Module`` mapping an input batch to per-class scores.

    Returns:
        Accuracy as a float between 0 and 1, or ``0.0`` when ``loader``
        yields no samples.

    Side effects:
        Switches ``model`` to evaluation mode and leaves it there.
    """
    # Evaluate on the device the model already lives on, so the function does
    # not depend on a module-level ``device``. A model without parameters has
    # no device of its own; batches are then used where they are.
    first_param = next(model.parameters(), None)
    target = first_param.device if first_param is not None else None

    num_correct = 0
    num_samples = 0
    model.eval()

    with torch.no_grad():
        for x, y in loader:
            if target is not None:
                x = x.to(device=target)
                y = y.to(device=target)
            predictions = model(x).argmax(1)
            # Tensor.sum() reduces in a single operation; the builtin sum()
            # would iterate over the tensor one element at a time.
            num_correct += (predictions == y).sum().item()
            num_samples += predictions.size(0)

    if num_samples == 0:
        return 0.0
    return num_correct / num_samples

In [11]:
# Train the network
# Moving the model does not change between epochs, so do it once up front.
model = model.to(device=device)

for epoch in range(num_epochs):
    # chk_accuracy() leaves the model in eval mode; switch back every epoch.
    model.train()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    loss_train = 0
    # perf_counter() measures elapsed time; process_time() would report CPU
    # time summed over all threads of the process.
    start = time.perf_counter()
    for data, targets in train_loader:
        data = data.to(device=device)
        targets = targets.to(device=device)

        # Forward pass
        scores = model(data)
        loss = loss_fun(scores, targets)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        loss_train += loss.item()

        # Parameter update
        optimizer.step()

    train_acc = chk_accuracy(train_loader, model)
    val_acc = chk_accuracy(test_loader, model)
    # Mean of the per-batch losses seen during this epoch.
    avg_loss = loss_train / len(train_loader)
    # The reported time covers training plus both accuracy evaluations.
    end = time.perf_counter()

    print('Epoch ({}/{}),Training loss : {:.4f}, Time: {:.2f}, train_accuracy:{:.4f}, val_accuracy:{:.4f}'.format(epoch+1, num_epochs, avg_loss, end - start, train_acc, val_acc))
                                

Epoch (1/25),Training loss : 0.6607, Time: 20.95, train_accuracy:0.9639, val_accuracy:0.9582
Epoch (2/25),Training loss : 0.1243, Time: 20.96, train_accuracy:0.9769, val_accuracy:0.9667
Epoch (3/25),Training loss : 0.0932, Time: 20.80, train_accuracy:0.9797, val_accuracy:0.9708
Epoch (4/25),Training loss : 0.0732, Time: 20.59, train_accuracy:0.9840, val_accuracy:0.9736
Epoch (5/25),Training loss : 0.0687, Time: 20.38, train_accuracy:0.9844, val_accuracy:0.9727
Epoch (6/25),Training loss : 0.0589, Time: 20.59, train_accuracy:0.9857, val_accuracy:0.9730
Epoch (7/25),Training loss : 0.0553, Time: 20.62, train_accuracy:0.9889, val_accuracy:0.9775
Epoch (8/25),Training loss : 0.0526, Time: 20.49, train_accuracy:0.9887, val_accuracy:0.9747
Epoch (9/25),Training loss : 0.0471, Time: 20.62, train_accuracy:0.9875, val_accuracy:0.9752
Epoch (10/25),Training loss : 0.0436, Time: 20.65, train_accuracy:0.9899, val_accuracy:0.9782
Epoch (11/25),Training loss : 0.0423, Time: 21.68, train_accuracy:0.9