In [1]:
#--build and train ANN for MNIST image classifier
#--data preprocessing
#---reading and understanding the dataset
#---preparing training dataset -> covert to tensors -> prepare batches -> split into train/test
#--model
#---build model -> train model -> evaluate model
#--inference
#---inference on single image
#---save model -> load model -> re-evaluate model

In [2]:
#--mount google drive for dataset
from google.colab import drive
drive.mount('/content/mount_gdrive')

Mounted at /content/mount_gdrive


In [19]:
#--import libraries

import torch
import torch.nn as nn
import torchvision
from torchvision.datasets import MNIST

from torchvision.transforms import ToTensor
from torch.utils.data import random_split
from torch.utils.data import DataLoader
import torch.nn.functional as F

In [4]:
#--read and understand dataset
data_path="/content/mount_gdrive/MyDrive/Learning_AI_dataset/pytorch_dataset/dataset"

dataset=MNIST(root=data_path,
              download=True,
              )

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to /content/mount_gdrive/MyDrive/Learning_AI_dataset/pytorch_dataset/dataset/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 70627835.89it/s]


Extracting /content/mount_gdrive/MyDrive/Learning_AI_dataset/pytorch_dataset/dataset/MNIST/raw/train-images-idx3-ubyte.gz to /content/mount_gdrive/MyDrive/Learning_AI_dataset/pytorch_dataset/dataset/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to /content/mount_gdrive/MyDrive/Learning_AI_dataset/pytorch_dataset/dataset/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 128594154.80it/s]

Extracting /content/mount_gdrive/MyDrive/Learning_AI_dataset/pytorch_dataset/dataset/MNIST/raw/train-labels-idx1-ubyte.gz to /content/mount_gdrive/MyDrive/Learning_AI_dataset/pytorch_dataset/dataset/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to /content/mount_gdrive/MyDrive/Learning_AI_dataset/pytorch_dataset/dataset/MNIST/raw/t10k-images-idx3-ubyte.gz



100%|██████████| 1648877/1648877 [00:00<00:00, 21711558.84it/s]


Extracting /content/mount_gdrive/MyDrive/Learning_AI_dataset/pytorch_dataset/dataset/MNIST/raw/t10k-images-idx3-ubyte.gz to /content/mount_gdrive/MyDrive/Learning_AI_dataset/pytorch_dataset/dataset/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to /content/mount_gdrive/MyDrive/Learning_AI_dataset/pytorch_dataset/dataset/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 24455107.53it/s]


Extracting /content/mount_gdrive/MyDrive/Learning_AI_dataset/pytorch_dataset/dataset/MNIST/raw/t10k-labels-idx1-ubyte.gz to /content/mount_gdrive/MyDrive/Learning_AI_dataset/pytorch_dataset/dataset/MNIST/raw



In [6]:
#--check shape and datatype
print(len(dataset))
print(dataset[0])

60000
(<PIL.Image.Image image mode=L size=28x28 at 0x7EB7442CAAD0>, 5)


In [11]:
#--convert to tensors & split into train and test
dataset=MNIST(root=data_path,
              download=True,
              train=True,
              transform=ToTensor()
              )

print(dataset[0])

(tensor([[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000

In [13]:
#--random split into train/val
train_ds,val_ds=random_split(dataset,[50000,10000])

In [15]:
#--convert into batches
batch_size=32

train_loader=DataLoader(train_ds,batch_size,num_workers=4)
val_loader=DataLoader(val_ds,batch_size,num_workers=4)



In [18]:
#--check shape of batches
for images,labels in train_loader:
  print(images.shape)
  break



torch.Size([32, 1, 28, 28])


In [44]:
input_shape=28*28
output_shape=10
hidden_layer_shape=20

loss_fn=F.cross_entropy
epochs=5

In [45]:
#--define accuracy function
def acc(preds,labels):
  _,max_prob_index=torch.max(preds,dim=1)
  return torch.tensor(torch.sum(max_prob_index==labels).item()/len(preds))

In [82]:
#--build model
class mnist_ann(nn.Module):
  def __init__(self,input_shape,hidden_layer_shape,output_shape):
    super().__init__()
    self.linear1=nn.Linear(input_shape,hidden_layer_shape)#---hidden layer
    self.linear2=nn.Linear(hidden_layer_shape,output_shape)#---output layer

  def forward(self,xb):
    xb=xb.view(xb.size(0),-1)
    # print(xb.shape)
    out=self.linear1(xb)
    out=F.relu(out)
    out=self.linear2(out)
    return out

  def training_step(self,batch): #--return loss and acc for 1 training batch
    images,labels=batch
    preds=self(images)
    preds=F.softmax(preds,dim=1)
    train_loss=loss_fn(preds,labels)
    train_acc=acc(preds,labels)

    return {
        "train_loss":train_loss,
        "train_acc":train_acc
    }

  def validation_step(self,batch): #--return loss and acc for 1 training batch
    images,labels=batch
    preds=self(images)
    preds=F.softmax(preds,dim=1)
    val_loss=loss_fn(preds,labels)
    val_acc=acc(preds,labels)

    return {
        "val_loss":val_loss,
        "val_acc":val_acc
    }

  def validation_end_epoch(self,outputs): #--accumulate and return average val_loss & val_acc after each epoch
    batch_loss=[x['val_loss'] for x in outputs]
    batch_acc=[x['val_acc'] for x in outputs]

    avg_loss=torch.stack(batch_loss).mean()
    avg_acc=torch.stack(batch_acc).mean()

    return {
        "val_loss":avg_loss.item(),
        "val_acc":avg_acc.item()
    }

  def epoch_end(self,epoch,result):
    print("Epoch : [{}], val_loss : [{:.4f}], val_acc : [{:.4f}]".format(epoch,result['val_loss'],result['val_acc']))


In [83]:
#--initiate model
model=mnist_ann(input_shape,hidden_layer_shape,output_shape)
model

mnist_ann(
  (linear1): Linear(in_features=784, out_features=20, bias=True)
  (linear2): Linear(in_features=20, out_features=10, bias=True)
)

In [84]:
opt=torch.optim.SGD(model.parameters(),1e-3)
opt

SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    lr: 0.001
    maximize: False
    momentum: 0
    nesterov: False
    weight_decay: 0
)

In [85]:
#--model training

history=[]

def evaluate(model,val_loader): #--evaluate model after each epoch on full validation dataset
  outputs=[model.validation_step(batch) for batch in val_loader]#--> accumulate results of each batch
  result=model.validation_end_epoch(outputs) #--take out the average of all batches
  return result

def fit(model,epochs,train_loader,val_loader,opt):
  #--iterrate through each epoch
  for epoch in range(epochs):
    #--iterrate through each batch
    for batch in train_loader:
      train_result=model.training_step(batch)
      train_loss=train_result['train_loss']
      train_acc=train_result['train_acc']
      #--compute gradients
      train_loss.backward()
      #--update weights & bias
      opt.step()
      #--reset the gradients
      opt.zero_grad()

    print("Epoch : [{}], train_loss : [{:.4f}], train_acc : [{:.4f}]".format(epoch,train_result['train_loss'].item(),train_result['train_acc'].item()))

  #--once each epoch training completed, evaluate on val data
    val_result=evaluate(model,val_loader)
    model.epoch_end(epoch,val_result)
    history.append(val_result)

  return history


In [87]:
#--start model training
model_history=fit(model,epochs,train_loader,val_loader,opt)

Epoch : [0], train_loss : [2.2525], train_acc : [0.1875]
Epoch : [0], val_loss : [2.2686], val_acc : [0.1819]
Epoch : [1], train_loss : [2.2273], train_acc : [0.1875]
Epoch : [1], val_loss : [2.2530], val_acc : [0.1903]
Epoch : [2], train_loss : [2.2019], train_acc : [0.2500]
Epoch : [2], val_loss : [2.2376], val_acc : [0.2270]
Epoch : [3], train_loss : [2.1830], train_acc : [0.3125]
Epoch : [3], val_loss : [2.2237], val_acc : [0.2845]
Epoch : [4], train_loss : [2.1687], train_acc : [0.1875]
Epoch : [4], val_loss : [2.2099], val_acc : [0.3249]


In [88]:
#--evaluate performance on test dataset
test_dataset=MNIST(root=data_path,
              download=True,
                   train=False,
                   transform=ToTensor()
              )


In [89]:
test_loader=DataLoader(test_dataset,batch_size)
test_result=evaluate(model,test_loader)
print(test_result)

{'val_loss': 2.2102789878845215, 'val_acc': 0.3208865821361542}


In [99]:
#--inference on sample image
from PIL import Image
from torchvision.transforms import transforms

image_path="/content/mount_gdrive/MyDrive/Learning_AI_dataset/pytorch_dataset/mnist_sample_image.png"

In [110]:
#--create inference image function
def inference_image(model,image_path):
  #--read image
  pil_image=Image.open(image_path).convert('L')
  print(pil_image.size)
  pil_image=pil_image.resize((28,28)) #--resize image to input shape
  print(pil_image.size)

  transform = transforms.Compose([transforms.PILToTensor()])
  img = transform(pil_image)
  print(pil_image.size)

  img=img.unsqueeze(0)#--extend dimension
  print(img.shape)
  xb = img.to(torch.float32)
  print(xb.shape)
  preds=model(xb)
  max_prob,class_label=torch.max(preds,dim=1)

  return max_prob,class_label


In [111]:
#--perform image inference
max_prob,class_label=inference_image(model,image_path)
print(max_prob,class_label)

(255, 264)
(28, 28)
(28, 28)
torch.Size([1, 1, 28, 28])
torch.Size([1, 1, 28, 28])
tensor([174.2121], grad_fn=<MaxBackward0>) tensor([0])
