<a href="https://colab.research.google.com/github/tnguyen7s/ai-cloud-club-semo/blob/master/aic_mnist_fc_03302023.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import packages

In [1]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, SubsetRandomSampler
import matplotlib.pyplot as plt
import torch.nn as nn
import pandas as pd


# Set up Device for training

In [2]:
GPU = torch.cuda.is_available()

if GPU:
  device = torch.device('cuda')
  print('Using GPU')
else:
  device = torch.device('cpu')
  print('Using CPU')

Using GPU


# Data

In [3]:
transformer = transforms.Compose([
    transforms.Resize((20,20)),
    transforms.Grayscale(),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0,), std=(1,))
])

In [4]:
train_dataset = datasets.MNIST('./sample_data', train=True, transform=transformer, download=True)
test_dataset = datasets.MNIST('./sample_data', train=False, transform=transformer, download=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./sample_data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./sample_data/MNIST/raw/train-images-idx3-ubyte.gz to ./sample_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./sample_data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./sample_data/MNIST/raw/train-labels-idx1-ubyte.gz to ./sample_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./sample_data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./sample_data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./sample_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./sample_data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./sample_data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./sample_data/MNIST/raw



In [5]:
BATCH_SIZE=128 # each training iteration, use 128 images and its labels

In [6]:
from torch.utils.data.sampler import SubsetRandomSampler
# for training
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, sampler=SubsetRandomSampler(range(50000)))
# for validation
val_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, sampler=SubsetRandomSampler(range(50000, 60000)))

test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=True)

In [7]:
train_iter = iter(train_loader)

In [8]:
next(train_iter)

[tensor([[[[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           ...,
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.]]],
 
 
         [[[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           ...,
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.]]],
 
 
         [[[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           ...,
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.]]],
 
 
         ...,
 
 
         [[[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           ..

In [9]:
data, target = next(train_iter)

In [10]:
data.shape
# 128 is the number of images; batch size
# 1 is the number of color channel
# 20 is the width
# 20 is the height

torch.Size([128, 1, 20, 20])

In [11]:
target.shape

torch.Size([128])

In [12]:
target

tensor([6, 6, 4, 1, 8, 8, 1, 3, 5, 6, 3, 3, 5, 3, 1, 0, 6, 2, 8, 1, 4, 3, 2, 7,
        6, 3, 5, 3, 1, 0, 5, 8, 4, 0, 9, 6, 8, 2, 3, 1, 6, 7, 2, 4, 6, 4, 1, 3,
        8, 2, 4, 7, 5, 1, 3, 7, 3, 8, 9, 1, 3, 3, 6, 2, 7, 6, 6, 0, 3, 5, 8, 2,
        5, 5, 5, 2, 8, 3, 2, 1, 1, 2, 0, 3, 3, 5, 7, 8, 1, 7, 5, 7, 1, 3, 2, 3,
        3, 1, 5, 3, 6, 3, 7, 2, 4, 3, 3, 6, 7, 8, 5, 7, 2, 1, 9, 7, 3, 1, 5, 7,
        8, 9, 5, 3, 8, 0, 7, 8])

In [13]:
target.unique() # labels for training

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

# Image Show

In [14]:
def imshow(tensor_image):
  np_image = tensor_image.numpy()

  plt.imshow(np_image)
  plt.axis('off')
  plt.show()

In [15]:
data[5,0]

tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0157, 0.1686, 0.4078, 0.4431,
         0.4353, 0.3333, 0.1216, 0.0078, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0039, 0.2118, 0.6902, 0.6157, 0.5412,
         0.5451, 0.5882, 0.5882, 0.3608, 0.1098, 0.0039, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0118, 0.3647, 0.6

# Fully Connected Model

In [16]:
def create_fc_model_v1(device):
  model = nn.Sequential(
      nn.Flatten(),
      nn.Linear(in_features=400, out_features=200),
      nn.ReLU(), #non linear function
      nn.Linear(in_features=200, out_features=314),
      nn.ReLU(),
      nn.Linear(in_features=314, out_features=146),
      nn.ReLU(),
      nn.Linear(in_features=146, out_features=400),
      nn.ReLU(),
      nn.Linear(in_features=400, out_features=10), 
      nn.Softmax()
  )

  model = model.to(device) # to load the model to GPU or CPU
  return model


In [18]:
model = create_fc_model_v1(device) # create a model instance

In [19]:
from torchsummary import summary
summary(model, (1,20,20), 128, device="cuda") # yours would be cuda

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
           Flatten-1                 [128, 400]               0
            Linear-2                 [128, 200]          80,200
              ReLU-3                 [128, 200]               0
            Linear-4                 [128, 314]          63,114
              ReLU-5                 [128, 314]               0
            Linear-6                 [128, 146]          45,990
              ReLU-7                 [128, 146]               0
            Linear-8                 [128, 400]          58,800
              ReLU-9                 [128, 400]               0
           Linear-10                  [128, 10]           4,010
          Softmax-11                  [128, 10]               0
Total params: 252,114
Trainable params: 252,114
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.20
Forward/

  input = module(input)


# Optimizer

In [20]:
from torch.optim import SGD, Adam

optimizer = SGD(model.parameters(), lr=0.1, momentum=0.9) # stochastic gradent descent
# optimizer = Adam(model.parameters())

In [21]:
# loss function
criterion = nn.CrossEntropyLoss()

# Training function

In [30]:
def train(model, train_loader, optimizer, criterion, val_loader, epochs):
  history = [] # keep track accuracy and loss for every epoch 
  for e in range(epochs): # a training step
    model.train()

    train_running_loss = 0
    train_running_corrects = 0
    train_size = 0
    val_running_loss = 0
    val_running_corrects = 0
    val_size = 0 

    for batch in train_loader: # iterate over each batch of size 128
      x, y = batch # x: images in tensors, y: labels for images
      x = x.to(device)
      y = y.to(device) 

      # clear the gradient (gradient = multiple derivatives - each is the derivative of the loss with respect to the weight in the model)
      optimizer.zero_grad()

      ## train ##
      probs = model(x)
      loss = criterion(probs, y) # compute the loss of the model at the current state

      loss.backward() # give the optimizer the gradients to update the weights
      
      optimizer.step() # update the weught

      # compute accuracy and loss for training batch
      _, preds = probs.max(1) # preds: the digits that the model predict for the batch of images
      corrects = (preds==y).sum()
      train_running_corrects += corrects 

      train_running_loss += loss.item()*x.size(0)
      train_size += x.size(0)

    else:
      model.eval()

      with torch.no_grad():
        for batch in val_loader: # iterate over each batch of size 128
          x, y = batch # x: images in tensors, y: labels for images
          x = x.to(device)
          y = y.to(device) 

          probs = model(x)
          loss = criterion(probs, y)

          _, preds = probs.max(1) # preds: the digits that the model predict for the batch of images
          corrects = (preds==y).sum()
          val_running_corrects += corrects 

          val_running_loss += loss.item()*x.size(0)
          val_size += x.size(0)
    
    # after the training
    # compute accuracy and the loss for the epoch
    train_acc = train_running_corrects/train_size
    train_loss = train_running_loss/train_size
    val_acc = val_running_corrects/val_size
    val_loss = val_running_loss/val_size

    print(f'Finished epoch {e}, training accuracy={train_acc*100:.2f}% training loss={train_loss:.2f}')
    print(f'\t Validation accuracy={val_acc*100:.2f}% validation loss={val_loss:.2f}')

    # log 
    history.append((train_acc, train_loss, val_acc, val_loss))

  return history


  

# Testing function

# Plot functions for training accuracy and loss accuracy

# Create a model and train

In [31]:
model = create_fc_model_v1(device) # create a model instance
optimizer = SGD(model.parameters(), lr=0.1, momentum=0.9) # stochastic gradent descent
criterion = nn.CrossEntropyLoss()

train(model, train_loader, optimizer, criterion, val_loader, 20)

  input = module(input)


Finished epoch 0, training accuracy=35.95% training loss=2.09
	 Validation accuracy=73.01% validation loss=1.73
Finished epoch 1, training accuracy=74.85% training loss=1.71
	 Validation accuracy=76.81% validation loss=1.69
Finished epoch 2, training accuracy=84.01% training loss=1.62
	 Validation accuracy=93.64% validation loss=1.53
Finished epoch 3, training accuracy=93.05% training loss=1.53
	 Validation accuracy=93.87% validation loss=1.52
Finished epoch 4, training accuracy=94.05% training loss=1.52
	 Validation accuracy=95.40% validation loss=1.51
Finished epoch 5, training accuracy=94.72% training loss=1.51
	 Validation accuracy=94.94% validation loss=1.51
Finished epoch 6, training accuracy=94.88% training loss=1.51
	 Validation accuracy=94.37% validation loss=1.52
Finished epoch 7, training accuracy=95.07% training loss=1.51
	 Validation accuracy=95.69% validation loss=1.50
Finished epoch 8, training accuracy=95.16% training loss=1.51
	 Validation accuracy=95.36% validation lo

[(tensor(0.3595, device='cuda:0'),
  2.0908204146575926,
  tensor(0.7301, device='cuda:0'),
  1.7290974662780763),
 (tensor(0.7485, device='cuda:0'),
  1.7118574436187743,
  tensor(0.7681, device='cuda:0'),
  1.6904966009140014),
 (tensor(0.8401, device='cuda:0'),
  1.6220210431671143,
  tensor(0.9364, device='cuda:0'),
  1.5261040468215943),
 (tensor(0.9305, device='cuda:0'),
  1.5314879206466674,
  tensor(0.9387, device='cuda:0'),
  1.5237517749786378),
 (tensor(0.9405, device='cuda:0'),
  1.520832569847107,
  tensor(0.9540, device='cuda:0'),
  1.507130153465271),
 (tensor(0.9472, device='cuda:0'),
  1.5141454622268677,
  tensor(0.9494, device='cuda:0'),
  1.512359937286377),
 (tensor(0.9488, device='cuda:0'),
  1.5125033359527589,
  tensor(0.9437, device='cuda:0'),
  1.5174325923919678),
 (tensor(0.9507, device='cuda:0'),
  1.5102570069122314,
  tensor(0.9569, device='cuda:0'),
  1.503946541786194),
 (tensor(0.9516, device='cuda:0'),
  1.509389729537964,
  tensor(0.9536, device='cud

# Visualize and performance

# Test on real data
