<a href="https://colab.research.google.com/github/tapasML/EVAB2/blob/main/ByClass_Split_Session_4_Q3X_Assignment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#                                                 **This is EMNIST**

# This notebook's model uses the 'byclass' split
# The 'byclass' split is **unbalanced** (lower training accuracy)
# compared to the digits / MNIST / letters splits (**balanced** training sets)

# Note: The results for the 'byclass' and 'digits' splits differ considerably.
# 'byclass' is unbalanced (its labels are unequally distributed).



In [1]:
# Import the torch and torchvision packages
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim 
import torchvision 
import torchvision.transforms as transforms  

# Select the compute device: the first CUDA GPU when one is available, else the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Images are only converted to tensors here. The rotate(-90) + hflip transforms
# that make 'byclass' images display upright are intentionally left out.
train_transform = transforms.Compose([transforms.ToTensor()])

# Download (if needed) the EMNIST training data into ./data_emnist.
# The 'byclass' split is used; 'digits' is the alternative split mentioned in
# this notebook.
emnist_train_set = torchvision.datasets.EMNIST(
    root='./data_emnist',
    train=True,
    split='byclass',
    download=True,
    transform=train_transform,
)

# Number of training samples (around 700K images for the 'byclass' split).
dataset_size = len(emnist_train_set)
print('dataset size = ', dataset_size)

Downloading and extracting zip archive
Downloading http://www.itl.nist.gov/iaui/vip/cs_links/EMNIST/gzip.zip to ./data_emnist/EMNIST/raw/emnist.zip


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data_emnist/EMNIST/raw/emnist.zip to ./data_emnist/EMNIST/raw
Processing byclass


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


Processing bymerge
Processing balanced
Processing letters
Processing digits
Processing mnist
Done!
dataset size =  697932


# **Build the Network**

In [2]:
#@disclaimer: get_num_correct() function is example of plagiarism

#utility method to count how many predictions match label.
def get_num_correct(preds, labels):
    """Count how many predictions agree with their labels.

    Args:
        preds: tensor of per-class scores; the predicted class of each row is
            the index of its largest value along dim 1.
        labels: tensor of target class indices, compared element-wise with
            the predicted classes.

    Returns:
        The number of matching positions as a plain Python number.
    """
    predicted_classes = preds.argmax(dim=1)
    matches = predicted_classes.eq(labels)
    return matches.sum().item()

#define the network
class Emnist_Network(nn.Module):
    """Small all-convolutional classifier for single-channel 28x28 images.

    The network has no fully connected layer: the last convolution emits one
    channel per class and a 3x3 average pool collapses each channel to a
    single score.

    Shapes below are (channels x height x width) for a 1x28x28 input. RF is
    the theoretical receptive field, RF_out = RF_in + (kernel - 1) * jump.

        conv1     1x28x28  -> 10x28x28   RF 3
        conv2     10x28x28 -> 10x28x28   RF 5
        pool1     10x28x28 -> 10x14x14   RF 6
        conv3     10x14x14 -> 20x14x14   RF 10
        conv4     20x14x14 -> 20x14x14   RF 14
        pool2     20x14x14 -> 20x7x7     RF 16
        conv5     20x7x7   -> 30x5x5     RF 24
        conv6     30x5x5   -> Cx3x3      RF 32
        avg_pool  Cx3x3    -> Cx1x1      RF 40

    Args:
        num_classes: number of output classes C. Defaults to 62, the size
            used for the 'byclass' split; pass 10 for the 'digits' split.
    """

    def __init__(self, num_classes=62):
        super().__init__()  # initialize parent
        self.num_classes = num_classes

        # Block 1: two padded 3x3 convolutions, then 2x2 max pooling.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=10, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=10, out_channels=10, kernel_size=3, padding=1)
        self.pool1 = nn.MaxPool2d(2, 2)

        # Block 2: two padded 3x3 convolutions, then 2x2 max pooling.
        self.conv3 = nn.Conv2d(in_channels=10, out_channels=20, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(in_channels=20, out_channels=20, kernel_size=3, padding=1)
        self.pool2 = nn.MaxPool2d(2, 2)

        # Head: unpadded convolutions shrink 7x7 -> 5x5 -> 3x3.
        self.conv5 = nn.Conv2d(in_channels=20, out_channels=30, kernel_size=3)

        # One output channel per class; the average pool reduces 3x3 -> 1x1.
        self.conv6 = nn.Conv2d(in_channels=30, out_channels=num_classes, kernel_size=3)
        self.avg_pool = nn.AvgPool2d(kernel_size=3, stride=3)

    def forward(self, t):
        """Run a batch of images through the network.

        Args:
            t: input batch; the layer sizes above assume (N, 1, 28, 28).

        Returns:
            Tensor of shape (N, num_classes) holding log-probabilities
            (log_softmax over dim 1).
        """
        # Block 1
        t = F.relu(self.conv1(t))
        t = F.relu(self.conv2(t))
        t = self.pool1(t)

        # Block 2
        t = F.relu(self.conv3(t))
        t = F.relu(self.conv4(t))
        t = self.pool2(t)

        # Head
        t = F.relu(self.conv5(t))
        # No ReLU after conv6: its outputs are the class scores.
        t = self.conv6(t)
        t = self.avg_pool(t)

        # Flatten (N, C, 1, 1) to (N, C) and convert scores to log-probabilities.
        t = t.view(-1, self.num_classes)
        return F.log_softmax(t, dim=1)

# Train Network

In [None]:
torch.set_grad_enabled(True)  # gradients are needed for training

# Training hyperparameters, gathered here so they are easy to tune.
BATCH_SIZE = 100
NUM_EPOCHS = 20
LEARNING_RATE = 0.01

# Move the model to `device` (the GPU when available, otherwise the CPU).
network = Emnist_Network().to(device)

# Serve the training data in shuffled mini-batches.
emnist_train_loader = torch.utils.data.DataLoader(
    emnist_train_set,
    batch_size=BATCH_SIZE,
    shuffle=True,
)

# Print the architecture for debugging purposes.
print(network)

# Select the optimizer.
optimizer = optim.Adam(network.parameters(), lr=LEARNING_RATE)

for epoch in range(NUM_EPOCHS):
    total_loss = 0
    total_correct = 0

    for images, labels in emnist_train_loader:
        # Inputs and labels must live on the same device as the model.
        images, labels = images.to(device), labels.to(device)

        preds = network(images)  # forward pass
        # The network already returns log-probabilities (log_softmax), so the
        # matching loss is NLL; cross_entropy would apply log_softmax again.
        loss = F.nll_loss(preds, labels)

        optimizer.zero_grad()  # reset gradients for each batch
        loss.backward()        # calculate gradients
        optimizer.step()       # update weights

        # total_loss is the sum of the per-batch mean losses for this epoch.
        total_loss += loss.item()
        total_correct += get_num_correct(preds, labels)

    print(
        "epoch", epoch,
        "total_correct ,  % of correct", total_correct, (total_correct/dataset_size)*100,
        "loss:", total_loss
    )

Emnist_Network(
  (conv1): Conv2d(1, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(10, 20, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4): Conv2d(20, 20, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv5): Conv2d(20, 30, kernel_size=(3, 3), stride=(1, 1))
  (conv6): Conv2d(30, 62, kernel_size=(3, 3), stride=(1, 1))
  (avg_pool): AvgPool2d(kernel_size=3, stride=3, padding=0)
)
