In [22]:
from google.colab import files, drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [23]:
# this is yourpart create a CNN model for 10-class digit recognition classification problem
#create/define a CNN model for handwriting digit recognition problem

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

device=torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
class Net(nn.Module):

  def __init__(self):

    super().__init__()
    
    #define first conv layer(input channel:1, output_channel:6, filter_size:5*5)
    #define pooling layer(2*2)
    #define second conv layer(input channel:6, output_channel:16, filter_size:5*5)
    #define a sub fully connected feedforward network
    #hidden size (1):120
    #hidden size(2): 84
    #  output size: 10




    #define/add a cov layer
  #1*6*    5*5
    self.conv1 = nn.Conv2d(1, 6, 5) #After 1st conv 6*24*24
    self.pool = nn.MaxPool2d(2, 2)  #6*12*12
    self.conv2 = nn.Conv2d(6, 16, 5)  #16*8*8->After pool 16*4*4
    self.fc1 = nn.Linear(16*4*4, 120) #ourinput image is 28*28,
    self.fc2 = nn.Linear(120, 84)
    self.fc3=nn.Linear(84,10)
    self.do1 = nn.Dropout(0.5)

#forward
# x-->conv1-->relu-->pooling-->conv2-->relu-->pooling-->fullyconnected
 
 
  def forward(self, x):
    x=self.pool(F.relu(self.conv1(x)))
    self.do1 = nn.Dropout(0.5)
    x=self.pool(F.relu(self.conv2(x)))
    self.do1 = nn.Dropout(0.5)
    x=x.view(-1,self.num_flat_features(x))
    x=F.relu(self.fc1(x))
    self.do1 = nn.Dropout(0.5)
    x=F.relu(self.fc2(x))
    self.do1 = nn.Dropout(0.5)
    x=F.log_softmax(self.fc3(x))
    return x

    
  def num_flat_features(self, x):
    size = x.size()[1:]
    num_features=1
    for s in size:
      num_features*= s

    return num_features
    
net=Net().to(device)
print(net)

params=list(net.parameters())
for i in range(len(params)):
  print(params[i].size())


Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=256, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
  (do1): Dropout(p=0.5, inplace=False)
)
torch.Size([6, 1, 5, 5])
torch.Size([6])
torch.Size([16, 6, 5, 5])
torch.Size([16])
torch.Size([120, 256])
torch.Size([120])
torch.Size([84, 120])
torch.Size([84])
torch.Size([10, 84])
torch.Size([10])


In [24]:
#Create a customize Dataset
#The torch provide you with thw dataset class we want to inherit it(create a subclass) to create my own customized dataset
#Within my dataset I have many instances, each instance may contain properties
#Each instance in customized dataset to have 2 field 1 is the image itself and the other is the label
#Safe each istance as a dictionary with each instance 2 value pairs  

import os 
import glob #Design for filr opwerations 
import numpy as np  #package to process images. it can convert image to Numpy array
from skimage import io  

from torch.utils.data import Dataset, DataLoader

#override __init__,__len__,and __getitem__ methods



class MNISTDatasets(Dataset):
#MNISTDataset is a subclass of Dataset
#Override __init__,__len__, and __getitem__ methods
  
  def __init__(self,dir,transform=None):
    self.dir=dir  #for example: /content/drive/My Drive/MNIST/trainingset/1/
    self.transform=transform

  def __len__(self):
    files=glob.glob(self.dir+'/*.jpg')[:1000]
    return len(files)

  def __getitem__(self,idx):
    if torch.is_tensor(idx):
      idx=idx.tolist()
      #provide index to images

    all_files=glob.glob(self.dir+'/*.jpg')[:1000] #return a list of file names
    img_fname =os.path.join(self.dir,all_files[idx]) #obtain a absolute path to that file with index idx
    image=io.imread(img_fname) # to a numpy array for that image 

    digit=int(self.dir.split('/')[-1].strip())  # /content/drive/My Drive/MNIST/trainingset/1/
    label=np.array(digit)
    sample={'image':image, 'label':label}

    if self.transform:
      sample=self.transform(sample)

    return sample





In [25]:
# create/define a customized transformation for each instance in the dataset
#Why rescale?
# Size of all input image may not be the same , some images are of the size 28*8 and some images are of the size 28*30 
#The sizes need to be consistant otherwise it cannot be put in the same model

from skimage import transform 
from torchvision import transforms, utils

class Rescale(object):

  def __init__(self,output_size):
    assert isinstance(output_size,(int,tuple)) #Check Output size must be either integer or tuple
    self.output_size=output_size
  
  def __call__(self,sample):
    image, label = sample['image'], sample['label'] #image is a numpy array #label is a numpy array but within that ther is only 1 dimension
    h, w = image.shape[:2]  #Get the last 2 dimension
    #no of rows(height) and number of column(w) from the numpy array
    if isinstance(self.output_size,int):
      if h>w: 
        new_h, new_w =self.output_size*h/w, self.output_size  
      else:
        new_h, new_w =self.output_size, self.output_size*w/h
    else:
      new_h, new_w = self.output_size

    new_h, new_w= int(new_h),int(new_w)
    
    new_img= transform.resize(image, (new_h,new_w)) #Resize the image using new_h and new_w

    return{'image':new_img,'label':label} #Return a dictionary

class ToTensor(object): #Convert every instance to a Tensor
  def __call__(self, sample):
    image, label=sample['image'], sample['label']

    image=image.reshape((1,image.shape[0],image.shape[1])) #3D to use it for CNN add a channel 1

    return {'image':torch.from_numpy(image), 'label': torch.from_numpy(label)} #Convert into tensor



In [26]:
# create train/val dataloader

from torch.utils.data import random_split

batch_size = 32
list_datasets=[]
for i in range(10):
  cur_ds = MNISTDatasets('/content/drive/My Drive/MNIST/trainingset/'+str(i), transform=transforms.Compose([Rescale(28),ToTensor()]))
  list_datasets.append(cur_ds)  #10 seperate datasets

dataset = torch.utils.data.ConcatDataset(list_datasets) #Combine the 10 different datasets
print(len(dataset))
#1000 instances Each instance is a dictionary with 2 key value pair 1st is image 2nd is label 
#Value for 1st key value pair is a tensor with dimension 1*28*28 the label tensor is 1*1


10000


In [27]:
train_size=int(len(dataset)*0.7)
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset,[train_size, val_size])

train_dataloader = DataLoader(train_dataset, batch_size, shuffle=True, num_workers=1)
val_dataloader= DataLoader(val_dataset,batch_size,shuffle=True,num_workers=1)
print(train_size)

7000


In [28]:
#training/validation

epochs =20
learning_rate =1e-3
optimizer = optim.Adam(net.parameters(),lr=learning_rate, weight_decay=1e-5)
criterion=nn.CrossEntropyLoss()

for epoch in range(epochs):
  
  net.train() #update parameters
  running_loss =0.0
  for b, samples in enumerate(train_dataloader):
    inputs,targets=samples['image'].to(device, dtype=torch.float), samples['label'].to(device,dtype=torch.long)

    optimizer.zero_grad()
    outputs = net(inputs)
    loss = criterion(outputs,targets)
    loss.backward()
    optimizer.step()

    running_loss += loss.item()
    if (b+1)%10 == 0:
      print('epoch %d, batch: %d, training loss: %.3f'%(epoch+1, b+1, running_loss/10)) #For every 10 batches
      running_loss =0.0 #Reset running_loss
   #validation
  net.eval()
  correct = [0.0]*10
  total = [0.]*10
  classes=range(10)
   

  
  with torch.no_grad():
    for b, data in enumerate(val_dataloader):
      images, labels = data['image'].to(device,dtype=torch.float), data['label'].to(device,dtype=torch.long)
      outputs=net(images)

      _,predicted = torch.max(outputs,1)
      c=(predicted == labels)
      for i in range(len(labels)):
        label=labels[i]
        correct[label] +=c[i].item()
        total[label]+=1 

  for i in range(10):
      print('\t Validation accuracy for digit %d: %.2f'% (classes[i], 100*correct[i]/total[i]))          






epoch 1, batch: 10, training loss: 2.299
epoch 1, batch: 20, training loss: 2.271
epoch 1, batch: 30, training loss: 2.185
epoch 1, batch: 40, training loss: 1.942
epoch 1, batch: 50, training loss: 1.494
epoch 1, batch: 60, training loss: 1.001
epoch 1, batch: 70, training loss: 0.919
epoch 1, batch: 80, training loss: 0.787
epoch 1, batch: 90, training loss: 0.801
epoch 1, batch: 100, training loss: 0.623
epoch 1, batch: 110, training loss: 0.545
epoch 1, batch: 120, training loss: 0.425
epoch 1, batch: 130, training loss: 0.399
epoch 1, batch: 140, training loss: 0.488
epoch 1, batch: 150, training loss: 0.415
epoch 1, batch: 160, training loss: 0.344
epoch 1, batch: 170, training loss: 0.412
epoch 1, batch: 180, training loss: 0.303
epoch 1, batch: 190, training loss: 0.409
epoch 1, batch: 200, training loss: 0.336
epoch 1, batch: 210, training loss: 0.399
	 Validation accuracy for digit 0: 96.93
	 Validation accuracy for digit 1: 97.32
	 Validation accuracy for digit 2: 86.21
	 Va