In [2]:
# Mount Google Drive at /content/drive; the training images are read from there later on.
from google.colab import files,drive
drive.mount('/content/drive')



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Build a CNN model for the handwritten digit recognition problem
import torch
import torch.nn as nn 
import torch.nn.functional as F 
import torch.optim as optim


In [4]:
# Use the first CUDA GPU when one is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = torch.device('cuda:0')
else:
  device = torch.device('cpu')

In [178]:
class Net(nn.Module):
  """LeNet-style CNN for 1x28x28 grayscale digit images with 10 output classes.

  Layers:
    conv1: 1 -> 6 channels, 5x5 kernel, then ReLU and 2x2 max-pooling
    conv2: 6 -> 16 channels, 5x5 kernel, then ReLU and 2x2 max-pooling
    fc1:   16*4*4 -> 120 (hidden layer 1)
    fc2:   120 -> 84     (hidden layer 2)
    fc3:   84 -> 10      (output layer)

  forward() returns log-probabilities of shape (batch, 10).
  """

  def __init__(self):
    super().__init__()

    self.conv1 = nn.Conv2d(1, 6, 5)
    self.pool = nn.MaxPool2d(2, 2)
    self.conv2 = nn.Conv2d(6, 16, 5)
    # Spatial size for a 28x28 input: 28 -> conv 24 -> pool 12 -> conv 8 -> pool 4.
    self.fc1 = nn.Linear(16*4*4, 120)
    self.fc2 = nn.Linear(120, 84)
    self.fc3 = nn.Linear(84, 10)
    # Registered but not used by forward(); kept so the module listing stays the same.
    self.conv2_drop = nn.Dropout2d()

  def forward(self, x):
    """Map a (batch, 1, 28, 28) tensor to (batch, 10) log-probabilities."""
    x = self.pool(F.relu(self.conv1(x)))
    x = self.pool(F.relu(self.conv2(x)))
    # Flatten every sample to a vector, keeping the batch dimension first.
    x = x.view(-1, self.num_flat_features(x))
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    # dim=1 is the class axis; omitting dim is deprecated and warns on every call.
    return F.log_softmax(self.fc3(x), dim=1)

  def num_flat_features(self, x):
    """Return the number of elements per sample, i.e. the product of all dims except the first."""
    num_features = 1
    for s in x.size()[1:]:
      num_features *= s
    return num_features


In [179]:
net = Net().to(device)

In [180]:
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=256, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
  (conv2_drop): Dropout2d(p=0.5, inplace=False)
)


In [181]:
# We create a custom dataset in which every instance is a dictionary, e.g. an image of the digit 2 is stored with label 2
import os
import glob
import numpy as np  #for us to create the dictionary 
from skimage import io #it can convert image to numpy array 
from torch.utils.data import Dataset, DataLoader

In [182]:
class MNISTDataset(Dataset):
  """Dataset over the ``*.jpg`` images stored in one digit folder.

  The last component of the folder path is parsed as the integer label shared by
  every image in it (e.g. images under ``.../trainingset/2`` get label 2).

  Args:
    dir: path of the folder; its last path component must be an integer.
    transform: optional callable applied to each ``{'image', 'label'}`` dict.
    max_files: maximum number of images to use (default 100, to keep runs fast);
      pass ``None`` to use every image in the folder.
  """

  def __init__(self, dir, transform=None, max_files=100):
    self.dir = dir
    self.transform = transform
    # List the folder once instead of on every access, and sort it because glob
    # gives no ordering guarantee: index -> file must stay stable.
    self._files = sorted(glob.glob(os.path.join(dir, '*.jpg')))[:max_files]

  def __len__(self):
    """Number of images served by this dataset."""
    return len(self._files)

  def __getitem__(self, idx):
    """Return ``{'image': ndarray, 'label': 0-d ndarray}`` for index ``idx``, transformed if a transform is set."""
    if torch.is_tensor(idx):
      idx = idx.tolist()       # convert from tensor to a plain Python index
    # glob results already include the folder, so they are used as-is; joining
    # them with self.dir again would duplicate the prefix for relative paths.
    image = io.imread(self._files[idx])
    # normpath drops a trailing separator so '.../3/' still yields '3'.
    digit = int(os.path.basename(os.path.normpath(self.dir)).strip())
    label = np.array(digit)

    instance = {'image': image, 'label': label}

    if self.transform:
      instance = self.transform(instance)

    return instance
  


In [183]:
#create a customized transformation for each instance in dataset 
#We will try to rescale the image here
from skimage import transform
from torchvision import transforms, utils

In [184]:
class Rescale(object):
  """Resize the image of a sample dict to a requested size.

  ``output_size`` is either a tuple ``(rows, cols)`` used as the exact target
  size, or an int giving the length of the shorter side, with the other side
  scaled to keep the aspect ratio.
  """

  def __init__(self, output_size):
    assert isinstance(output_size, (int, tuple))   # only int or tuple is accepted
    self.output_size = output_size

  def __call__(self, sample):
    """Return a new ``{'image', 'label'}`` dict with the image resized; the label is passed through."""
    image = sample['image']
    label = sample['label']

    rows, cols = image.shape[-2:]    # last two dims are taken as (height, width)
    target = self.output_size
    if not isinstance(target, int):
      # Tuple: use it directly as (rows, cols).
      out_rows, out_cols = target
    elif rows > cols:
      out_rows, out_cols = target*rows/cols, target
    else:
      out_rows, out_cols = target, target*cols/rows

    resized = transform.resize(image, (int(out_rows), int(out_cols)))
    return {'image': resized, 'label': label}

In [185]:
# Convert every instance to tensors; only __call__ is overridden, not __init__
class ToTensor(object):
  """Turn the numpy arrays of a sample dict into torch tensors."""

  def __call__(self, sample):
    """Return ``{'image', 'label'}`` as tensors; the image gets a leading dimension of size 1."""
    image = sample['image']
    label = sample['label']
    rows, cols = image.shape[0], image.shape[1]
    # Reshape (rows, cols) -> (1, rows, cols) before converting.
    image_tensor = torch.from_numpy(image.reshape((1, rows, cols)))
    label_tensor = torch.from_numpy(label)
    return {'image': image_tensor, 'label': label_tensor}

In [186]:
from torch.utils.data import random_split
from torchvision import transforms, utils

In [187]:
# Build one MNISTDataset per digit folder (0-9) and merge them into a single dataset

batch_size = 32

# Every instance is rescaled to 28 and then converted to tensors.
list_datasets = [
  MNISTDataset('/content/drive/My Drive/MNIST/trainingset/'+str(digit),
               transform=transforms.Compose([Rescale(28), ToTensor()]))
  for digit in range(10)
]

# The concatenation exposes the instances of all ten folders, transformation included.
dataset = torch.utils.data.ConcatDataset(list_datasets)
print(len(dataset))

1000


In [188]:
# 70% of the instances are used for training, the remainder for validation.
train_size = int(len(dataset)*0.7)
val_size = len(dataset) - train_size
# A seeded generator makes the split identical on every run, so validation
# numbers from different runs are measured on the same held-out instances.
split_generator = torch.Generator().manual_seed(0)
train_dataset, val_dataset = random_split(dataset, [train_size, val_size], generator=split_generator)

train_dataloader = DataLoader(train_dataset, batch_size, shuffle=True, num_workers=1)
# Validation only counts correct predictions, so shuffling it serves no purpose.
val_dataloader = DataLoader(val_dataset, batch_size, shuffle=False, num_workers=1)

In [196]:
# Train the model and report per-digit validation accuracy after every epoch

# The epoch count has its own name: looping with `for epoch in range(epoch)`
# overwrote it, so re-running the cell trained for fewer and fewer epochs.
num_epochs = 5
num_classes = 10
log_every = 10      # print the mean training loss every `log_every` batches
learning_rate = 1e-3
optimizer = optim.Adam(net.parameters(), lr=learning_rate, weight_decay=1e-5)
criterion = nn.CrossEntropyLoss()

for epoch in range(num_epochs):
  net.train()
  running_loss = 0.0

  for batch_idx, batch in enumerate(train_dataloader):
    inputs, targets = batch['image'].to(device, dtype=torch.float), batch['label'].to(device, dtype=torch.long)

    optimizer.zero_grad()
    predicted_outputs = net(inputs)

    loss = criterion(predicted_outputs, targets)
    loss.backward()
    optimizer.step()

    running_loss += loss.item()
    if (batch_idx+1) % log_every == 0:
      print('epoch %d, batch : %d, training_loss: %.3f'%(epoch+1, batch_idx+1, running_loss/log_every))
      running_loss = 0.0

  net.eval()

  # Per-digit counters: correct predictions and number of validation samples seen.
  correct = [0.0]*num_classes
  total = [0.0]*num_classes

  with torch.no_grad():
    for batch_idx, batch in enumerate(val_dataloader):
      inputs, labels = batch['image'].to(device, dtype=torch.float), batch['label'].to(device, dtype=torch.long)
      predicted_outputs = net(inputs)

      # The predicted class is the index of the highest score along the class axis.
      _, predicted_labels = torch.max(predicted_outputs, 1)
      hits = (predicted_labels == labels)

      # Convert to plain Python values once per batch, then update the counters.
      for true_label, hit in zip(labels.tolist(), hits.tolist()):
        correct[true_label] += hit
        total[true_label] += 1

  for i in range(num_classes):
    if total[i] == 0:
      # A random split may leave a digit without validation samples; avoid dividing by zero.
      print('\t Validation accuracy for digit %d: n/a (no validation samples)'%i)
    else:
      print('\t Validation accuracy for digit %d: %.2f'%(i, 100*correct[i]/total[i]))

  





epoch 1, batch : 10, training_loss: 0.262
epoch 1, batch : 20, training_loss: 0.323
	 Validation accuracy for digit 0: 89.29
	 Validation accuracy for digit 1: 96.30
	 Validation accuracy for digit 2: 88.46
	 Validation accuracy for digit 3: 93.55
	 Validation accuracy for digit 4: 93.33
	 Validation accuracy for digit 5: 91.43
	 Validation accuracy for digit 6: 93.55
	 Validation accuracy for digit 7: 82.35
	 Validation accuracy for digit 8: 56.67
	 Validation accuracy for digit 9: 89.29
epoch 2, batch : 10, training_loss: 0.191
epoch 2, batch : 20, training_loss: 0.207
	 Validation accuracy for digit 0: 89.29
	 Validation accuracy for digit 1: 92.59
	 Validation accuracy for digit 2: 80.77
	 Validation accuracy for digit 3: 93.55
	 Validation accuracy for digit 4: 93.33
	 Validation accuracy for digit 5: 91.43
	 Validation accuracy for digit 6: 90.32
	 Validation accuracy for digit 7: 91.18
	 Validation accuracy for digit 8: 80.00
	 Validation accuracy for digit 9: 75.00
epoch 3, bat

In [None]:
#x = F.relu(F.max_pool2d(self.conv1(x), 2))
    #x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))