In [None]:
## Author- Sayan Chandra
## Roll - CS20M057
## Instructor - Mitesh M. Khapra
## Course - CS6910 (Fundamentals of Deep Learning)

In [2]:
# Master switch for Weights & Biases integration: any truthy value enables the
# wandb install/login/import cells, the sweep creation and the per-epoch logging;
# 0 runs the single hard-coded configuration without wandb.
WANDB = 0

In [3]:
if WANDB:
  !pip install wandb

In [7]:
if WANDB:
  !wandb login #952756aa88ee3a472980bceb7d23632ac0a85500

In [8]:
if WANDB:
  import wandb

In [None]:
## importing all necessary modules
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2

import torch
import torch.nn as cnn
import torch.optim as optimisations
from torch.nn import functional as func
from torch.utils.data import DataLoader as dataloader
import torchvision.transforms as transforms
import torchvision
from torch.autograd import Variable
import gc

In [None]:
!pip install split-folders
import splitfolders as sf

In [None]:
!wget https://storage.googleapis.com/wandb_datasets/nature_12K.zip
!unzip nature_12K.zip

In [None]:
# Root folder of the extracted dataset; the other cells build their paths from it.
MAINPATH = "/content/inaturalist_12K/"

# Copy MAINPATH/train into MAINPATH/train_split using split-folders with a fixed
# seed so the split is reproducible.
# NOTE(review): fixed=100 presumably holds out 100 images per class for the
# validation split - confirm against the split-folders documentation.
sf.fixed(
    MAINPATH + "train",
    output=MAINPATH + "train_split",
    seed=1337,
    fixed=100,
    oversample=False,
    group_prefix=None,
)

Copying files: 9999 files [00:52, 190.64 files/s]


In [None]:
# Spatial size (width, height) of the images fed to the network.
sizew = 224
sizeh = 224

# Pre-processing applied to every image: take the central sizeh x sizew crop,
# convert to a tensor, then normalise each channel with the given mean / std
# (these look like the commonly used ImageNet statistics).
data_transforms = transforms.Compose(
    [
        transforms.CenterCrop((sizeh, sizew)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

In [None]:
def loadTrain_Val_TestData(BATCH_SIZE):
  """Create the datasets and data loaders for the train, validation and test splits.

  The train and validation images are read from MAINPATH + "train_split/", the
  test images from the dataset's own "val" folder. Every dataset uses the
  module-level `data_transforms`, and every loader shuffles and uses 2 workers
  with pinned memory.

  Args:
    BATCH_SIZE: batch size shared by all three loaders.

  Returns:
    Tuple (train_data, test_data, val_data,
           train_data_loader, test_data_loader, val_data_loader).
  """
  def _make_loader(dataset):
    # All three loaders are configured identically.
    return dataloader(dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2, pin_memory=1)

  def _make_dataset(folder):
    return torchvision.datasets.ImageFolder(root=folder, transform=data_transforms)

  split_root = MAINPATH + "train_split/"
  TESTPATH = "/content/inaturalist_12K/val"

  train_data = _make_dataset(split_root + "train/")
  val_data = _make_dataset(split_root + "val/")
  train_data_loader = _make_loader(train_data)
  val_data_loader = _make_loader(val_data)

  test_data = _make_dataset(TESTPATH)
  test_data_loader = _make_loader(test_data)

  return train_data, test_data, val_data, train_data_loader, test_data_loader, val_data_loader


In [None]:
# Hyper-parameter search space for the wandb sweep (Bayesian search maximising
# the "Val Accuracy" value that the training loop logs every epoch).
# List-valued entries hold one value per conv/pool stage (five stages).
sweep_config={
    'method' : 'bayes',
    'metric' : {
        'name' : 'Val Accuracy',
        'goal' : 'maximize',
    },
    'parameters' : {
        'epochs' : {
            'values' : [7, 10, 15],
        },
        'batch_size' : {
            'values' : [8, 16, 32],
        },
        # output channels of the five conv layers
        'out_layer_size' : {
            'values' : [[32, 64, 128, 256, 512], [256, 128, 64, 32, 16], [64, 64, 64, 64, 64], [128, 128, 256, 256, 512], [64, 64, 32, 32, 32], [64, 64, 64, 32, 32]],
        },
        'convkernel' : {
            'values' : [[7, 7, 5, 5, 3],  [5, 5, 3, 3, 3], [11, 7, 7, 5, 3]]
        },
        'convstride' : {
            'values' : [[1,1,1,1,1], [1, 1, 1, 2, 2], [2, 2, 2, 1, 1]]
        },
        'poolkernel' : {
            'values' : [[2, 2, 2, 2, 2], [3, 3, 3, 2, 2]]
        },
        'poolstride' : {
            'values' : [[1, 2, 2, 2, 2], [1, 2, 1, 2, 1], [1, 1, 1, 1, 1]]
        },
        # width of the hidden fully connected layer
        'denselayer' : {
            'values' : [ 32, 64, 128]
        },
        'learning_rate' : {
            'values' : [0.001, 0.002, 1.7e-4, 2.2e-4, 2.2e-5, 0.0014, 0.0022, 1.5e-5],
        },
        # 1 = use batch normalisation in every conv stage, 0 = skip it
        'batchnorm' : {
            'values' : [1, 0]
        },
        'dropout' : {
            'values' : [0.5, 0.25, 0.1, 0.2]
        },
        'weightdecay' : {
            'values' : [0, 0.00001, 0.00002]
        }
    }
}
# Register the sweep only when wandb is enabled. This statement must start at
# column 0: a stray leading space here is an IndentationError.
if WANDB:
    sweep_id = wandb.sweep(sweep_config, entity="blackcloud", project="cs6910_dl_assignment_2")

In [None]:


# Number of conv + pool stages in the network and the default input resolution.
numOfConvPoolLayers=5
sizew=224; sizeh=224

def _sizeAfterStage(size, layer, stage, axis):
  """Return one spatial dimension after the conv and the pool of a single stage.

  `layer` is one row of cnn_config:
  [in_channels, out_channels, conv_kernel, conv_stride, conv_padding, pool_kernel, pool_stride].
  Raises ValueError as soon as the dimension drops below 1.
  """
  # convolution: floor((n + 2p - k) / s) + 1
  size=1+np.floor((size-layer[2]+2*layer[4])/layer[3])
  if size<1:
    raise ValueError("conv layer %d reduces the %s to %d; kernel/stride too large for the input" % (stage+1, axis, size))
  # max-pool without padding: floor((n - k) / s) + 1
  size=1+np.floor((size-layer[5])/layer[6])
  if size<1:
    raise ValueError("pool layer %d reduces the %s to %d; kernel/stride too large for the input" % (stage+1, axis, size))
  return size

def computeFirstDenseLayer(cnn_config, width=None, height=None):
  """Number of features left after the conv/pool stack, i.e. the input size of the first dense layer.

  Args:
    cnn_config: indexable with one row per stage, each row being
      [in_channels, out_channels, conv_kernel, conv_stride, conv_padding,
       pool_kernel, pool_stride]. The first `numOfConvPoolLayers` rows are used.
    width: input image width; defaults to the module-level `sizew`.
    height: input image height; defaults to the module-level `sizeh`.

  Returns:
    width * height * channels after the last stage, as a numpy float scalar
    (callers convert it with `.astype(int)`).

  Raises:
    ValueError: if any stage shrinks a spatial dimension below 1.
  """
  w=sizew if width is None else width
  h=sizeh if height is None else height
  d=3
  for i in range(numOfConvPoolLayers):
    layer=cnn_config[i]
    d=layer[1]
    # Width and height are tracked independently so non-square inputs are handled.
    w=_sizeAfterStage(w, layer, i, "width")
    h=_sizeAfterStage(h, layer, i, "height")
  return np.float64(w*h*d)
def getconfig(out_layer_size, convkernel, convstride, poolkernel, poolstride):
  """Assemble the per-stage configuration table from the per-parameter lists.

  Each argument is a list with one entry per conv/pool stage. The input channels
  of a stage are the output channels of the previous one (3 for the first stage)
  and the convolution padding is 0 everywhere.

  Returns:
    numpy array with one row per stage:
    [in_channels, out_channels, conv_kernel, conv_stride, conv_padding,
     pool_kernel, pool_stride]. The table is also printed.
  """
  zero_padding=[0]*numOfConvPoolLayers
  previous_channels=[3]+out_layer_size[:-1]
  per_parameter_rows=[
      previous_channels,
      out_layer_size,
      convkernel,
      convstride,
      zero_padding,
      poolkernel,
      poolstride,
  ]
  # Transposing turns the "one row per parameter" layout into "one row per stage".
  table=np.transpose(per_parameter_rows)
  print('the cnn_config is ',table)
  return table

In [None]:
class CNN(cnn.Module):
  """Five conv -> ReLU -> (optional batch-norm) -> max-pool stages followed by a dense head.

  The sub-modules are registered as conv1..conv5, batchnorm1..batchnorm5,
  maxpool1..maxpool5, fullconn1, dropout and output.

  Args:
    cnn_config: indexable with five rows, one per stage, each row being
      [in_channels, out_channels, conv_kernel, conv_stride, conv_padding,
       pool_kernel, pool_stride].
    in_channels: unused; the input channels are taken from cnn_config[0][0].
      Kept so existing callers keep working.
    num_classes: size of the output layer.
    denselayer: width of the hidden fully connected layer.
    prob: dropout probability applied after the hidden dense layer.
  """

  # Number of conv/pool stages built from cnn_config.
  _NUM_STAGES=5

  def __init__(self, cnn_config, in_channels=3, num_classes=10, denselayer=64, prob=0.2):
    super(CNN, self).__init__()
    self.activ=self.Activ2d("relu")

    # Stages are created in order, so the registration order (and therefore the
    # state_dict layout) is conv1, batchnorm1, maxpool1, conv2, ...
    for stage in range(1, self._NUM_STAGES+1):
      row=cnn_config[stage-1]
      conv=cnn.Conv2d(in_channels=row[0], out_channels=row[1], kernel_size=row[2], stride=row[3], padding=row[4])
      cnn.init.xavier_uniform_(conv.weight)
      setattr(self, "conv%d" % stage, conv)
      setattr(self, "batchnorm%d" % stage, cnn.BatchNorm2d(row[1]))
      setattr(self, "maxpool%d" % stage, cnn.MaxPool2d(row[5], stride=row[6]))

    # Flattened feature count after the last pooling stage, as a plain int.
    self.val=int(computeFirstDenseLayer(cnn_config))
    self.fullconn1=cnn.Linear(in_features=self.val, out_features= denselayer)
    cnn.init.xavier_uniform_(self.fullconn1.weight)
    self.dropout=cnn.Dropout(p=prob)
    self.output=cnn.Linear(in_features= denselayer, out_features= num_classes)

  def forward(self, curinp, bn=0):
    """Run the network on a batch.

    Args:
      curinp: input batch for conv1.
      bn: truthy to apply batch normalisation after the activation of every stage.

    Returns:
      Raw class scores (logits), one row per sample. No softmax is applied here
      because CrossEntropyLoss applies log-softmax itself; applying softmax first
      would normalise twice. The arg-max over the logits equals the arg-max over
      the probabilities; use softmax(dim=1) on the result when probabilities
      are needed.
    """
    out=curinp
    for stage in range(1, self._NUM_STAGES+1):
      out=self.activ(getattr(self, "conv%d" % stage)(out))
      if bn:
        out=getattr(self, "batchnorm%d" % stage)(out)
      out=getattr(self, "maxpool%d" % stage)(out)

    # Keep the batch dimension and flatten the rest; a feature-count mismatch
    # then fails loudly in fullconn1 instead of silently regrouping samples.
    out=out.flatten(1)
    out=self.activ(self.fullconn1(out))
    out=self.dropout(out)
    return self.output(out)

  def Activ2d(self, str):
    """Return the activation function registered under the given name (only "relu")."""
    if str=="relu" : return func.relu
    raise ValueError("unsupported activation: %r" % (str,))


In [None]:
def accuracyAndLoss(data_loader, cnnModel, heyGPU, optimizer, scheduler, bn, lossfunc) :
      """Evaluate the model on a data loader.

      The model is switched to eval mode for the duration of the evaluation (so
      dropout is disabled and batch-norm uses its running statistics) and its
      previous train/eval mode is restored afterwards.

      Args:
        data_loader: iterable of (images, labels) batches.
        cnnModel: model called as cnnModel(images, bn).
        heyGPU: truthy to move every batch to CUDA first.
        optimizer, scheduler: unused; kept for interface compatibility.
        bn: batch-norm flag forwarded to the model.
        lossfunc: loss called as lossfunc(outputs, labels); assumed to return
          the mean loss over the batch (the default CrossEntropyLoss reduction).

      Returns:
        (accuracy in percent, mean loss per sample) as Python floats;
        (0.0, 0.0) when the loader yields no samples.
      """
      predictedright=0
      totalimgs=0
      globalloss=0.0
      was_training=cnnModel.training
      cnnModel.eval()
      try:
          with torch.no_grad():
              for img, y in data_loader:
                  if heyGPU : img, y=img.cuda(), y.cuda()
                  outp=cnnModel(img, bn)
                  batchsize=y.size(0)
                  # Weight the batch mean by the batch size so a smaller last
                  # batch does not skew the average.
                  globalloss+=lossfunc(outp, y).item()*batchsize
                  predicted=outp.argmax(dim=1)
                  predictedright+=(predicted==y).sum().item()
                  totalimgs+=batchsize
      finally:
          cnnModel.train(was_training)
      if totalimgs==0:
          return 0.0, 0.0
      return 100.0*predictedright/totalimgs, globalloss/totalimgs
def trainMyModel(EPOCHS, cnnModel, train_data_loader, heyGPU, optimizer, scheduler, val_data_loader, bn, lossfunc):
    """Train the model and report training loss and validation metrics after every epoch.

    Args:
      EPOCHS: number of passes over train_data_loader.
      cnnModel: model called as cnnModel(images, bn).
      train_data_loader: iterable of (images, labels) training batches.
      heyGPU: truthy to move every batch to CUDA first.
      optimizer: optimizer stepped after every batch.
      scheduler: learning-rate scheduler, also stepped after every batch.
      val_data_loader: loader handed to accuracyAndLoss at the end of each epoch.
      bn: batch-norm flag forwarded to the model.
      lossfunc: loss called as lossfunc(outputs, labels); assumed to return the
        mean loss over the batch.

    Each epoch prints the epoch index, the mean training loss per sample and the
    validation accuracy / loss; when WANDB is truthy the same values are logged
    to wandb.
    """
    for run in range(EPOCHS):
      globalloss=0.0
      seen=0
      cnnModel.train(True)
      for curimg, ytrue in train_data_loader:
        if heyGPU : curimg, ytrue=curimg.cuda(), ytrue.cuda()
        optimizer.zero_grad()
        outputt=cnnModel(curimg, bn)
        curloss=lossfunc(outputt, ytrue)
        curloss.backward()
        optimizer.step()
        scheduler.step()
        # .item() detaches the value; accumulating the tensor itself would keep
        # the autograd graph of every batch alive for the whole epoch.
        batchsize=ytrue.size(0)
        globalloss+=curloss.item()*batchsize
        seen+=batchsize

      trainloss=globalloss/seen if seen else 0.0
      c, d=accuracyAndLoss(val_data_loader, cnnModel, heyGPU, optimizer, scheduler, bn, lossfunc)

      print("epochs: ",run, "Training loss: ", trainloss, "val accuracy+loss ", c, d)
      if WANDB: wandb.log({"epochs":run, "Training loss":trainloss, "Val Accuracy":c, "Val Loss":d})


In [None]:
# Start from a clean slate before any model is built.
gc.collect()
torch.cuda.empty_cache()

def _trainConfiguration(cnn_config, bsz, denselayer, prob, learning_rate, momentum, weightdecay, epk, bn):
       """Build the loaders, model, SGD optimizer and scheduler for one configuration, train it, then release it."""
       train_data, test_data, val_data, train_data_loader, test_data_loader, val_data_loader = loadTrain_Val_TestData(bsz)
       cnnModel=CNN(cnn_config, denselayer=denselayer, prob=prob)
       heyGPU=torch.cuda.is_available()
       if heyGPU: cnnModel=cnnModel.cuda()
       lossfunc=cnn.CrossEntropyLoss()
       print(heyGPU)
       optimizer = torch.optim.SGD(cnnModel.parameters(), lr=learning_rate, momentum=momentum, weight_decay=weightdecay)
       # NOTE(review): len(train_data_loader) is already the number of batches, so
       # dividing by bsz again gives a very short restart period for a scheduler
       # that is stepped once per batch - confirm this is intended.
       scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=int(np.ceil(len(train_data_loader)/bsz)))
       try:
              trainMyModel(epk, cnnModel, train_data_loader, heyGPU, optimizer, scheduler, val_data_loader, bn, lossfunc)
       finally:
              # Drop the references before collecting so the memory can actually be freed.
              del cnnModel, optimizer, scheduler
              gc.collect()
              torch.cuda.empty_cache()

def SweepParent():
       """Train one model.

       With WANDB enabled this is the function run by the wandb agent: it starts
       a run, reads the hyper-parameters chosen by the sweep and trains with
       them. Otherwise it trains a single hard-coded configuration.
       """
       if WANDB:
              start=wandb.init()
              config=start.config
              ols=config.out_layer_size
              cnn_config=getconfig(ols, config.convkernel, config.convstride, config.poolkernel, config.poolstride)
              epk=config.epochs
              # This channel layout is always trained for 5 epochs, whatever the sweep picked.
              if ols==[256, 128, 64, 32, 16] : epk=5
              _trainConfiguration(cnn_config, config.batch_size, config.denselayer, config.dropout,
                                  config.learning_rate, 0.92, config.weightdecay, epk, config.batchnorm)
       else:
              # One row per stage: [in_ch, out_ch, conv_kernel, conv_stride, conv_padding, pool_kernel, pool_stride]
              cnn_config=[[3, 64, 11, 2, 0, 2, 1],
                          [64, 64, 7, 2, 0, 2, 1],
                          [64, 32, 5, 2, 0, 2, 1],
                          [32, 32, 3, 1, 0, 2, 1],
                          [32, 32, 3, 1, 0, 2, 1]]
              _trainConfiguration(cnn_config, 64, 64, 0.2, 0.0022, 0.94, 0, 15, 1)

In [None]:
# Entry point: let the wandb agent drive SweepParent when sweeps are enabled,
# otherwise train the single hard-coded configuration once.
if WANDB:
  wandb.agent(sweep_id, SweepParent)
else:
  SweepParent()