In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary

device='cuda'

torch.backends.cudnn.benchmark=True

In [2]:
cifar10_train_transform = torchvision.transforms.Compose(
    [
     torchvision.transforms.RandomHorizontalFlip(),
     torchvision.transforms.RandomCrop(32,4),
     transforms.ToTensor(),
     transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261))        
     #transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))        
    ])

cifar10_test_transform = torchvision.transforms.Compose(
    [
     transforms.ToTensor(),
     transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261))    
     #transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))                
    ])

cifar_train_ds = torchvision.datasets.CIFAR10('./data/', train=True, transform=cifar10_train_transform, target_transform=None, download=True)
cifar_test_ds = torchvision.datasets.CIFAR10('./data/', train=False, transform=cifar10_test_transform, target_transform=None, download=True)


trainloader = torch.utils.data.DataLoader(cifar_train_ds, batch_size=128,
                                          shuffle=True, num_workers=16)


testloader = torch.utils.data.DataLoader(cifar_test_ds, batch_size=32,
                                          shuffle=True, num_workers=2)

print("Train size {} Test size {}".format(len(trainloader),len(testloader)))

Files already downloaded and verified
Files already downloaded and verified
Train size 391 Test size 313


In [3]:
# get some random training images
dataiter = iter(trainloader)
images, labels = next(dataiter)
print("Image shape {} Label shape {}".format(images.shape,labels.shape))
print("Labels {}".format(labels))

testiter = iter(testloader)
images, labels = next(dataiter)
print("Image shape {} Label shape {}".format(images.shape,labels.shape))
print("Labels {}".format(labels))


Image shape torch.Size([128, 3, 32, 32]) Label shape torch.Size([128])
Labels tensor([6, 4, 8, 7, 2, 8, 5, 6, 2, 6, 6, 6, 7, 9, 6, 9, 9, 2, 1, 6, 5, 0, 9, 2,
        4, 6, 7, 9, 2, 7, 4, 8, 0, 3, 6, 1, 8, 8, 9, 3, 4, 6, 7, 8, 9, 7, 8, 1,
        5, 7, 8, 4, 3, 5, 4, 5, 0, 8, 9, 0, 8, 6, 4, 7, 0, 1, 1, 0, 9, 4, 3, 6,
        1, 0, 6, 7, 0, 6, 2, 5, 7, 2, 5, 6, 0, 5, 0, 9, 6, 3, 6, 9, 6, 6, 6, 5,
        9, 2, 9, 3, 9, 1, 0, 1, 8, 3, 5, 0, 0, 9, 0, 4, 6, 2, 9, 7, 5, 7, 6, 8,
        9, 1, 4, 5, 3, 7, 5, 9])
Image shape torch.Size([128, 3, 32, 32]) Label shape torch.Size([128])
Labels tensor([7, 4, 7, 4, 4, 6, 3, 0, 9, 1, 6, 9, 3, 8, 0, 8, 6, 8, 2, 6, 3, 7, 1, 0,
        9, 4, 9, 6, 4, 7, 5, 5, 0, 3, 9, 5, 1, 1, 6, 3, 1, 5, 5, 8, 8, 5, 7, 6,
        2, 9, 4, 0, 2, 2, 4, 2, 1, 1, 3, 0, 8, 0, 4, 9, 8, 1, 3, 6, 5, 5, 8, 6,
        7, 9, 3, 5, 4, 0, 9, 8, 5, 7, 5, 5, 2, 4, 2, 5, 3, 8, 2, 4, 5, 8, 5, 8,
        9, 5, 1, 4, 0, 8, 1, 0, 6, 3, 8, 9, 4, 2, 8, 4, 6, 0, 2, 2, 9, 7, 5, 9,
        4, 

In [4]:
class ResidualBlock(nn.Module):
    def __init__(self,inchannels, outchannels,biasFlag=False):
        super(ResidualBlock,self).__init__()
      
        strideForFirstLayer= int(outchannels/inchannels) #this will be int
        
        if strideForFirstLayer == 0 :
            #Condition where outchannels might be less than in channels
            strideForFirstLayer= int(inchannels/outchannels) #this will be int
            
        #first layer will have stride of 2 (for downsampling ) or 1
        self.conv1 = nn.Conv2d(inchannels,outchannels, 3,strideForFirstLayer,padding=1,bias=biasFlag)
        self.identityConn = nn.Sequential() # will be empty if inchannels== outchannels
        if strideForFirstLayer > 1:
            self.identityConn.add_module("shortcut_conv", nn.Conv2d(inchannels,outchannels, 1,stride=strideForFirstLayer,padding=0,bias=biasFlag))
            self.identityConn.add_module("shortcut_bn",nn.BatchNorm2d(outchannels))
            
        self.bn1 = nn.BatchNorm2d(outchannels)
        self.conv2 = nn.Conv2d(outchannels,outchannels, 3,stride=1,padding=1,bias=biasFlag)
        self.bn2 = nn.BatchNorm2d(outchannels)
        
    
    def forward(self,x):
        origX = x
        #main residual block
        x = self.conv1(x)
        x = self.bn1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = self.bn2(x)
        #identity connection
        identityX = self.identityConn(origX)
        #add residual and identity
        xsum = x + identityX 
        return F.relu(xsum)
            
class ResNetBlock(nn.Module):        
    def __init__(self,inputCh, outputCh,numModules,biasFlag=False):
        super(ResNetBlock,self).__init__()
        self.seqModule = nn.Sequential()
        #first layer is a downsample layer
        self.seqModule.add_module("downsamp_layer",ResidualBlock(inputCh,outputCh,biasFlag))
        for i in range(1,numModules):
            self.seqModule.add_module("residual_{}".format(i),ResidualBlock(outputCh,outputCh,biasFlag))
            
    def forward(self,x):
        return self.seqModule(x)
    
class MyCifarResnetModel(nn.Module):        
    def __init__(self,biasFlag=False):
        super(MyCifarResnetModel,self).__init__()
        self.allLayers = nn.Sequential()
        self.allLayers.add_module("layer0",nn.Conv2d(3,16,3,stride=1,padding=1,bias=biasFlag))
        self.allLayers.add_module("layer0_bn",nn.BatchNorm2d(16))
        self.allLayers.add_module("layer0_relu",nn.ReLU(True))
        self.allLayers.add_module("block1",ResNetBlock(16,16,3,biasFlag))
        self.allLayers.add_module("block2",ResNetBlock(16,32,3,biasFlag))
        self.allLayers.add_module("block3",ResNetBlock(32,64,3,biasFlag))
        self.allLayers.add_module("avg_pool",nn.AvgPool2d(8))
        self.allLayers.add_module("layer7",nn.Flatten())
        self.allLayers.add_module("layer8",nn.Linear(64,10))
        
        #init weights
        for m in self.modules():
            if isinstance(m,nn.Conv2d):
                nn.init.kaiming_normal_(m.weight,mode='fan_out',nonlinearity='relu')
            elif isinstance(m,nn.BatchNorm2d):
                nn.init.constant_(m.weight,1)
                nn.init.constant_(m.bias,0)
            elif isinstance(m, nn.Linear):
                nn.init.normal(m.weight, std=1e-3)
                nn.init.constant(m.bias, 0)                
    
    def forward(self,x):
        return self.allLayers(x)


    
    
my_model = MyCifarResnetModel(biasFlag=True).cuda()
summary(my_model, input_size=(3,32,32))





----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 32, 32]             448
       BatchNorm2d-2           [-1, 16, 32, 32]              32
              ReLU-3           [-1, 16, 32, 32]               0
            Conv2d-4           [-1, 16, 32, 32]           2,320
       BatchNorm2d-5           [-1, 16, 32, 32]              32
            Conv2d-6           [-1, 16, 32, 32]           2,320
       BatchNorm2d-7           [-1, 16, 32, 32]              32
     ResidualBlock-8           [-1, 16, 32, 32]               0
            Conv2d-9           [-1, 16, 32, 32]           2,320
      BatchNorm2d-10           [-1, 16, 32, 32]              32
           Conv2d-11           [-1, 16, 32, 32]           2,320
      BatchNorm2d-12           [-1, 16, 32, 32]              32
    ResidualBlock-13           [-1, 16, 32, 32]               0
           Conv2d-14           [-1, 16,

In [5]:
import  time 

loss_fn = nn.CrossEntropyLoss().cuda()

model = my_model
model.train()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9,weight_decay=0.0001)

lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,milestones=[100,150])

#Basic training and evaluation steps are identical
#the only difference is that during evaluation mode,
#  the model should be set to eval mode and steps should be executed with no_grad
def trainAndEval(dsloader,trainingMode=True):
    epoch_start_time = time.time()
    e_num_correct = 0
    e_total_samples = 0
    optimizer.zero_grad()  # clear previous gradients
    if trainingMode:
        modeStr = "Train"
    else:
        modeStr = "Eval"
    for i, dataBatch in enumerate(dsloader):
        load_ts = time.time()
        data, target = dataBatch
        load_te = time.time()
        X = data.to('cuda')
        Y = target.to('cuda')

        #aprint("Shape of X {}. Target shape : {}",X.shape,Y.shape)
        
        output_batch = model(X)           # compute model output
        #print("Output from model {}",output_batch)
        loss = loss_fn(output_batch, Y)  # calculate loss

        if trainingMode :
            loss.backward()        # compute gradients of all variables wrt loss
            optimizer.step()       # perform updates using calculated gradients
            optimizer.zero_grad()  # clear  gradients for the next iter

        pred_idx = torch.argmax(output_batch,dim=1)
        correct = (pred_idx == Y).float().sum()
        e_num_correct += correct
        e_total_samples += X.shape[0]
    epoch_end_time = time.time()
    print("Epoch {} {} Accuracy  {} training time: {}".format(epoch,modeStr,e_num_correct.float()/e_total_samples,epoch_end_time-epoch_start_time))
    
    
for epoch in range(200):
    print("Epoch {} Learning Rate {}".format(epoch,lr_scheduler.get_lr()))
    model.train()
    trainAndEval(trainloader,trainingMode=True)
    with torch.no_grad():
        model.eval()
        trainAndEval(testloader,trainingMode=False)
    lr_scheduler.step()
    

Epoch 0 Learning Rate [0.1]
Epoch 0 Train Accuracy  0.3847000002861023 training time: 7.7804906368255615
Epoch 0 Eval Accuracy  0.48179998993873596 training time: 1.2877135276794434
Epoch 1 Learning Rate [0.1]
Epoch 1 Train Accuracy  0.5950799584388733 training time: 7.580754518508911
Epoch 1 Eval Accuracy  0.586899995803833 training time: 1.2424144744873047
Epoch 2 Learning Rate [0.1]
Epoch 2 Train Accuracy  0.678879976272583 training time: 7.596749782562256
Epoch 2 Eval Accuracy  0.5655999779701233 training time: 1.302048683166504
Epoch 3 Learning Rate [0.1]
Epoch 3 Train Accuracy  0.7299399971961975 training time: 7.6442694664001465
Epoch 3 Eval Accuracy  0.6423999667167664 training time: 1.2325327396392822
Epoch 4 Learning Rate [0.1]
Epoch 4 Train Accuracy  0.762779951095581 training time: 7.726342678070068
Epoch 4 Eval Accuracy  0.7580999732017517 training time: 1.189763069152832
Epoch 5 Learning Rate [0.1]
Epoch 5 Train Accuracy  0.7846399545669556 training time: 7.60582470893859

Epoch 45 Train Accuracy  0.9073399901390076 training time: 7.77794075012207
Epoch 45 Eval Accuracy  0.8434000015258789 training time: 1.2235441207885742
Epoch 46 Learning Rate [0.1]
Epoch 46 Train Accuracy  0.906279981136322 training time: 7.789281845092773
Epoch 46 Eval Accuracy  0.8662999868392944 training time: 1.241004228591919
Epoch 47 Learning Rate [0.1]
Epoch 47 Train Accuracy  0.9089399576187134 training time: 7.844830274581909
Epoch 47 Eval Accuracy  0.8603999614715576 training time: 1.2416126728057861
Epoch 48 Learning Rate [0.1]
Epoch 48 Train Accuracy  0.9099799990653992 training time: 7.7651190757751465
Epoch 48 Eval Accuracy  0.8624999523162842 training time: 1.2353122234344482
Epoch 49 Learning Rate [0.1]
Epoch 49 Train Accuracy  0.9107999801635742 training time: 7.79703426361084
Epoch 49 Eval Accuracy  0.8592000007629395 training time: 1.2717218399047852
Epoch 50 Learning Rate [0.1]
Epoch 50 Train Accuracy  0.9091799855232239 training time: 7.880209445953369
Epoch 50 Ev

Epoch 90 Train Accuracy  0.9229399561882019 training time: 7.841996669769287
Epoch 90 Eval Accuracy  0.8747999668121338 training time: 1.2659013271331787
Epoch 91 Learning Rate [0.1]
Epoch 91 Train Accuracy  0.9244799613952637 training time: 7.904457092285156
Epoch 91 Eval Accuracy  0.8534999489784241 training time: 1.2703776359558105
Epoch 92 Learning Rate [0.1]
Epoch 92 Train Accuracy  0.9234799742698669 training time: 7.9103004932403564
Epoch 92 Eval Accuracy  0.8434000015258789 training time: 1.2852962017059326
Epoch 93 Learning Rate [0.1]
Epoch 93 Train Accuracy  0.9234599471092224 training time: 7.843433141708374
Epoch 93 Eval Accuracy  0.8761999607086182 training time: 1.256296157836914
Epoch 94 Learning Rate [0.1]
Epoch 94 Train Accuracy  0.9251799583435059 training time: 7.76575231552124
Epoch 94 Eval Accuracy  0.871399998664856 training time: 1.2249460220336914
Epoch 95 Learning Rate [0.1]
Epoch 95 Train Accuracy  0.9224199652671814 training time: 7.847575664520264
Epoch 95 E

Epoch 132 Train Accuracy  0.9889599680900574 training time: 7.903865814208984
Epoch 132 Eval Accuracy  0.9168999791145325 training time: 1.2413058280944824
Epoch 133 Learning Rate [0.010000000000000002]
Epoch 133 Train Accuracy  0.9890799522399902 training time: 7.768604516983032
Epoch 133 Eval Accuracy  0.9174000024795532 training time: 1.211235761642456
Epoch 134 Learning Rate [0.010000000000000002]
Epoch 134 Train Accuracy  0.9890199899673462 training time: 7.781193971633911
Epoch 134 Eval Accuracy  0.9174000024795532 training time: 1.2779829502105713
Epoch 135 Learning Rate [0.010000000000000002]
Epoch 135 Train Accuracy  0.9892799854278564 training time: 7.845682859420776
Epoch 135 Eval Accuracy  0.9177999496459961 training time: 1.2134332656860352
Epoch 136 Learning Rate [0.010000000000000002]
Epoch 136 Train Accuracy  0.9892999529838562 training time: 7.890537738800049
Epoch 136 Eval Accuracy  0.9170999526977539 training time: 1.2702713012695312
Epoch 137 Learning Rate [0.010000

Epoch 172 Eval Accuracy  0.9199999570846558 training time: 1.2636878490447998
Epoch 173 Learning Rate [0.0010000000000000002]
Epoch 173 Train Accuracy  0.9950999617576599 training time: 7.834820747375488
Epoch 173 Eval Accuracy  0.9194999933242798 training time: 1.203376054763794
Epoch 174 Learning Rate [0.0010000000000000002]
Epoch 174 Train Accuracy  0.9957000017166138 training time: 7.802417755126953
Epoch 174 Eval Accuracy  0.9187999963760376 training time: 1.2212965488433838
Epoch 175 Learning Rate [0.0010000000000000002]
Epoch 175 Train Accuracy  0.9956799745559692 training time: 7.88286828994751
Epoch 175 Eval Accuracy  0.9187999963760376 training time: 1.2459728717803955
Epoch 176 Learning Rate [0.0010000000000000002]
Epoch 176 Train Accuracy  0.9957000017166138 training time: 7.91748571395874
Epoch 176 Eval Accuracy  0.9199000000953674 training time: 1.2548887729644775
Epoch 177 Learning Rate [0.0010000000000000002]
Epoch 177 Train Accuracy  0.9956799745559692 training time: 8