In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader, sampler
import torchvision
import torchvision.transforms as transforms
import numpy as np
import matplotlib.pyplot as plt

  'Matplotlib is building the font cache using fc-list. '


In [2]:
class IdentityBlock(nn.Module):
    
    def __init__(self, f, in_channels, out_channels):
        """
        Implementation of the class IdentityBlock as defined in Figure 3

        Arguments:
        f -- integer, specifying the shape of the middle CONV's window for the main path
        in_channels -- integer, specifying the number of incoming channels to this IdentityBlock
        out_channels -- a triple where ith component is number of filters for the ith CONV2D 
                        sub-block in the main path
        """
        super().__init__() 

        self.f = f
        self.in_channels = in_channels
        self.out_channels = out_channels

        #Replace "None" in the right hand sides (rhs) of following with your code. Each rhs is one line of code. 
        #Description and hints above may be helpful

        self.conv1 = nn.Conv2d(in_channels, out_channels[0], kernel_size = (1, 1), stride = (1, 1))
        self.bn1 =nn.BatchNorm2d(out_channels[0],affine=True)
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv2d(out_channels[0],out_channels[1],kernel_size= (3,3),stride=(1,1),padding=(1,1))
        self.bn2 = nn.BatchNorm2d(out_channels[1],affine=True)
        self.relu2 = nn.ReLU()
        self.conv3 = nn.Conv2d(out_channels[1], out_channels[2], kernel_size = (1, 1), stride = (1, 1))
        self.bn3 = nn.BatchNorm2d(out_channels[2],affine=True) 
    
    def forward(self, x):
        """
        Forward propagation through IdentityBlock

        Arguments:
        x -- the input, a 4D Tensor of dimension (batch_size x in_channels x height x width)

        Returns:
        a 4D Tensor of dimension (batch_size x output_channels[2] x height x width)    
        """
        #Replace "None" in the right hand sides (rhs) of following with your code. 
        #Few of them already filled to help you.
        #Each rhs is one line of code. 
        #Description and hints above may be helpful
        
        x_shortcut = x #save x
        x = self.conv1(x)
        x = self.bn1(x) # apply first batch normalization
        x = self.relu1(x)
        x = self.conv2(x) # apply second convolution
        x = self.bn2(x)
        x = self.relu2(x) # apply second relu activation
        x = self.conv3(x) # apply 3rd convolution
        x = self.bn3(x) # apply 3rd batch normalization
        x = F.relu(x + x_shortcut)
        return x

In [3]:
class ConvBlock(nn.Module):
    
    def __init__(self, f, in_channels, out_channels, s = 2):
        """
        Implementation of the class ConvBlock as defined in Figure 4

        Arguments:
        f -- integer, specifying the shape of the middle CONV's window for the main path
        in_channels -- integer, specifying the number of incoming channels to this ConvBlock
        out_channels -- a triple where ith component is number of filters for the ith CONV2D 
                        sub-block in the main path
        s -- integer specifying the stride parameter required for first convolution and 
             convolution for shortcut path. Default is set to 2.
        """
        super().__init__()
        
        self.f = f
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.s = s
        
        # Replace "None" in the right hand sides (rhs) of following with your code. Each rhs is one line of code. 
        # Description and hints above may be helpful
        
        self.conv1 = nn.Conv2d(in_channels, out_channels[0], kernel_size = (1, 1), stride = (s, s))
        self.bn1 =nn.BatchNorm2d(out_channels[0],affine=True)
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv2d(out_channels[0],out_channels[1],kernel_size= (3,3),stride=(1,1),padding=(1,1))
        self.bn2 = nn.BatchNorm2d(out_channels[1],affine=True)
        self.relu2 = nn.ReLU()
        self.conv3 = nn.Conv2d(out_channels[1], out_channels[2], kernel_size = (1, 1), stride = (1, 1))
        self.bn3 = nn.BatchNorm2d(out_channels[2],affine=True) 
        #self.relu3 = nn.ReLU()
        self.conv_shortcut = nn.Conv2d(in_channels, out_channels[2], kernel_size = (1, 1), stride = (s, s))
        self.bn_shortcut = nn.BatchNorm2d(out_channels[2],affine=True) 
        
    def forward(self, x):
        """
        Forward propagation through ConvBlock

        Arguments:
        x -- the input, a 4D Tensor of dimension (batch_size x in_channels x height x width)

        Returns:
        a 4D Tensor of dimension (batch_size x output_channels[2] x height x width)    
        """
        
        #Replace "None" in the right hand sides (rhs) of following with your code. 
        #Few of them already filled to help you.
        #Each rhs is one line of code. 
        #Description and hints above may be helpful
        
        x_shortcut = x #save x
        x = self.conv1(x)
        x = self.bn1(x) # apply first batch normalization
        x = self.relu1(x)
        x = self.conv2(x) # apply second convolution
        x = self.bn2(x)
        x = self.relu2(x) # apply second relu activation
        x = self.conv3(x) # apply 3rd convolution
        x = self.bn3(x) # apply 3rd batch normalization
        
        x_shortcut = self.conv_shortcut(x_shortcut) # apply convolution for shortcut path
        
        x_shortcut = self.bn_shortcut(x_shortcut) # apply batch normalization for shortcut path
        x = F.relu(x + x_shortcut) # apply relu activation to aggregate from main and shortcut paths. Use F.relu
        return x

In [4]:
class ResNet50(nn.Module):
    
    def __init__(self):
        
        super().__init__()
           
        self.conv_stg1 = nn.Conv2d(3, 64, kernel_size = (7, 7), stride = (2, 2), padding = (3, 3))
        self.bn_stg1 = nn.BatchNorm2d(64)
        self.max_pool_stg1 = nn.MaxPool2d(kernel_size = (3, 3), stride = (2, 2))
        
        self.convblk1_stg2 = ConvBlock(3, 64, (64, 64, 256), s = 1)
        self.idblk1_stg2 = IdentityBlock(3, 256, (64, 64, 256))
        self.idblk2_stg2 = IdentityBlock(3, 256, (64, 64, 256))
        
        self.convblk1_stg3 = ConvBlock(3, 256, (128, 128, 512), s = 2)
        self.idblk1_stg3 = IdentityBlock(3, 512, (128, 128, 512))
        self.idblk2_stg3 = IdentityBlock(3, 512, (128, 128, 512))
        self.idblk3_stg3 = IdentityBlock(3, 512, (128, 128, 512))
        
        self.convblk1_stg4 = ConvBlock(3, 512, (256, 256, 1024), s = 2)
        self.idblk1_stg4 = IdentityBlock(3, 1024, (256, 256, 1024))
        self.idblk2_stg4 = IdentityBlock(3, 1024, (256, 256, 1024))
        self.idblk3_stg4 = IdentityBlock(3, 1024, (256, 256, 1024))
        self.idblk4_stg4 = IdentityBlock(3, 1024, (256, 256, 1024))
        self.idblk5_stg4 = IdentityBlock(3, 1024, (256, 256, 1024))
        
        self.convblk1_stg5 = ConvBlock(3, 1024, (512, 512, 2048), s = 2)
        self.idblk1_stg5 = IdentityBlock(3, 2048, (512, 512, 2048))
        self.idblk2_stg5 = IdentityBlock(3, 2048, (512, 512, 2048))
        
        self.avg_pool = nn.AvgPool2d(kernel_size = (2, 2))
        self.fc =  nn.Linear(2048, 10)
        
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_uniform(m.weight.data)                
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()               
       
        
    def forward(self, x):
        x = self.conv_stg1(x)
        x = self.bn_stg1(x)
        x = self.max_pool_stg1(x)
        x = self.convblk1_stg2(x)
        x = self.idblk1_stg2(x)
        x = self.idblk2_stg2(x)
        x = self.convblk1_stg3(x)
        x = self.idblk1_stg3(x)
        x = self.idblk2_stg3(x)
        x = self.idblk3_stg3(x)
        x = self.convblk1_stg4(x)
        x = self.idblk1_stg4(x)
        x = self.idblk2_stg4(x)
        x = self.idblk3_stg4(x)
        x = self.idblk4_stg4(x)
        x = self.idblk5_stg4(x)
        x = self.convblk1_stg5(x)
        x = self.idblk1_stg5(x)
        x = self.idblk2_stg5(x)
        x = self.avg_pool(x)
        x = x.view(-1, 2048)        
        x = self.fc(x)
        return x

In [6]:
transform = transforms.Compose(
    [transforms.Resize(size=(64,64), interpolation=2),transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
trainset = torchvision.datasets.CIFAR10(root='./data',train=True,download=False,transform=transform)
trainloader = torch.utils.data.DataLoader(trainset,batch_size=32,shuffle=True,num_workers=2)
testset = torchvision.datasets.CIFAR10(root='./data',train=False,download=False,transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=32,
                                         shuffle=False, num_workers=2)

In [7]:
import torch.optim as optim
from PIL import Image,ImageOps
torch.manual_seed(23)
learning_rate = 0.1
resnet50 = ResNet50()
criterion = torch.nn.CrossEntropyLoss().cuda()






In [38]:
#print(resnet50.parameters)

In [8]:
#code on GPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [9]:
#for gpu
resnet50.to(device)

ResNet50(
  (conv_stg1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
  (bn_stg1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (max_pool_stg1): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  (convblk1_stg2): ConvBlock(
    (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu1): ReLU()
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu2): ReLU()
    (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
    (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv_shortcut): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
    (bn_shortcut): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_sta

In [13]:
EPOCHS = [50,25,25]
learning_rate = 0.1
for num_epochs in EPOCHS:
    print("Traning model for %d epochs with learning_rate %f" % (num_epochs,learning_rate))
    for epoch in range(num_epochs):
        running_loss = 0.0
        optimizer = torch.optim.Adam(resnet50.parameters(),lr = learning_rate)
        for i , data in enumerate(trainloader,0):
            #gets the inputs
            inputs,labels =data
            inputs, labels = inputs.to(device),labels.to(device)
        
            #zero the parameter gradients
            optimizer.zero_grad()
        
            #forward+backward+optimize
            output = resnet50(inputs)
            loss = criterion(output,labels)
            loss.backward()
            optimizer.step()
        
            #print the statistics
            running_loss += loss.item()
            if i%2000 == 0:              #print every 2000 mini-batches
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0
    learning_rate *= 0.1
print("finished Training")


Traning model for 50 epochs with learning_rate 0.100000
[1,     1] loss: 0.001
[2,     1] loss: 0.001
[3,     1] loss: 0.001
[4,     1] loss: 0.001
[5,     1] loss: 0.001
[6,     1] loss: 0.001
[7,     1] loss: 0.001
[8,     1] loss: 0.001
[9,     1] loss: 0.001
[10,     1] loss: 0.001
[11,     1] loss: 0.001
[12,     1] loss: 0.001
[13,     1] loss: 0.001
[14,     1] loss: 0.001
[15,     1] loss: 0.000
[16,     1] loss: 0.000
[17,     1] loss: 0.001
[18,     1] loss: 0.000
[19,     1] loss: 0.001
[20,     1] loss: 0.001
[21,     1] loss: 0.000
[22,     1] loss: 0.000
[23,     1] loss: 0.001
[24,     1] loss: 0.001
[25,     1] loss: 0.001
[26,     1] loss: 0.000
[27,     1] loss: 0.000
[28,     1] loss: 0.000
[29,     1] loss: 0.000
[30,     1] loss: 0.000
[31,     1] loss: 0.000
[32,     1] loss: 0.000
[33,     1] loss: 0.000
[34,     1] loss: 0.000
[35,     1] loss: 0.000
[36,     1] loss: 0.000
[37,     1] loss: 0.000
[38,     1] loss: 0.000
[39,     1] loss: 0.000
[40,     1] loss:

In [34]:
MyModel = resnet50
torch.save(MyModel.state_dict(), './data/model.pth')

In [29]:
trial  = torch.load("./model.pth")
trial.keys()

odict_keys(['conv_stg1.weight', 'conv_stg1.bias', 'bn_stg1.weight', 'bn_stg1.bias', 'bn_stg1.running_mean', 'bn_stg1.running_var', 'bn_stg1.num_batches_tracked', 'convblk1_stg2.conv1.weight', 'convblk1_stg2.conv1.bias', 'convblk1_stg2.bn1.weight', 'convblk1_stg2.bn1.bias', 'convblk1_stg2.bn1.running_mean', 'convblk1_stg2.bn1.running_var', 'convblk1_stg2.bn1.num_batches_tracked', 'convblk1_stg2.conv2.weight', 'convblk1_stg2.conv2.bias', 'convblk1_stg2.bn2.weight', 'convblk1_stg2.bn2.bias', 'convblk1_stg2.bn2.running_mean', 'convblk1_stg2.bn2.running_var', 'convblk1_stg2.bn2.num_batches_tracked', 'convblk1_stg2.conv3.weight', 'convblk1_stg2.conv3.bias', 'convblk1_stg2.bn3.weight', 'convblk1_stg2.bn3.bias', 'convblk1_stg2.bn3.running_mean', 'convblk1_stg2.bn3.running_var', 'convblk1_stg2.bn3.num_batches_tracked', 'convblk1_stg2.conv_shortcut.weight', 'convblk1_stg2.conv_shortcut.bias', 'convblk1_stg2.bn_shortcut.weight', 'convblk1_stg2.bn_shortcut.bias', 'convblk1_stg2.bn_shortcut.running

total 10000, correct7413
Accuracy 74.0
finished Testing


In [32]:
#use the saved model
MyModel = ResNet50()
MyModel.load_state_dict(torch.load('./model.pth'))
MyModel.to(device)



ResNet50(
  (conv_stg1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
  (bn_stg1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (max_pool_stg1): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  (convblk1_stg2): ConvBlock(
    (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu1): ReLU()
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu2): ReLU()
    (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
    (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv_shortcut): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
    (bn_shortcut): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_sta

In [33]:
#testing
correct= 0
total= 0
for i , data in enumerate(testloader,0):
        #gets the inputs
        inputs,labels = data
        inputs, labels = inputs.to(device),labels.to(device)
        output = MyModel(inputs.cuda())
        _,prediction= torch.max(output.data,1)
        correct += (prediction == labels.cuda()).sum()
        total+= labels.size(0)

print("total {}, correct{}" .format(total,correct ))
print("Accuracy " + str(float(correct * 100. /total)))
print("finished Testing")


total 10000, correct7413
Accuracy 74.0
finished Testing
