<a href="https://colab.research.google.com/github/nakul-jindal/Resnet-on-CIFAR10/blob/master/resnet_on_cifar10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

network inputs are 32×32 images, with the per-pixel mean
subtracted. 

The subsampling is performed by convolutions with a stride of 2. 

The network ends with a global average pooling, a 10-way fully-connected layer, and softmax. 

There are total 6n+2 stacked weighted layers

output map size 32×32 16×16 8×8
 ###layers         1+2n     2n 2n
 ###filters        16       32 64

weight decay of 0.0001 and momentum of 0.9

no dropout

batch size of 128 on GPU

We start with a learning
rate of 0.1, divide it by 10 at intervals

4 pixels are padded on each side,
and a 32×32 crop is randomly sampled from the padded
image or its horizontal flip. For testing, we only evaluate
the single view of the original 32×32 image

compare n = {3, 5, 7, 9}, leading to 20, 32, 44, and
56-layer networks


In [0]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.optim.lr_scheduler import StepLR

In [2]:
transform = transforms.Compose([transforms.RandomHorizontalFlip(p=0.5) , transforms.RandomCrop(size=32,padding=4), transforms.ToTensor(), transforms.Normalize((0.4914, 0.4822, 0.4465),(0.247, 0.243, 0.261))])
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=128, shuffle=False, num_workers=2)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [3]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [0]:
class simple(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, down_sample=None) :
        super(simple, self).__init__()
        self.conv1=nn.Conv2d(in_channels, out_channels, kernel_size=3,stride=stride, padding=1, bias=False)
        self.bn1=nn.BatchNorm2d(out_channels)
        self.conv2=nn.Conv2d(out_channels, out_channels, kernel_size=3,stride=1, padding=1, bias=False)
        self.bn2=nn.BatchNorm2d(out_channels)
        self.down_sample=down_sample
        self.relu=nn.ReLU(inplace=True)

    def forward(self,x):
      res=x
      out = self.relu(self.bn1(self.conv1(x)))
      out = self.bn2(self.conv2(out))
      if self.down_sample:
        res=self.down_sample(x)
      out= self.relu(out+res)
      return out    

In [0]:
class Bottleneck(nn.Module):
  def __init__(self, in_channels, out_channels, stride=1, down_sampling=None):
    super(Bottleneck, self).__init__()
    self.conv1=nn.Conv2d(in_channels, out_channels//4, kernel_size=1,stride=stride, padding=0, bias=False)
    self.bn1=nn.BatchNorm2d(out_channels//4)
    self.relu=nn.ReLU(inplace=True)  
    self.conv2=nn.Conv2d(out_channels//4, out_channels//4, kernel_size=3,stride=1, padding=1, bias=False)
    self.bn2=nn.BatchNorm2d(out_channels//4)
    self.conv3=nn.Conv2d(out_channels//4, out_channels, kernel_size=1,stride=1, padding=0, bias=False)
    self.bn3=nn.BatchNorm2d(out_channels)

    self.down_sampling=down_sampling
  def forward(self,x):
    res=x
    out = self.conv1(x)
    out = self.bn1(out)
    out = self.relu(out)
    out = self.conv2(out)
    out = self.bn2(out)
    out = self.relu(out)
    out=self.conv3(out)
    out=self.bn3(out)
    if self.down_sampling:
      res=self.down_sampling(x)
    out = out+res 
    out= self.relu(out)
    return out

In [0]:
class Net(nn.Module):
    def __init__(self, block, layers, num_classes=10):
       super(Net, self).__init__()
       self.in_channels=16
       self.conv=nn.Conv2d(3,16, kernel_size=3,stride=1, padding=1, bias=False)
       self.bn=nn.BatchNorm2d(16)
       self.relu = nn.ReLU(inplace=True)
       self.l1 = self.make_layer(block, 16, layers[0])
       self.l2 = self.make_layer(block, 32, layers[1], 2)
       self.l3 = self.make_layer(block, 64, layers[2], 2)
       self.avg_pool = nn.AvgPool2d(8)
       self.fc = nn.Linear(64, num_classes)

    def make_layer(self, block, out_channels, blocks, stride=1):
       down_sample = None
       if (stride != 1) or (self.in_channels != out_channels):
            down_sample = nn.Sequential( nn.Conv2d(self.in_channels, out_channels, kernel_size=3,stride=stride, padding=1, bias=False), nn.BatchNorm2d(out_channels))
       layers = []
       layers.append(block(self.in_channels, out_channels, stride, down_sample))
       self.in_channels=out_channels
       for i in range(1, blocks):
            layers.append(block(out_channels, out_channels))
       return nn.Sequential(*layers)
    
    def forward(self, x):
        out = self.relu(self.bn(self.conv(x)))
        out = self.l1(out)
        out = self.l2(out)
        out = self.l3(out)
        out = self.avg_pool(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out


In [0]:
n=7
model = Net(block=simple, layers=[n,n,n]).to(device)

In [0]:
import torch.optim as optim
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)              
scheduler = StepLR(optimizer, step_size=64, gamma=0.1)

In [9]:
for epoch in range(160):  

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data[0].to(device), data[1].to(device)
        inputs = inputs.requires_grad_()
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
 
        running_loss += loss.item()
        if i % 300 == 299:    
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 300))
            running_loss = 0.0
        
        
    scheduler.step()

print('Finished Training')

[1,   300] loss: 1.912
[2,   300] loss: 1.434
[3,   300] loss: 1.121
[4,   300] loss: 0.870
[5,   300] loss: 0.713
[6,   300] loss: 0.625
[7,   300] loss: 0.567
[8,   300] loss: 0.520
[9,   300] loss: 0.476
[10,   300] loss: 0.448
[11,   300] loss: 0.422
[12,   300] loss: 0.401
[13,   300] loss: 0.381
[14,   300] loss: 0.365
[15,   300] loss: 0.350
[16,   300] loss: 0.337
[17,   300] loss: 0.319
[18,   300] loss: 0.320
[19,   300] loss: 0.304
[20,   300] loss: 0.299
[21,   300] loss: 0.290
[22,   300] loss: 0.279
[23,   300] loss: 0.273
[24,   300] loss: 0.267
[25,   300] loss: 0.260
[26,   300] loss: 0.255
[27,   300] loss: 0.250
[28,   300] loss: 0.248
[29,   300] loss: 0.240
[30,   300] loss: 0.236
[31,   300] loss: 0.234
[32,   300] loss: 0.229
[33,   300] loss: 0.226
[34,   300] loss: 0.223
[35,   300] loss: 0.219
[36,   300] loss: 0.221
[37,   300] loss: 0.212
[38,   300] loss: 0.211
[39,   300] loss: 0.204
[40,   300] loss: 0.200
[41,   300] loss: 0.204
[42,   300] loss: 0.200
[

In [10]:
correct = 0
total = 0
with torch.no_grad():
    for images, labels in testloader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the simple residual network on the 10000 test images: %d %%' % (
    100 * correct / total))

Accuracy of the simple residual network on the 10000 test images: 92 %


In [11]:
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1

print ('for simple residual network')

for i in range(10):
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))

for simple residual network
Accuracy of plane : 89 %
Accuracy of   car : 96 %
Accuracy of  bird : 87 %
Accuracy of   cat : 76 %
Accuracy of  deer : 92 %
Accuracy of   dog : 93 %
Accuracy of  frog : 91 %
Accuracy of horse : 100 %
Accuracy of  ship : 93 %
Accuracy of truck : 89 %


In [0]:
n=5
model = Net(block=Bottleneck , layers=[n,n,n]).to(device)

In [0]:
import torch.optim as optim
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)           #     
scheduler = StepLR(optimizer, step_size=64, gamma=0.1)

In [0]:
for epoch in range(160):  

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data[0].to(device), data[1].to(device)
        inputs = inputs.requires_grad_()
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
 
        running_loss += loss.item()
        if i % 300 == 299:    
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 300))
            running_loss = 0.0
        
        
    scheduler.step()

print('Finished Training')

In [0]:
correct = 0
total = 0
with torch.no_grad():
    for images, labels in testloader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the bottleneck residual network on the 10000 test images: %d %%' % (
    100 * correct / total))

In [0]:
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1

print ('for bottleneck residual network')

for i in range(10):
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))