In [1]:
%pip install torch
%pip install torchvision
%pip install torchsummary

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [2]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torchsummary import summary

In [3]:
# Select the compute device: the GPU when CUDA is usable, otherwise the CPU.
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [4]:
# Number of training epochs run for each batch size in the sweep.
epochs=50
#batch_size=64
# Batch sizes iterated over by the training cell; one fresh model is trained per entry.
batch_sizes=[64,128,256]
# Base SGD learning rate; the training cells scale a base rate of 0.01 by batch_size/32.
learning_rate=0.01

In [5]:
# Preprocessing pipelines. Both splits end with the same tensor conversion and
# per-channel normalisation; only the training split is randomly augmented.
# NOTE(review): the mean/std values look like the widely used ImageNet
# statistics rather than CIFAR-10-specific ones - confirm this is intended.
normalize=transforms.Normalize(mean=[0.485,0.456,0.406],
                               std=[0.229,0.224,0.225])

# Augmentation: pad every side by 4 px, take a random 32x32 crop, random h-flip.
train_augmentations=[
    transforms.Pad(4),
    transforms.RandomCrop(32),
    transforms.RandomHorizontalFlip(),
]

train_transform=transforms.Compose(train_augmentations+[transforms.ToTensor(),normalize])
valid_transform=transforms.Compose([transforms.ToTensor(),normalize])

In [6]:
# CIFAR-10 train/test splits, downloaded into the shared data directory when
# missing. Each split is wired to its own preprocessing pipeline.
data_root='../data/'
train_dataset=torchvision.datasets.CIFAR10(
    root=data_root,
    train=True,
    transform=train_transform,
    download=True,
)
test_dataset=torchvision.datasets.CIFAR10(
    root=data_root,
    train=False,
    transform=valid_transform,
    download=True,
)

Files already downloaded and verified
Files already downloaded and verified


In [7]:
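# Data loaders (disabled): the training cells below build train_loader and
# test_loader themselves, once per batch size. The fixed-batch versions are
# kept here for reference only.
#train_loader=torch.utils.data.DataLoader(train_dataset,batch_size=batch_size,shuffle=True)
#test_loader=torch.utils.data.DataLoader(test_dataset,batch_size=batch_size,shuffle=False)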

In [7]:
# residual block
class ResidualBlock(nn.Module):
    """Two-convolution residual unit with an optional projection shortcut.

    Main path: conv3x3 -> BN -> ReLU -> conv3x3 -> BN. The block input is added
    to the main path through ``downsample`` and the sum goes through a final ReLU.

    Args:
        in_channels: number of channels of the incoming feature map.
        out_channels: number of channels produced by the block.
        stride: stride of the first convolution (and of the shortcut projection).
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        # Main path; only the first convolution may change the spatial size.
        self.conv1=nn.Conv2d(in_channels,out_channels,kernel_size=3,stride=stride,padding=1,bias=False)
        self.bn1=nn.BatchNorm2d(out_channels)
        self.relu=nn.ReLU(inplace=True)
        self.conv2=nn.Conv2d(out_channels,out_channels,kernel_size=3,stride=1,padding=1,bias=False)
        self.bn2=nn.BatchNorm2d(out_channels)

        # Shortcut: an empty Sequential passes the input through unchanged; a
        # strided 1x1 conv + BN is used when the shape of the main path differs.
        shape_changes=stride!=1 or in_channels!=out_channels
        if shape_changes:
            shortcut_layers=[
                nn.Conv2d(in_channels,out_channels,kernel_size=1,stride=stride,bias=False),
                nn.BatchNorm2d(out_channels),
            ]
        else:
            shortcut_layers=[]
        self.downsample=nn.Sequential(*shortcut_layers)

    def forward(self,x):
        """Return ReLU(main_path(x) + downsample(x))."""
        main=self.relu(self.bn1(self.conv1(x)))
        main=self.bn2(self.conv2(main))
        main+=self.downsample(x)
        return self.relu(main)
        

In [8]:
# 4-layer ResNet
class ResNet(nn.Module):
    """Three-stage residual network with an 80/160/320-channel layout.

    A 3x3 stem convolution (stride 1) feeds three stages built by
    ``make_layer``; stages two and three halve the spatial size. An 8x8
    average pool follows, so a 32x32 input (32 -> 16 -> 8) is reduced to one
    feature vector per image before the linear classifier.

    Args:
        block: callable ``block(in_channels, out_channels, stride)`` returning
            an ``nn.Module``; used to build every residual unit.
        layers: sequence whose first three entries give the number of blocks
            in stages one to three.
        num_classes: size of the classifier output.
    """

    def __init__(self,block,layers,num_classes=10):
        super(ResNet, self).__init__()
        self.in_channels=80                                                  # 64 can be modified
        self.conv=nn.Conv2d(in_channels=3,out_channels=80,stride=1,kernel_size=3,padding=1,bias=False)
        self.bn=nn.BatchNorm2d(80)
        self.relu=nn.ReLU(inplace=True)
        self.layer1=self.make_layer(block, 80,layers[0], stride=1)        # 64 can be modified
        self.layer2=self.make_layer(block, 160, layers[1], stride=2)      # 128 can be modified
        self.layer3=self.make_layer(block, 320, layers[2], stride=2)      # 256 can be modified
        #self.layer4=self.make_layer(block, 512, layers[3], stride=2)      # 512 can be modified
        self.avg_pool=nn.AvgPool2d(8)                                        # 4 can be modified
        # The head follows the width of the last stage and honours num_classes
        # (it was previously fixed to 320 -> 10 regardless of the argument).
        self.fc=nn.Linear(self.in_channels,num_classes)

    def make_layer(self,block,out_channels,blocks, stride):
        """Build one stage as an ``nn.Sequential`` of ``block`` instances.

        The first block maps ``self.in_channels`` to ``out_channels`` with the
        given ``stride``; the remaining ``blocks - 1`` blocks keep the shape.
        At least one block is always created. ``self.in_channels`` is updated
        so that the next stage starts from this stage's width.
        """
        layers=[]
        layers.append(block(self.in_channels,out_channels,stride))
        self.in_channels=out_channels
        for i in range(1,blocks):
            layers.append(block(out_channels,out_channels,stride=1))
        return nn.Sequential(*layers)

    def forward(self,x):
        """Return class logits of shape ``(batch, num_classes)`` for ``x``."""
        out=self.conv(x)
        out=self.bn(out)
        out=self.relu(out)
        out=self.layer1(out)
        out=self.layer2(out)
        out=self.layer3(out)
        #out=self.layer4(out)
        out=self.avg_pool(out)
        # Flatten the pooled map to (batch, features) for the linear head.
        out=out.view(out.size(0),-1)
        out=self.fc(out)
        return out

In [10]:
# Build the [3,3,2]-block ResNet, move it to the selected device and print a
# per-layer summary for a single 3x32x32 input.
model=ResNet(block=ResidualBlock,layers=[3,3,2])
model=model.to(device)
input_shape=(3,32,32)
summary(model,input_shape)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 80, 32, 32]           2,160
       BatchNorm2d-2           [-1, 80, 32, 32]             160
              ReLU-3           [-1, 80, 32, 32]               0
            Conv2d-4           [-1, 80, 32, 32]          57,600
       BatchNorm2d-5           [-1, 80, 32, 32]             160
              ReLU-6           [-1, 80, 32, 32]               0
            Conv2d-7           [-1, 80, 32, 32]          57,600
       BatchNorm2d-8           [-1, 80, 32, 32]             160
              ReLU-9           [-1, 80, 32, 32]               0
    ResidualBlock-10           [-1, 80, 32, 32]               0
           Conv2d-11           [-1, 80, 32, 32]          57,600
      BatchNorm2d-12           [-1, 80, 32, 32]             160
             ReLU-13           [-1, 80, 32, 32]               0
           Conv2d-14           [-1, 80,

In [11]:
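# Loss function and optimizer (disabled): both are created inside the training
# cells below, so every batch size starts with a fresh optimizer.
#criterion = nn.CrossEntropyLoss()
#optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate,momentum=0.9, weight_decay=1e-3)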

In [9]:
#train
def train(model, loader=None, loss_fn=None, opt=None, dev=None):
    """Run one training epoch and return ``(mean_loss, accuracy)``.

    Both metrics are averaged over samples, not over batches, so a shorter
    final batch does not carry the same weight as a full one.

    Args:
        model: network to optimise; switched to training mode.
        loader: iterable of ``(img, label)`` batches. Defaults to the
            notebook-level ``train_loader``.
        loss_fn: loss callable ``loss_fn(output, label)``. Defaults to the
            notebook-level ``criterion``. Assumed to return the mean loss of
            the batch (the default reduction of ``nn.CrossEntropyLoss``).
        opt: optimizer stepping ``model``'s parameters. Defaults to the
            notebook-level ``optimizer``.
        dev: device the batches are moved to. Defaults to the notebook-level
            ``device``.

    Returns:
        Tuple ``(loss, accuracy)`` as Python floats; ``(0.0, 0.0)`` when the
        loader yields no samples.
    """
    # Fall back to the notebook globals so existing ``train(model)`` calls work.
    loader=train_loader if loader is None else loader
    loss_fn=criterion if loss_fn is None else loss_fn
    opt=optimizer if opt is None else opt
    dev=device if dev is None else dev

    model.train()
    loss_sum=0.0
    n_correct=0
    n_seen=0
    for img,label in loader:
        img=img.to(dev)
        label=label.to(dev)
        #forward
        output=model(img)
        loss=loss_fn(output,label)
        #backward
        opt.zero_grad()
        loss.backward()
        opt.step()
        #accumulate sample-weighted loss and the number of correct predictions
        n_batch=label.size(0)
        loss_sum+=loss.item()*n_batch
        _, predicted=output.max(1)
        n_correct+=(predicted==label).sum().item()
        n_seen+=n_batch
    if n_seen==0:
        return 0.0,0.0
    return loss_sum/n_seen,n_correct/n_seen

In [10]:
def test(model):
    model.eval()
    with torch.no_grad():
        valid_acc=0
        for img,label in test_loader:
            img=img.to(device)
            label=label.to(device)
            output=model(img)
            _, predicted=output.max(1)
            valid_acc+=(predicted==label).sum().item()/len(output)
    return valid_acc/len(test_loader)

In [None]:
# Batch-size sweep: train a freshly initialised model for every entry of
# batch_sizes. train() and test() are called with the model only, so the
# loaders, criterion and optimizer are bound at notebook level here.
for batch_size in batch_sizes:
    print("Batch size: %d"%(batch_size))
    train_loader=torch.utils.data.DataLoader(train_dataset,batch_size=batch_size,shuffle=True)
    test_loader=torch.utils.data.DataLoader(test_dataset,batch_size=batch_size,shuffle=False)
    model=ResNet(ResidualBlock,[3,3,2]).to(device)
    criterion = nn.CrossEntropyLoss()
    # The learning rate grows linearly with the batch size (reference size 32),
    # starting from the configured base rate instead of a repeated literal.
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate*(batch_size/32),momentum=0.9, weight_decay=1e-3)
    # One cosine half-period spans the whole run; tied to `epochs` so the
    # schedule stays in sync if the epoch count is changed.
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer=optimizer, T_max=epochs)
    for i in range(epochs):
        train_loss, train_acc=train(model)
        test_acc=test(model)
        print("Epoch %d. Train Loss %.4f. Train Acc %.4f. Test Acc %.4f."%(i,train_loss,train_acc,test_acc))
        scheduler.step()

Batch size: 64
Epoch 0. Train Loss 1.4893. Train Acc 0.4510. Test Acc 0.5266.
Epoch 1. Train Loss 0.9747. Train Acc 0.6540. Test Acc 0.6248.
Epoch 2. Train Loss 0.7475. Train Acc 0.7405. Test Acc 0.7687.
Epoch 3. Train Loss 0.6366. Train Acc 0.7796. Test Acc 0.7472.
Epoch 4. Train Loss 0.5560. Train Acc 0.8098. Test Acc 0.7942.
Epoch 5. Train Loss 0.5079. Train Acc 0.8258. Test Acc 0.8187.
Epoch 6. Train Loss 0.4647. Train Acc 0.8416. Test Acc 0.7785.
Epoch 7. Train Loss 0.4345. Train Acc 0.8500. Test Acc 0.8286.
Epoch 8. Train Loss 0.4109. Train Acc 0.8597. Test Acc 0.8166.
Epoch 9. Train Loss 0.3892. Train Acc 0.8684. Test Acc 0.8259.
Epoch 10. Train Loss 0.3726. Train Acc 0.8721. Test Acc 0.8246.
Epoch 11. Train Loss 0.3561. Train Acc 0.8790. Test Acc 0.8371.
Epoch 12. Train Loss 0.3439. Train Acc 0.8831. Test Acc 0.8512.
Epoch 13. Train Loss 0.3251. Train Acc 0.8897. Test Acc 0.8491.
Epoch 14. Train Loss 0.3171. Train Acc 0.8913. Test Acc 0.8451.
Epoch 15. Train Loss 0.3032. Train 

In [11]:
# Same sweep restricted to the two larger batch sizes.
# NOTE(review): presumably resumes the sweep after the batch-size-64 run in
# the previous cell - confirm. This rebinds the notebook-level batch_sizes.
batch_sizes=[128,256]
for batch_size in batch_sizes:
    print("Batch size: %d"%(batch_size))
    train_loader=torch.utils.data.DataLoader(train_dataset,batch_size=batch_size,shuffle=True)
    test_loader=torch.utils.data.DataLoader(test_dataset,batch_size=batch_size,shuffle=False)
    model=ResNet(ResidualBlock,[3,3,2]).to(device)
    criterion = nn.CrossEntropyLoss()
    # The learning rate grows linearly with the batch size (reference size 32),
    # starting from the configured base rate instead of a repeated literal.
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate*(batch_size/32),momentum=0.9, weight_decay=1e-3)
    # One cosine half-period spans the whole run; tied to `epochs` so the
    # schedule stays in sync if the epoch count is changed.
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer=optimizer, T_max=epochs)
    for i in range(epochs):
        train_loss, train_acc=train(model)
        test_acc=test(model)
        print("Epoch %d. Train Loss %.4f. Train Acc %.4f. Test Acc %.4f."%(i,train_loss,train_acc,test_acc))
        scheduler.step()

Batch size: 128
Epoch 0. Train Loss 1.5500. Train Acc 0.4239. Test Acc 0.5412.
Epoch 1. Train Loss 1.0380. Train Acc 0.6266. Test Acc 0.6440.
Epoch 2. Train Loss 0.8067. Train Acc 0.7174. Test Acc 0.6652.
Epoch 3. Train Loss 0.6657. Train Acc 0.7677. Test Acc 0.7159.
Epoch 4. Train Loss 0.5781. Train Acc 0.8008. Test Acc 0.7537.
Epoch 5. Train Loss 0.5194. Train Acc 0.8194. Test Acc 0.8079.
Epoch 6. Train Loss 0.4855. Train Acc 0.8328. Test Acc 0.7793.
Epoch 7. Train Loss 0.4423. Train Acc 0.8481. Test Acc 0.8067.
Epoch 8. Train Loss 0.4104. Train Acc 0.8601. Test Acc 0.7995.
Epoch 9. Train Loss 0.3903. Train Acc 0.8671. Test Acc 0.8343.
Epoch 10. Train Loss 0.3714. Train Acc 0.8735. Test Acc 0.8535.
Epoch 11. Train Loss 0.3595. Train Acc 0.8778. Test Acc 0.8358.
Epoch 12. Train Loss 0.3418. Train Acc 0.8851. Test Acc 0.8424.
Epoch 13. Train Loss 0.3324. Train Acc 0.8876. Test Acc 0.8488.
Epoch 14. Train Loss 0.3185. Train Acc 0.8915. Test Acc 0.8685.
Epoch 15. Train Loss 0.3034. Train