In [1]:
import torch
from torch import nn
from torch.nn import functional as F

import utils

In [2]:
class Residual(nn.Module):
    """Two-convolution residual unit with an optional projection shortcut.

    Main path: 3x3 conv -> BatchNorm -> ReLU -> 3x3 conv -> BatchNorm.
    The shortcut is added to the main path and a final ReLU is applied.

    Args:
        input_channels: Number of channels of the incoming feature map.
        num_channels: Number of channels produced by both 3x3 convolutions.
        use_1x1conv: If True, the shortcut is a 1x1 convolution (with stride
            ``strides``) instead of the identity, so that its output matches
            the main path when the channel count or resolution changes.
        strides: Stride of the first 3x3 convolution (and of the 1x1
            shortcut convolution when it is enabled).
    """

    def __init__(self, input_channels, num_channels, use_1x1conv=False, strides=1):
        super().__init__()
        # Layers are created in a fixed order (conv1, conv2, conv3, bn1, bn2)
        # so parameter initialisation consumes the RNG deterministically.
        self.conv1 = nn.Conv2d(input_channels, num_channels,
                               kernel_size=3, padding=1, stride=strides)
        self.conv2 = nn.Conv2d(num_channels, num_channels,
                               kernel_size=3, padding=1)
        if use_1x1conv:
            self.conv3 = nn.Conv2d(input_channels, num_channels,
                                   kernel_size=1, stride=strides)
        else:
            # Identity shortcut: forward() adds X unchanged.
            self.conv3 = None
        self.bn1 = nn.BatchNorm2d(num_channels)
        self.bn2 = nn.BatchNorm2d(num_channels)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, X):
        """Return ``relu(main_path(X) + shortcut(X))``."""
        # In-place ReLU only ever touches freshly produced BatchNorm outputs,
        # never the caller's tensor X.
        Y = self.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        # Explicit None test: do not rely on the truthiness of an nn.Module.
        shortcut = X if self.conv3 is None else self.conv3(X)
        Y += shortcut
        return self.relu(Y)

In [3]:
def block(input_channels, num_channels, num_residuals,first_block=False):
    """Build one ResNet stage as an ``nn.Sequential`` of ``Residual`` units.

    Args:
        input_channels: Channels of the feature map entering the stage.
        num_channels: Channels produced by every unit in the stage.
        num_residuals: Number of ``Residual`` units to stack.
        first_block: If False, the first unit halves the resolution
            (stride 2) and uses a 1x1 projection shortcut. If True, the
            first unit keeps the resolution (stride 1).

    Returns:
        ``nn.Sequential`` containing ``num_residuals`` units (empty when
        ``num_residuals`` is 0).
    """
    blk = []
    for i in range(num_residuals):
        if i == 0 and not first_block:
            # Down-sampling entry unit: projection shortcut with stride 2.
            blk.append(Residual(input_channels, num_channels,
                                use_1x1conv=True, strides=2))
        elif i == 0:
            # Stride-1 entry unit. It must still consume `input_channels`;
            # a 1x1 projection is only needed when the width changes, so the
            # equal-width case stays a plain identity-shortcut unit.
            blk.append(Residual(input_channels, num_channels,
                                use_1x1conv=input_channels != num_channels))
        else:
            blk.append(Residual(num_channels, num_channels))
    return nn.Sequential(*blk)

In [4]:
# Stem: 7x7 stride-2 convolution to 64 channels, ReLU, then a 3x3 stride-2
# max-pool. LazyConv2d infers its input channel count on the first forward
# pass, so its weights do not exist until data has flowed through it.
# NOTE(review): there is no BatchNorm2d between the convolution and the ReLU,
# unlike the residual units below — confirm this omission is intentional.
b1 = nn.Sequential(
    nn.LazyConv2d(64, kernel_size=7, stride=2, padding=3),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
)



In [5]:
# Four residual stages of two units each. The first stage keeps 64 channels
# at stride 1; each later stage doubles the width via block()'s default
# (first_block=False) down-sampling entry unit.
b2 = block(64, 64, 2, first_block=True)
b3, b4, b5 = (
    block(c_in, c_out, 2)
    for c_in, c_out in ((64, 128), (128, 256), (256, 512))
)

# Full model: stem, residual stages, global average pool, flatten, and a
# linear layer mapping the 512 pooled features to 10 outputs.
net = nn.Sequential(
    b1, b2, b3, b4, b5,
    nn.AdaptiveAvgPool2d((1, 1)),
    nn.Flatten(),
    nn.Linear(512, 10),
)

In [6]:
# Print the output shape after each top-level child of `net` (see the cell
# output below). utils.shape is a project helper whose body is not visible
# here; presumably it pushes a dummy batch through the network — TODO confirm
# the input size it uses.
# NOTE(review): if it does run a forward pass, that pass is also what
# materializes the LazyConv2d weights in the stem, so later cells may depend
# on this cell having been executed — confirm.
utils.shape(net)

Sequential output shape:  torch.Size([1, 64, 56, 56])
Sequential output shape:  torch.Size([1, 64, 56, 56])
Sequential output shape:  torch.Size([1, 128, 28, 28])
Sequential output shape:  torch.Size([1, 256, 14, 14])
Sequential output shape:  torch.Size([1, 512, 7, 7])
AdaptiveAvgPool2d output shape:  torch.Size([1, 512, 1, 1])
Flatten output shape:  torch.Size([1, 512])
Linear output shape:  torch.Size([1, 10])


In [7]:

# Training hyperparameters.
lr = 0.05
epochs = 10
batch_size = 128

# CIFAR-10 iterators from the project helper, requested with resize=64 and
# transform=True (the exact transform is defined in utils, not shown here).
train_iter, test_iter = utils.load_data_cifar10(
    batch_size, resize=64, transform=True
)

# NOTE(review): the trailing positional 0 is an argument of utils.train_ch6
# whose meaning is not visible in this notebook — confirm and name it.
utils.train_ch6(
    net, train_iter, test_iter, epochs, lr, utils.try_gpu(), 0
)

Files already downloaded and verified
Files already downloaded and verified
training on cuda:0
loss 1.966, train acc 0.315, test acc 0.405
loss 1.413, train acc 0.488, test acc 0.446
loss 1.225, train acc 0.563, test acc 0.582
loss 1.090, train acc 0.612, test acc 0.542
loss 0.984, train acc 0.653, test acc 0.629
loss 0.910, train acc 0.678, test acc 0.604
loss 0.832, train acc 0.707, test acc 0.655
loss 0.772, train acc 0.729, test acc 0.641
loss 0.707, train acc 0.752, test acc 0.659
loss 0.658, train acc 0.770, test acc 0.712
2721.0 examples/sec on cuda:0
