In [1]:
%pip install torch
%pip install torchvision
%pip install torchsummary

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [2]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torchsummary import summary

In [3]:
# Select the compute device: use CUDA when PyTorch reports it as available, otherwise the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

1. Epoch 数、batch size 和 learning rate 都可以调整

In [4]:
# Training hyperparameters
epochs = 50            # number of passes of the training loop
batch_size = 64        # samples per mini-batch handed to the data loaders
learning_rate = 1e-2   # initial learning rate given to the optimizer

2. transform 的方法也可以调整

In [5]:
# Per-channel normalization statistics shared by both pipelines.
# NOTE(review): these look like the commonly used ImageNet statistics rather than
# values computed on CIFAR-10 — confirm that this is intentional.
norm_mean = [0.485, 0.456, 0.406]
norm_std = [0.229, 0.224, 0.225]

# Training pipeline: pad by 4 pixels, take a random 32x32 crop, randomly flip
# horizontally, then convert to a tensor and normalize.
train_transform = transforms.Compose([
    transforms.Pad(4),
    transforms.RandomCrop(32),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=norm_mean, std=norm_std),
])

# Evaluation pipeline: no augmentation, only tensor conversion and normalization.
valid_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=norm_mean, std=norm_std),
])

In [6]:
# CIFAR-10 splits; the data is downloaded into `data_root` when it is not already there.
data_root = '../data/'

# Training split, using the augmenting transform
train_dataset = torchvision.datasets.CIFAR10(
    root=data_root,
    train=True,
    transform=train_transform,
    download=True,
)

# Held-out split, using the plain (non-augmenting) transform
test_dataset = torchvision.datasets.CIFAR10(
    root=data_root,
    train=False,
    transform=valid_transform,
    download=True,
)

Files already downloaded and verified
Files already downloaded and verified


In [7]:
# Mini-batch iterators over the two splits; only the training split is shuffled
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [8]:
# Residual block: two 3x3 conv + batch-norm stages with a shortcut connection
class ResidualBlock(nn.Module):
    """Two-convolution residual unit.

    The main branch is conv3x3 -> BN -> ReLU -> conv3x3 -> BN. Its result is
    added to a shortcut of the input and passed through a final ReLU.

    Args:
        in_channels: number of channels of the input feature map.
        out_channels: number of channels produced by the block.
        stride: stride of the first convolution (and of the shortcut
            projection, when one is used).
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        # Main branch, first stage: this is the only 3x3 conv that applies `stride`.
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        # Main branch, second stage: keeps the spatial size and channel count.
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        # Shortcut: an empty Sequential passes the input through unchanged; a
        # 1x1 conv + BN projection is used when the main branch changes the
        # stride or the channel count.
        shortcut_layers = []
        if not (stride == 1 and in_channels == out_channels):
            shortcut_layers = [
                nn.Conv2d(in_channels, out_channels, kernel_size=1,
                          stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            ]
        self.downsample = nn.Sequential(*shortcut_layers)

    def forward(self, x):
        """Return relu(main_branch(x) + shortcut(x))."""
        branch = self.relu(self.bn1(self.conv1(x)))
        branch = self.bn2(self.conv2(branch))
        # The shortcut is evaluated after the main branch, then added in place.
        branch += self.downsample(x)
        return self.relu(branch)
        

In [9]:
# ResNet with a convolutional stem and three residual stages
class ResNet(nn.Module):
    """Small ResNet classifier for 3-channel 32x32 inputs.

    Structure: 3x3 conv stem (64 channels) -> three residual stages with
    64 / 128 / 256 channels (the last two halve the spatial size) -> 8x8
    average pooling -> fully connected classifier.

    Args:
        block: callable ``block(in_channels, out_channels, stride)`` that
            returns an ``nn.Module`` mapping ``in_channels`` to
            ``out_channels`` feature maps.
        layers: sequence of three ints, the number of blocks in each stage.
        num_classes: size of the output layer.
    """

    def __init__(self, block, layers, num_classes=10):
        super(ResNet, self).__init__()
        # Channel count of the feature map entering the next stage; updated by make_layer.
        self.in_channels = 64
        self.conv = nn.Conv2d(in_channels=3, out_channels=64, stride=1,
                              kernel_size=3, padding=1, bias=False)
        self.bn = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        # The stage widths below can be tuned; the classifier input size follows them.
        self.layer1 = self.make_layer(block, 64, layers[0], stride=1)
        self.layer2 = self.make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self.make_layer(block, 256, layers[2], stride=2)
        # For a 32x32 input the two stride-2 stages leave an 8x8 map, so an
        # 8x8 average pool reduces it to 1x1. Change this if the input size
        # or the number of down-sampling stages changes.
        self.avg_pool = nn.AvgPool2d(8)
        # Size the classifier from the tracked channel count and honour
        # `num_classes` (previously hard-coded as Linear(256, 10)).
        self.fc = nn.Linear(self.in_channels, num_classes)

    def make_layer(self, block, out_channels, blocks, stride):
        """Build one stage made of `blocks` consecutive blocks.

        Only the first block receives `stride` and the channel change; the
        remaining blocks keep `out_channels` and use stride 1.
        """
        layers = [block(self.in_channels, out_channels, stride)]
        self.in_channels = out_channels
        for _ in range(1, blocks):
            layers.append(block(out_channels, out_channels, stride=1))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return class scores of shape (batch, num_classes) for a batch of images."""
        out = self.conv(x)
        out = self.bn(out)
        out = self.relu(out)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.avg_pool(out)
        # Flatten everything except the batch dimension for the linear layer.
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out

In [10]:
# Build the network with two residual blocks per stage, move it to the selected
# device, and print a per-layer summary for a 3x32x32 input.
stage_depths = [2, 2, 2]
model = ResNet(ResidualBlock, stage_depths).to(device)
summary(model, input_size=(3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 32, 32]           1,728
       BatchNorm2d-2           [-1, 64, 32, 32]             128
              ReLU-3           [-1, 64, 32, 32]               0
            Conv2d-4           [-1, 64, 32, 32]          36,864
       BatchNorm2d-5           [-1, 64, 32, 32]             128
              ReLU-6           [-1, 64, 32, 32]               0
            Conv2d-7           [-1, 64, 32, 32]          36,864
       BatchNorm2d-8           [-1, 64, 32, 32]             128
              ReLU-9           [-1, 64, 32, 32]               0
    ResidualBlock-10           [-1, 64, 32, 32]               0
           Conv2d-11           [-1, 64, 32, 32]          36,864
      BatchNorm2d-12           [-1, 64, 32, 32]             128
             ReLU-13           [-1, 64, 32, 32]               0
           Conv2d-14           [-1, 64,

3. Optimizer 可以换成 SGD、Adam 等

In [11]:
# loss function and optimizer
#criterion = nn.CrossEntropyLoss()
#optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate,momentum=0.9, weight_decay=1e-3)

In [12]:
#train
def train(model, loader=None, loss_fn=None, opt=None, dev=None):
    """Run one training epoch and return ``(mean_batch_loss, accuracy)``.

    Args:
        model: network to optimise; it is put into training mode.
        loader: iterable of ``(images, labels)`` batches. Defaults to the
            notebook-level ``train_loader``.
        loss_fn: callable ``loss_fn(output, labels)`` returning a scalar
            tensor. Defaults to the notebook-level ``criterion``.
        opt: optimizer stepping the model's parameters. Defaults to the
            notebook-level ``optimizer``.
        dev: device the batches are moved to. Defaults to the notebook-level
            ``device``.

    Returns:
        A tuple ``(loss, acc)`` where ``loss`` is the mean of the per-batch
        loss values and ``acc`` is the fraction of correctly classified
        samples over the whole epoch.

    Raises:
        ValueError: if ``loader`` yields no samples.
    """
    # Fall back to the notebook globals only for arguments that were not supplied.
    loader = train_loader if loader is None else loader
    loss_fn = criterion if loss_fn is None else loss_fn
    opt = optimizer if opt is None else opt
    dev = device if dev is None else dev

    model.train()
    loss_sum = 0.0
    num_batches = 0
    num_correct = 0
    num_samples = 0
    for img, label in loader:
        img = img.to(dev)
        label = label.to(dev)
        #forward
        output = model(img)
        loss = loss_fn(output, label)
        #backward
        opt.zero_grad()
        loss.backward()
        opt.step()
        #calculate loss and acc
        loss_sum += loss.item()
        num_batches += 1
        _, predicted = output.max(1)
        # Count samples rather than averaging per-batch accuracies, so that a
        # smaller final batch does not get the same weight as a full one.
        num_correct += (predicted == label).sum().item()
        num_samples += len(predicted)
    if num_samples == 0:
        raise ValueError("train() received a loader that yielded no samples")
    return loss_sum / num_batches, num_correct / num_samples

In [13]:
def test(model):
    model.eval()
    with torch.no_grad():
        valid_acc=0
        for img,label in test_loader:
            img=img.to(device)
            label=label.to(device)
            output=model(img)
            _, predicted=output.max(1)
            valid_acc+=(predicted==label).sum().item()/len(output)
    return valid_acc/len(test_loader)

In [14]:
# Loss, optimizer and learning-rate schedule used by train() and test()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=1e-3)
# Tie the cosine schedule to `epochs` (previously a hard-coded 50) so that
# changing the epoch count keeps the annealing period in step with the loop.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer=optimizer, T_max=epochs)

# One pass per epoch: train, evaluate, report, then advance the schedule
for epoch in range(epochs):
    train_loss, train_acc = train(model)
    test_acc = test(model)
    print("Epoch %d. Train Loss %.4f. Train Acc %.4f. Test Acc %.4f." % (epoch, train_loss, train_acc, test_acc))
    scheduler.step()

Epoch 0. Train Loss 1.3720. Train Acc 0.4989. Test Acc 0.6113.
Epoch 1. Train Loss 0.9173. Train Acc 0.6749. Test Acc 0.7197.
Epoch 2. Train Loss 0.7269. Train Acc 0.7469. Test Acc 0.6883.
Epoch 3. Train Loss 0.6137. Train Acc 0.7865. Test Acc 0.7869.
Epoch 4. Train Loss 0.5372. Train Acc 0.8135. Test Acc 0.7882.
Epoch 5. Train Loss 0.4840. Train Acc 0.8354. Test Acc 0.8182.
Epoch 6. Train Loss 0.4395. Train Acc 0.8490. Test Acc 0.7681.
Epoch 7. Train Loss 0.4145. Train Acc 0.8572. Test Acc 0.8018.
Epoch 8. Train Loss 0.3847. Train Acc 0.8681. Test Acc 0.8211.
Epoch 9. Train Loss 0.3611. Train Acc 0.8754. Test Acc 0.8264.
Epoch 10. Train Loss 0.3351. Train Acc 0.8850. Test Acc 0.8575.
Epoch 11. Train Loss 0.3174. Train Acc 0.8907. Test Acc 0.8457.
Epoch 12. Train Loss 0.3027. Train Acc 0.8980. Test Acc 0.8667.
Epoch 13. Train Loss 0.2842. Train Acc 0.9027. Test Acc 0.8669.
Epoch 14. Train Loss 0.2714. Train Acc 0.9069. Test Acc 0.8646.
Epoch 15. Train Loss 0.2543. Train Acc 0.9131. Tes