Reference : https://github.com/ndb796/Deep-Learning-Paper-Review-and-Practice/blob/master/code_practices/ResNet18_MNIST_Train.ipynb

## ResNet18 모델 정의 및 인스턴스 초기화

In [15]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
import torch.optim as optim
import os

In [42]:
# ResNet18을 위해 간단한 ver의 BasicBlock 클래스 정의
class BasicBlock(nn.Module):
    def __init__(self,in_planes,planes,stride=1):
        super(BasicBlock,self).__init__()

        # 3x3 필터 사용 (너비&높이 줄일 때는 stride 값 조절)
        self.conv1=nn.Conv2d(in_planes,planes,kernel_size=3,stride=stride,padding=1,bias=False)
        self.bn1=nn.BatchNorm2d(planes)

        # 3x3 필터 사용 (패딩=1이므로 너비&높이 동일함)
        self.conv2=nn.Conv2d(planes,planes,kernel_size=3,stride=1,padding=1,bias=False)
        self.bn2=nn.BatchNorm2d(planes)

        # identity shortcuts
        self.shortcut=nn.Sequential()

        # projection shortcuts
        if stride!=1:
            self.shortcut=nn.Sequential(
                nn.Conv2d(in_planes,planes,kernel_size=1,stride=stride,bias=False),
                nn.BatchNorm2d(planes)
            )

    def forward(self,x):
        out=F.relu(self.bn1(self.conv1(x)))
        out=self.bn2(self.conv2(out))
        out+=self.shortcut(x) # skip connection
        out=F.relu(out)
        return out

In [48]:
# ResNet 클래스 정의
class ResNet(nn.Module):
    def __init__(self,block,num_blocks,num_classes=10):
        super(ResNet,self).__init__()
        self.in_planes=64

        self.conv1=nn.Conv2d(1,64,kernel_size=3,stride=1,padding=1,bias=False)
        self.bn1=nn.BatchNorm2d(64)
        self.layer1=self._make_layer(block,64,num_blocks[0],stride=1)
        self.layer2=self._make_layer(block,128,num_blocks[1],stride=2)
        self.layer3=self._make_layer(block,256,num_blocks[2],stride=2)
        self.layer4=self._make_layer(block,512,num_blocks[3],stride=2)
        self.linear=nn.Linear(512,num_classes)


    def _make_layer(self,block,planes,num_blocks,stride):
        strides=[stride]+[1]*(num_blocks-1)
        layers=[]
        for stride in strides:
            layers.append(block(self.in_planes,planes,stride))
            self.in_planes=planes
        return nn.Sequential(*layers)

    def forward(self,x):
        out=F.relu(self.bn1(self.conv1(x)))
        out=self.layer1(out)
        out=self.layer2(out)
        out=self.layer3(out)
        out=self.layer4(out)
        out=F.avg_pool2d(out,4)
        out=out.view(out.size(0),-1)
        out=self.linear(out)
        out=F.softmax(out)
        return out

In [49]:
def ResNet18():
    return ResNet(BasicBlock,[2,2,2,2])

In [50]:
import torchvision
import torchvision.transforms as transforms

In [51]:
transform_train=transforms.Compose([
    transforms.ToTensor(),
])
transform_test=transforms.Compose([
    transforms.ToTensor(),
])

In [52]:
train_dataset=torchvision.datasets.MNIST(root='./data',train=True,download=True,transform=transform_train)
test_dataset=torchvision.datasets.MNIST(root='./data',train=False,download=True,transform=transform_test)

In [53]:
train_loader=torch.utils.data.DataLoader(train_dataset,batch_size=128,shuffle=True,num_workers=2)
test_loader=torch.utils.data.DataLoader(test_dataset,batch_size=100,shuffle=False,num_workers=2)

In [54]:
device='cuda'

net=ResNet18()
net=net.to(device)
net=torch.nn.DataParallel(net)
cudnn.benchmark=True

In [55]:
learning_rate = 0.01
file_name = 'resnet18_mnist.pt'

In [56]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9, weight_decay=0.0002)

In [63]:
def train(epoch):
    print('\n[ Train epoch: %d]'%epoch)
    net.train()
    train_loss=0
    correct=0
    total=0
    for batch_idx,(inputs,targets) in enumerate(train_loader):
        inputs,targets=inputs.to(device),targets.to(device)
        optimizer.zero_grad()

        begin_outputs=net(inputs)
        loss=criterion(begin_outputs,targets)
        loss.backward()

        optimizer.step()
        train_loss+=loss.item()
        _,predicted=begin_outputs.max(1)

        total+=targets.size(0)
        correct+=predicted.eq(targets).sum().item()

        # if  batch_idx%100==0:
        #     print('\nCurrent batch: ',str(batch_idx))
        #     print('Current begin train accuracy: ',str(predicted.eq(targets).sum().item() / targets.size(0)))
        #     print('Current benign train loss:', loss.item())

    print('\nTotal benign train accuarcy:', 100. * correct / total)
    print('Total benign train loss:', train_loss)

In [64]:
def test(epoch):
    print('\n[ Test epoch: %d ]'% epoch)
    net.eval()
    loss=0
    correct=0
    total=0

    for batch_idx,(inputs,targets) in enumerate(test_loader):
        inputs,targets=inputs.to(device),targets.to(device)
        total+=targets.size(0)

        outputs=net(inputs)
        loss+=criterion(outputs,targets).item()

        _,predicted=outputs.max(1)
        correct+=predicted.eq(targets).sum().item()

    print('\nTest accuarcy:', 100. * correct / total)
    print('Test average loss:', loss / total)

    state = {
        'net': net.state_dict()
    }
    if not os.path.isdir('checkpoint'):
        os.mkdir('checkpoint')
    torch.save(state, './checkpoint/' + file_name)
    print('Model Saved!')

def adjust_learning_rate(optimizer, epoch):
    lr = learning_rate
    if epoch >= 5:
        lr /= 10
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

In [65]:
for epoch in range(0, 10):
    adjust_learning_rate(optimizer, epoch)
    train(epoch)
    test(epoch)


[ Train epoch: 0]

Total benign train accuarcy: 99.92833333333333
Total benign train loss: 1.2393534209113568

[ Test epoch: 0 ]

Test accuarcy: 99.41
Test average loss: 0.00017310794915747466
Model Saved!

[ Train epoch: 1]

Total benign train accuarcy: 99.93666666666667
Total benign train loss: 1.0735367925663013

[ Test epoch: 1 ]

Test accuarcy: 99.59
Test average loss: 0.00013821695503957017
Model Saved!

[ Train epoch: 2]

Total benign train accuarcy: 99.99333333333334
Total benign train loss: 0.28052967760595493

[ Test epoch: 2 ]

Test accuarcy: 99.56
Test average loss: 0.00014164039522556777
Model Saved!

[ Train epoch: 3]

Total benign train accuarcy: 100.0
Total benign train loss: 0.1413768181228079

[ Test epoch: 3 ]

Test accuarcy: 99.44
Test average loss: 0.00017489703559640476
Model Saved!

[ Train epoch: 4]

Total benign train accuarcy: 99.99833333333333
Total benign train loss: 0.12245267607431742

[ Test epoch: 4 ]

Test accuarcy: 99.57
Test average loss: 0.000131261