In [60]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
import torch.optim as optim
import os

class Block(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size = 3, stride = 1, padding = 1, padding_mode = 'zeros', bias = False):
        super().__init__()
        
        #3x3
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size = kernel_size, stride = stride, padding = padding, bias = bias, padding_mode = padding_mode)
        #BatchNormalization
        self.bn = nn.BatchNorm2d(out_channels)        
        #3x3 - 얘는 stride를 1로 고정시킨다. => 크기 변환은 첫번째에서만 가능
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size = kernel_size, stride = 1, padding = padding, bias = bias, padding_mode = padding_mode)
        #x를 더해주기 위함
        self.addx = nn.Sequential()
        if stride != 1: #크기 변환이 이루어졌으면 x를 더할때도 다시 크기를 줄여준다.
            self.addx = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size = kernel_size, stride = stride, padding = padding, bias = bias, padding_mode = padding_mode),
                nn.BatchNorm2d(out_channels)
            )
        
    def forward(self, x): #conv 통과 > batchn 통과 > relu > 다시 conv > 다시 batchn > x 더하기 > relu
        out = self.bn(self.conv2(F.relu(self.bn(self.conv1(x)))))
        out += self.addx(x)
        out = F.relu(out)
        return out
    

    
class ResNet(nn.Module):
    def __init__(self, block, n_block, img_channels, n_classes = 10):
        super().__init__()
        
        self.in_channels = 64 #처음 시작은 다 64이므로 고정시킴.
        
        self.conv1 = nn.Conv2d(img_channels, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self.make_layer(block, 64, n_block[0], stride=1)
        self.layer2 = self.make_layer(block, 128, n_block[1], stride=2)
        self.layer3 = self.make_layer(block, 256, n_block[2], stride=2)
        self.layer4 = self.make_layer(block, 512, n_block[3], stride=2)
        self.linear = nn.Linear(512, n_classes)        
        
    def make_layer(self, block, out_channels, n_block, stride):
        layers = []
        tmpstride = stride
        for i in range(n_block):
            layers.append(block(self.in_channels, out_channels, stride = tmpstride))
            if i==0:
                self.in_channels = out_channels
                tmpstride = 1
        return nn.Sequential(*layers)    
        
    def forward(self, x):
        out = self.conv1(x)
        out = self.bn1(out)
        out = F.relu(out)
        
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        
        return out
    
    
    
def ResNet34(img_channels):
    return ResNet(Block, [3, 4, 6, 3], img_channels = img_channels)



In [61]:
import torchvision
import torchvision.transforms as transforms
from pathlib import Path

DATA_PATH = Path("data")
PATH = DATA_PATH/"mnist"

download_ = False
if not (PATH).exists():
    download_ = True
    


transform_train = transforms.Compose([
    transforms.ToTensor(),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
])

train_dataset = torchvision.datasets.MNIST(root='./data', train=True, download=download_, transform=transform_train)
test_dataset = torchvision.datasets.MNIST(root='./data', train=False, download=download_, transform=transform_test)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=4)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=100, shuffle=False, num_workers=4)



In [87]:
device = 'cuda'

model = ResNet34(1)
model = model.to(device)
model = torch.nn.DataParallel(model)
cudnn.benchmark = True

lr = 0.01
file_name = 'ResNet34_mnist.pt'

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=lr, momentum = 0.9, weight_decay = 0.0001)


def train(epoch, model):
    print('\n [Train - epoch is {0}]'.format(epoch))
    model.train()
    train_loss = 0
    correct = 0
    total = 0

    for batch_index, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        
        middle_outputs = model(inputs)
        loss = criterion(middle_outputs, labels)
        loss.backward()
        
        optimizer.step()
        train_loss += loss.item()
        _, predicted = middle_outputs.max(1)
        
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()
        
        if batch_index%100 == 0:
            print('\nBatch Number : {0}'.format(batch_index))
            print(' -- acuuracy is {0}, loss is {1}'.format(predicted.eq(labels).sum().item()/labels.size(0), loss.item()))

    print('\n-----Train Finished------')
    print('\nTotal accuracy is {0} and loss is {1}'.format(100.*correct/total, train_loss))



def test(epoch, model):
    print('\n [Test - epoch is {0}]'.format(epoch))
    model.train()
    loss = 0
    correct = 0
    total = 0
    
    for batch_index, (inputs, labels) in enumerate(test_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        total += labels.size(0)
        
        outputs = model(inputs)
        loss += criterion(outputs, labels).item()
        
        _, predicted = outputs.max(1)
        correct += predicted.eq(labels).sum().item()
        
    print('\n-----Test Finished------')
    print('\nTotal accuracy is {0} and loss is {1}'.format(100.*correct/total, loss/total))

    state = {
            'model' : model.state_dict(),
            'epoch': epoch
    }
    if not os.path.isdir('checkpoint'):
        os.mkdir('checkpoint')
    torch.save(state, './checkpoint/' + file_name)
    print('Model Saved!')   

    

def adjust_learning_rate(optimizer, epoch):
    lr_ = lr
    if epoch >= 5:
        lr_ /= 10
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr_

In [88]:
for epoch in range(0, 3):
    adjust_learning_rate(optimizer, epoch)
    train(epoch, model)
    test(epoch, model)


 [Train - epoch is 0]

Batch Number : 0
 -- acuuracy is 0.0859375, loss is 2.472703218460083

Batch Number : 100
 -- acuuracy is 0.9609375, loss is 0.19474735856056213

Batch Number : 200
 -- acuuracy is 0.9609375, loss is 0.10532896220684052

Batch Number : 300
 -- acuuracy is 0.9765625, loss is 0.09330513328313828

Batch Number : 400
 -- acuuracy is 0.9921875, loss is 0.019575249403715134

-----Train Finished------

Total accuracy is 95.965 and loss is 60.67583430139348

 [Test - epoch is 0]

-----Test Finished------

Total accuracy is 98.81 and loss is 0.00038161548609496097
Model Saved!

 [Train - epoch is 1]

Batch Number : 0
 -- acuuracy is 1.0, loss is 0.0026470255106687546

Batch Number : 100
 -- acuuracy is 0.9921875, loss is 0.032948821783065796

Batch Number : 200
 -- acuuracy is 1.0, loss is 0.002634999807924032

Batch Number : 300
 -- acuuracy is 0.9765625, loss is 0.0501578263938427

Batch Number : 400
 -- acuuracy is 1.0, loss is 0.005424954928457737

-----Train Finishe

In [91]:
#file_name = "ResNet34_mnist.pt"
device = 'cuda'

current_model = ResNet34(1)
current_model = current_model.to(device)
current_model = torch.nn.DataParallel(current_model)
cudnn.benchmark = True


checkpoint = torch.load('./checkpoint/ResNet34_mnist.pt')
current_model.load_state_dict(checkpoint['model'])
epochs = checkpoint['epoch']

for epoch in range(epochs+1, epochs+3):
    adjust_learning_rate(optimizer, epoch)
    train(epoch, current_model)
    test(epoch, current_model)


 [Train - epoch is 4]

Batch Number : 0
 -- acuuracy is 1.0, loss is 0.009044741280376911

Batch Number : 100
 -- acuuracy is 1.0, loss is 0.002014781814068556

Batch Number : 200
 -- acuuracy is 1.0, loss is 0.007651323452591896

Batch Number : 300
 -- acuuracy is 0.9921875, loss is 0.01760442554950714

Batch Number : 400
 -- acuuracy is 0.9921875, loss is 0.05480268970131874

-----Train Finished------

Total accuracy is 99.575 and loss is 6.394595069024945

 [Test - epoch is 4]

-----Test Finished------

Total accuracy is 99.3 and loss is 0.0002268722155251453
Model Saved!

 [Train - epoch is 5]

Batch Number : 0
 -- acuuracy is 1.0, loss is 0.0007588269654661417

Batch Number : 100
 -- acuuracy is 0.9921875, loss is 0.02355588786303997

Batch Number : 200
 -- acuuracy is 1.0, loss is 0.005156009923666716

Batch Number : 300
 -- acuuracy is 1.0, loss is 0.0016087691765278578

Batch Number : 400
 -- acuuracy is 1.0, loss is 0.005771078169345856

-----Train Finished------

Total accur