In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchsummary import summary
import torch.nn.init as init

normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

transform_train = transforms.Compose(
    [transforms.RandomCrop(32),
     transforms.RandomHorizontalFlip(),
     transforms.ToTensor(),
     normalize])
transform_test = transforms.Compose(
    [transforms.RandomCrop(32),
     transforms.ToTensor(),
     normalize])

trainset = torchvision.datasets.CIFAR10(root='../data', train=True,
                                        download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=768,
                                          shuffle=True, num_workers=2, pin_memory=True)

testset = torchvision.datasets.CIFAR10(root='../data', train=False,
                                       download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=512,
                                         shuffle=False, num_workers=2,pin_memory=True)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

import matplotlib.pyplot as plt
import numpy as np

def _weights_init(m):
    classname = m.__class__.__name__
    #print(classname)
    if isinstance(m, nn.Linear) or isinstance(m, nn.Conv2d):
        init.kaiming_normal_(m.weight)

class basicBlock(nn.Module):
    def __init__(self, inChannel, outChannel, stride = 1):
        super(basicBlock, self).__init__()
        self.conv_t = nn.Conv2d(inChannel, outChannel, kernel_size=3, stride = stride, padding = 1, bias=False)
        self.conv_m = nn.Conv2d(outChannel, outChannel, kernel_size=3, stride = 1, padding = 1, bias=False)
        self.bn1                 = nn.BatchNorm2d(outChannel)
        self.bn2                 = nn.BatchNorm2d(outChannel)
        self.reDem = nn.Sequential()
        # re demension
        if stride != 1 or inChannel != outChannel:
            self.reDem = nn.Sequential(nn.Conv2d(inChannel, outChannel, kernel_size=1, stride = 2, bias=False),nn.BatchNorm2d(outChannel))
    def forward(self, x):
        out = self.bn2(self.conv_m(F.relu(self.bn1(self.conv_t(x)))))
        # residual addiction
        out = self.reDem(x) + out
        # relu result
        out = F.relu(out)
        return out
        
class resNet(nn.Module):
    def __init__(self):
        super(resNet, self).__init__()
        self.inChannel = 16
        self.conv0 = nn.Conv2d(3, 16, kernel_size=3, padding = 1, bias=False)
        self.bn0   = nn.BatchNorm2d(16)
    
        self.layer1 = self._make_layer(9, 16, 1)
        self.layer2 = self._make_layer(9, 32, 2)
        self.layer3 = self._make_layer(9, 64, 2)
        self.avg_pool = nn.AvgPool2d(8, stride = 1)
        self.linear = nn.Linear(64, 10)
        self.fc = nn.Linear(8*8, 10)
        
        self.apply(_weights_init)
    
    def _make_layer(self, numBlocks, channel,  stride):
        strides = [stride] + [1]*(numBlocks -1)
        layers = []
        for stride in strides :
            layers.append(basicBlock(self.inChannel, channel, stride))
            self.inChannel = channel
        return nn.Sequential(*layers)
    
    def forward(self, x):
        #conv0 224 -> 56
        x = F.relu(self.bn0(self.conv0(x)))
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = F.avg_pool2d(x, x.size()[3])
        x = x.view(x.size(0), -1)
        x = self.linear(x)
        return x


net = resNet()
print(net)

device = torch.device("cuda:0")
net.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-4)

decay_epoch = [100, 150]
step_lr_scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=decay_epoch, gamma=0.1)



Files already downloaded and verified
Files already downloaded and verified
resNet(
  (conv0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn0): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer1): Sequential(
    (0): basicBlock(
      (conv_t): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (conv_m): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (reDem): Sequential()
    )
    (1): basicBlock(
      (conv_t): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (conv_m): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_runn

In [2]:

summary(net,(3,32,32))


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 32, 32]             432
       BatchNorm2d-2           [-1, 16, 32, 32]              32
            Conv2d-3           [-1, 16, 32, 32]           2,304
       BatchNorm2d-4           [-1, 16, 32, 32]              32
            Conv2d-5           [-1, 16, 32, 32]           2,304
       BatchNorm2d-6           [-1, 16, 32, 32]              32
        basicBlock-7           [-1, 16, 32, 32]               0
            Conv2d-8           [-1, 16, 32, 32]           2,304
       BatchNorm2d-9           [-1, 16, 32, 32]              32
           Conv2d-10           [-1, 16, 32, 32]           2,304
      BatchNorm2d-11           [-1, 16, 32, 32]              32
       basicBlock-12           [-1, 16, 32, 32]               0
           Conv2d-13           [-1, 16, 32, 32]           2,304
      BatchNorm2d-14           [-1, 16,

In [3]:

print('hing')
for epoch in range(200):
    net.train()
    running_loss = 0.0
    #print(epoch)
    for i, data in enumerate(trainloader, 0):
        # [inputs, labels]의 목록인 data로부터 입력을 받은 후;
        inputs, labels = data[0].to(device), data[1].to(device)
        #print(i)
        # 변화도(Gradient) 매개변수를 0으로 만들고
        

        # 순전파 + 역전파 + 최적화를 한 후
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # 통계를 출력합니다.
        running_loss += loss.item()
                  
    step_lr_scheduler.step()
    print('epoch : %5d loss : %.7f' %(epoch + 1, (running_loss / 65)))
    
    correct = 0
    total = 0
    #net.eval()
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print('Accuracy of the network on the 10000 test images: %d %%' % (
        100 * correct / total))


hing
epoch :     1 loss : 2.3622616
Accuracy of the network on the 10000 test images: 30 %
epoch :     2 loss : 1.7049124
Accuracy of the network on the 10000 test images: 38 %
epoch :     3 loss : 1.5172529
Accuracy of the network on the 10000 test images: 46 %
epoch :     4 loss : 1.3388647
Accuracy of the network on the 10000 test images: 54 %
epoch :     5 loss : 1.1812409
Accuracy of the network on the 10000 test images: 56 %
epoch :     6 loss : 1.0507733
Accuracy of the network on the 10000 test images: 64 %
epoch :     7 loss : 0.9092663
Accuracy of the network on the 10000 test images: 67 %
epoch :     8 loss : 0.8185395
Accuracy of the network on the 10000 test images: 72 %
epoch :     9 loss : 0.7241904
Accuracy of the network on the 10000 test images: 73 %
epoch :    10 loss : 0.6596391
Accuracy of the network on the 10000 test images: 75 %
epoch :    11 loss : 0.6036499
Accuracy of the network on the 10000 test images: 75 %
epoch :    12 loss : 0.5625653
Accuracy of the ne

Accuracy of the network on the 10000 test images: 85 %
epoch :    97 loss : 0.0078420
Accuracy of the network on the 10000 test images: 85 %
epoch :    98 loss : 0.0041554
Accuracy of the network on the 10000 test images: 85 %
epoch :    99 loss : 0.0036703
Accuracy of the network on the 10000 test images: 85 %
epoch :   100 loss : 0.0132893
Accuracy of the network on the 10000 test images: 84 %
epoch :   101 loss : 0.0072576
Accuracy of the network on the 10000 test images: 85 %
epoch :   102 loss : 0.0027904
Accuracy of the network on the 10000 test images: 85 %
epoch :   103 loss : 0.0017058
Accuracy of the network on the 10000 test images: 85 %
epoch :   104 loss : 0.0014967
Accuracy of the network on the 10000 test images: 85 %
epoch :   105 loss : 0.0014843
Accuracy of the network on the 10000 test images: 85 %
epoch :   106 loss : 0.0014108
Accuracy of the network on the 10000 test images: 85 %
epoch :   107 loss : 0.0013698
Accuracy of the network on the 10000 test images: 85 %

epoch :   192 loss : 0.0005391
Accuracy of the network on the 10000 test images: 85 %
epoch :   193 loss : 0.0005480
Accuracy of the network on the 10000 test images: 85 %
epoch :   194 loss : 0.0004324
Accuracy of the network on the 10000 test images: 85 %
epoch :   195 loss : 0.0004599
Accuracy of the network on the 10000 test images: 85 %
epoch :   196 loss : 0.0007287
Accuracy of the network on the 10000 test images: 85 %
epoch :   197 loss : 0.0004579
Accuracy of the network on the 10000 test images: 85 %
epoch :   198 loss : 0.0004434
Accuracy of the network on the 10000 test images: 85 %
epoch :   199 loss : 0.0005041
Accuracy of the network on the 10000 test images: 85 %
epoch :   200 loss : 0.0004502
Accuracy of the network on the 10000 test images: 85 %


In [4]:
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))


Accuracy of the network on the 10000 test images: 85 %


In [5]:
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels =  data[0].to(device), data[1].to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1


for i in range(10):
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))

Accuracy of plane : 100 %
Accuracy of   car : 100 %
Accuracy of  bird : 85 %
Accuracy of   cat : 66 %
Accuracy of  deer : 71 %
Accuracy of   dog : 66 %
Accuracy of  frog : 54 %
Accuracy of horse : 83 %
Accuracy of  ship : 100 %
Accuracy of truck : 100 %


In [6]:
PATH = '../model/resNetCIFAR10nobias.pth'
torch.save(net.state_dict(), PATH)