In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchsummary import summary
import torch.nn.init as init
import time

### parameter ###
batchSize = 64
setEpoch = 300
### parameter ###

### dataset ###

#Set normalizer
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
#Set transform function
transform_train = transforms.Compose(
    [transforms.ToTensor(),
     normalize])
# transform_train = transforms.Compose(
#     [transforms.RandomCrop(32),
#      transforms.RandomHorizontalFlip(),
#      transforms.ToTensor(),
#      normalize])
transform_test = transforms.Compose(
    [transforms.ToTensor(),
     normalize])

#set dataset  
trainset = torchvision.datasets.CIFAR10(root='../data', train=True,
                                        download=True, transform=transform_train)
testset = torchvision.datasets.CIFAR10(root='../data', train=False,
                                       download=True, transform=transform_test)
#set loader
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batchSize,
                                          shuffle=True, num_workers=2, pin_memory=True)
testloader = torch.utils.data.DataLoader(testset, batch_size=batchSize,
                                         shuffle=False, num_workers=2,pin_memory=True)
#set class label on dataset
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

#import matplotlib.pyplot as plt
#import numpy as np


Files already downloaded and verified
Files already downloaded and verified


In [2]:
#define NN

def _weights_init(m):
    classname = m.__class__.__name__
    #print(classname)
    if isinstance(m, nn.Linear) or isinstance(m, nn.Conv2d):
        init.kaiming_normal_(m.weight)

class basicBlock(nn.Module):
    def __init__(self, inChannel, growthRate, layerDepth, dropRate = 0.2):
        super(basicBlock, self).__init__()
        self.layerDepth = layerDepth
        layers = []
        for i in range(layerDepth):
            layers.append(nn.BatchNorm2d(inChannel))
            # 1x1 conv
            layers.append(nn.Conv2d(inChannel, 4*growthRate, kernel_size=1, bias=False))
            layers.append(nn.BatchNorm2d(4*growthRate))
            # 3x3 conv
            layers.append(nn.Conv2d(4*growthRate, growthRate, kernel_size=3, padding = 1, bias=False))
            inChannel += growthRate
            
        self.moduleList = nn.ModuleList(layers)
        self.dropRate = dropRate
    def forward(self, x):
        for i in range(self.layerDepth):
            #BN, ReLU
            out = F.relu(self.moduleList[4*i](x))
            #1x1 conv (bottleNeck)
            out = self.moduleList[4*i + 1](out)
            #BN, ReLU
            out = F.relu(self.moduleList[4*i + 2](out))
            #3x3 conv
            out = self.moduleList[4*i + 3](out)    
            #dropout
            out = F.dropout(out, p = self.dropRate , training=self.training)
            #concatation
            x = torch.cat((x,out),dim=1)
        return x

class denseNet(nn.Module):
    def _theta(self,layerDepth):
        return int(layerDepth*self.theta)
    
    def __init__(self):
        super(denseNet, self).__init__()
        #parameter
        self.inChannel = 24
        self.growthRate = 12
        self.layerDepth = 16
        self.theta = 0.5 
        
        self.conv0 = nn.Conv2d(3, self.inChannel , kernel_size=3, padding = 1, bias = False)
        
        #transicion layer
        TLdepth1 = self.inChannel + (self.growthRate)*self.layerDepth
        self.TLbn1 = (nn.BatchNorm2d(TLdepth1))
        self.convTL1  = nn.Conv2d(TLdepth1, self._theta(TLdepth1), kernel_size = 1, bias = False)
        self.avgPool1 = nn.AvgPool2d(2, stride = 2)
        
        TLdepth2 = self._theta(TLdepth1) + (self.growthRate)*self.layerDepth
        self.TLbn2 = (nn.BatchNorm2d(TLdepth2))
        self.convTL2  = nn.Conv2d(TLdepth2, self._theta(TLdepth2), kernel_size = 1, bias = False)
        self.avgPool2 = nn.AvgPool2d(2, stride = 2)
        
        #dense blk
        #when cifar, layer's depth are same
        self.dense1 = basicBlock(self.inChannel, self.growthRate, self.layerDepth)
        self.dense2 = basicBlock(self._theta(TLdepth1), self.growthRate, self.layerDepth)
        self.dense3 =  basicBlock(self._theta(TLdepth2), self.growthRate, self.layerDepth)
        
        #classification Layer
        depthClass = self._theta(TLdepth2) +  (self.growthRate)*self.layerDepth
        self.GlobalAvgPool = nn.AvgPool2d(8, stride = 1)
        self.linear = nn.Linear(depthClass, 10)
        
        
        self.apply(_weights_init)

    
    def forward(self, x):
        #conv
        x = self.conv0(x)
        
        #dense blk 1
        x = self.dense1(x)
        #TL - BL -> ReLu -> Conv -> AvgPool 
        x = self.avgPool1(self.convTL1(F.relu(self.TLbn1(x))))
        
        #dense blk 2
        x = self.dense2(x)
        #TL - BL -> ReLu -> Conv -> AvgPool
        x = self.avgPool2(self.convTL2(F.relu(self.TLbn2(x))))
        
        #dense blk 3
        x = self.dense3(x)
        
        #fc
        x = self.GlobalAvgPool(x)
        x = x.view(x.size(0), -1)
        x = self.linear(x)
        return x

In [3]:
net = denseNet()
device = torch.device("cuda:0")
net.to(device)
summary(net,(3,32,32))
#use cuda instead cpu 

#loss function 
criterion = nn.L1Loss()
#optimizer
optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-4)
#eopch 100 -> lr = 0.01, epoch 150 -> lr = 0.001
decay_epoch = [int(setEpoch/2), int(3*setEpoch/4)]
step_lr_scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=decay_epoch, gamma=0.1)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 24, 32, 32]             648
       BatchNorm2d-2           [-1, 24, 32, 32]              48
            Conv2d-3           [-1, 48, 32, 32]           1,152
       BatchNorm2d-4           [-1, 48, 32, 32]              96
            Conv2d-5           [-1, 12, 32, 32]           5,184
       BatchNorm2d-6           [-1, 36, 32, 32]              72
            Conv2d-7           [-1, 48, 32, 32]           1,728
       BatchNorm2d-8           [-1, 48, 32, 32]              96
            Conv2d-9           [-1, 12, 32, 32]           5,184
      BatchNorm2d-10           [-1, 48, 32, 32]              96
           Conv2d-11           [-1, 48, 32, 32]           2,304
      BatchNorm2d-12           [-1, 48, 32, 32]              96
           Conv2d-13           [-1, 12, 32, 32]           5,184
      BatchNorm2d-14           [-1, 60,

In [4]:

print(net)


denseNet(
  (conv0): Conv2d(3, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (TLbn1): BatchNorm2d(216, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (convTL1): Conv2d(216, 108, kernel_size=(1, 1), stride=(1, 1), bias=False)
  (avgPool1): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (TLbn2): BatchNorm2d(300, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (convTL2): Conv2d(300, 150, kernel_size=(1, 1), stride=(1, 1), bias=False)
  (avgPool2): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (dense1): basicBlock(
    (moduleList): ModuleList(
      (0): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (1): Conv2d(24, 48, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (3): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (4): BatchNorm2d(36, eps=1e-05, momentum

In [5]:

#print all layer of NN




In [6]:
testCorrect = []
trainingLoss = []
print('hing')
for epoch in range(setEpoch):
    start = time.time()
    net.train()
    running_loss = 0.0
    #print(epoch)
    for i, data in enumerate(trainloader, 0):
        # [inputs, labels]의 목록인 data로부터 입력을 받은 후;
        inputs, labels = data[0].to(device), data[1].to(device)
        #print(i)
        # 변화도(Gradient) 매개변수를 0으로 만들고
        

        # 순전파 + 역전파 + 최적화를 한 후
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # 통계를 출력합니다.
        running_loss += loss.item()
                  
    step_lr_scheduler.step()
    
    print('epoch : %5d time: %0.4f loss : %.7f' %(epoch + 1, time.time() - start,(running_loss /int(50000/batchSize))))
    trainingLoss.append((running_loss /int(50000/batchSize)))
    
    correct = 0
    total = 0
    net.eval()
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    testCorrect.append(100 * correct / total)
    print('Accuracy of the network on the 10000 test images: %d %%' % (
        100 * correct / total))


hing
epoch :     1 time: 87.7571 loss : 1.6317173
Accuracy of the network on the 10000 test images: 50 %
epoch :     2 time: 84.2863 loss : 1.0881300
Accuracy of the network on the 10000 test images: 63 %
epoch :     3 time: 85.0697 loss : 0.8345923
Accuracy of the network on the 10000 test images: 68 %
epoch :     4 time: 86.2856 loss : 0.6924507
Accuracy of the network on the 10000 test images: 68 %
epoch :     5 time: 86.3886 loss : 0.6040313
Accuracy of the network on the 10000 test images: 75 %
epoch :     6 time: 83.4322 loss : 0.5404195
Accuracy of the network on the 10000 test images: 82 %
epoch :     7 time: 86.1806 loss : 0.5017599
Accuracy of the network on the 10000 test images: 81 %
epoch :     8 time: 84.9636 loss : 0.4613418
Accuracy of the network on the 10000 test images: 83 %
epoch :     9 time: 85.2541 loss : 0.4380511
Accuracy of the network on the 10000 test images: 83 %
epoch :    10 time: 85.1772 loss : 0.4256793
Accuracy of the network on the 10000 test images: 

epoch :    83 time: 86.0329 loss : 0.2058892
Accuracy of the network on the 10000 test images: 84 %
epoch :    84 time: 85.8819 loss : 0.2109311
Accuracy of the network on the 10000 test images: 87 %
epoch :    85 time: 85.2143 loss : 0.2143185
Accuracy of the network on the 10000 test images: 87 %
epoch :    86 time: 84.9764 loss : 0.2159796
Accuracy of the network on the 10000 test images: 88 %
epoch :    87 time: 85.9978 loss : 0.2058173
Accuracy of the network on the 10000 test images: 88 %
epoch :    88 time: 85.9097 loss : 0.2123542
Accuracy of the network on the 10000 test images: 85 %
epoch :    89 time: 83.9200 loss : 0.2125669
Accuracy of the network on the 10000 test images: 88 %
epoch :    90 time: 85.9038 loss : 0.2111684
Accuracy of the network on the 10000 test images: 84 %
epoch :    91 time: 86.1290 loss : 0.2078477
Accuracy of the network on the 10000 test images: 87 %
epoch :    92 time: 85.9895 loss : 0.2113779
Accuracy of the network on the 10000 test images: 88 %


epoch :   165 time: 84.8333 loss : 0.0086240
Accuracy of the network on the 10000 test images: 93 %
epoch :   166 time: 87.3734 loss : 0.0084265
Accuracy of the network on the 10000 test images: 93 %
epoch :   167 time: 85.6382 loss : 0.0087326
Accuracy of the network on the 10000 test images: 93 %
epoch :   168 time: 84.1306 loss : 0.0077897
Accuracy of the network on the 10000 test images: 93 %
epoch :   169 time: 83.5521 loss : 0.0087147
Accuracy of the network on the 10000 test images: 93 %
epoch :   170 time: 84.9014 loss : 0.0081743
Accuracy of the network on the 10000 test images: 93 %
epoch :   171 time: 85.5647 loss : 0.0073723
Accuracy of the network on the 10000 test images: 93 %
epoch :   172 time: 85.4390 loss : 0.0061324
Accuracy of the network on the 10000 test images: 93 %
epoch :   173 time: 84.2050 loss : 0.0073091
Accuracy of the network on the 10000 test images: 93 %
epoch :   174 time: 84.5824 loss : 0.0064653
Accuracy of the network on the 10000 test images: 93 %


In [7]:
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %f %%' % (
    100 * correct / total))


Accuracy of the network on the 10000 test images: 93.170000 %


In [8]:
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels =  data[0].to(device), data[1].to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1


for i in range(10):
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))

Accuracy of plane : 96 %
Accuracy of   car : 94 %
Accuracy of  bird : 87 %
Accuracy of   cat : 87 %
Accuracy of  deer : 92 %
Accuracy of   dog : 86 %
Accuracy of  frog : 94 %
Accuracy of horse : 96 %
Accuracy of  ship : 96 %
Accuracy of truck : 93 %


In [9]:
PATH = '../model/denseNetBCCIFAR10Rev1.pth'
torch.save(net.state_dict(), PATH)

In [21]:
for epoch in range(200,300):
    start = time.time()
    net.train()
    running_loss = 0.0
    #print(epoch)
    for i, data in enumerate(trainloader, 0):
        # [inputs, labels]의 목록인 data로부터 입력을 받은 후;
        inputs, labels = data[0].to(device), data[1].to(device)
        #print(i)
        # 변화도(Gradient) 매개변수를 0으로 만들고
        

        # 순전파 + 역전파 + 최적화를 한 후
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # 통계를 출력합니다.
        running_loss += loss.item()
                  
    step_lr_scheduler.step()
    
    print('epoch : %5d time: %0.4f loss : %.7f' %(epoch + 1, time.time() - start,(running_loss /int(50000/batchSize))))
    trainingLoss.append((running_loss /int(50000/batchSize)))
    
    correct = 0
    total = 0
    net.eval()
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    testCorrect.append(100 * correct / total)
    print('Accuracy of the network on the 10000 test images: %d %%' % (
        100 * correct / total))

epoch :   201 time: 86.1131 loss : 0.0052231
Accuracy of the network on the 10000 test images: 93 %
epoch :   202 time: 85.4969 loss : 0.0055791
Accuracy of the network on the 10000 test images: 93 %
epoch :   203 time: 85.6240 loss : 0.0067457
Accuracy of the network on the 10000 test images: 93 %
epoch :   204 time: 85.4650 loss : 0.0058163
Accuracy of the network on the 10000 test images: 93 %
epoch :   205 time: 85.6663 loss : 0.0061803
Accuracy of the network on the 10000 test images: 93 %
epoch :   206 time: 85.7904 loss : 0.0058150
Accuracy of the network on the 10000 test images: 93 %
epoch :   207 time: 84.7634 loss : 0.0072610
Accuracy of the network on the 10000 test images: 93 %
epoch :   208 time: 84.8469 loss : 0.0050706
Accuracy of the network on the 10000 test images: 93 %
epoch :   209 time: 85.1819 loss : 0.0055232
Accuracy of the network on the 10000 test images: 93 %
epoch :   210 time: 85.2907 loss : 0.0054454
Accuracy of the network on the 10000 test images: 93 %


epoch :   283 time: 84.4734 loss : 0.0016236
Accuracy of the network on the 10000 test images: 93 %
epoch :   284 time: 86.7196 loss : 0.0013790
Accuracy of the network on the 10000 test images: 93 %
epoch :   285 time: 84.3812 loss : 0.0015662
Accuracy of the network on the 10000 test images: 93 %
epoch :   286 time: 85.0443 loss : 0.0013488
Accuracy of the network on the 10000 test images: 93 %
epoch :   287 time: 84.8823 loss : 0.0017182
Accuracy of the network on the 10000 test images: 93 %
epoch :   288 time: 86.3123 loss : 0.0013612
Accuracy of the network on the 10000 test images: 93 %
epoch :   289 time: 85.0467 loss : 0.0013547
Accuracy of the network on the 10000 test images: 93 %
epoch :   290 time: 84.3854 loss : 0.0015482
Accuracy of the network on the 10000 test images: 93 %
epoch :   291 time: 86.4031 loss : 0.0014095
Accuracy of the network on the 10000 test images: 93 %
epoch :   292 time: 85.1683 loss : 0.0011925
Accuracy of the network on the 10000 test images: 93 %


In [23]:
f = open("denseNetBC_accu.csv",'w')
f.write("testAccu, trainLoss\n")
for i in  range(len(testCorrect)):
    f.write("%2.2f, %.7f\n" %(testCorrect[i], trainingLoss[i]))
f.close()

In [22]:
print(testCorrect)
print(trainingLoss)

[50.36, 63.59, 68.67, 68.42, 75.55, 82.18, 81.57, 83.3, 83.59, 83.66, 82.44, 84.27, 84.02, 78.9, 82.79, 84.17, 85.42, 79.69, 84.75, 84.97, 83.9, 85.87, 85.47, 87.16, 83.32, 83.9, 84.76, 85.57, 84.5, 84.85, 83.44, 85.56, 83.04, 85.78, 86.53, 85.84, 85.7, 87.03, 82.29, 83.65, 87.89, 85.76, 87.46, 86.56, 87.79, 86.15, 82.46, 86.67, 87.33, 83.18, 87.53, 83.88, 87.92, 87.6, 89.3, 88.07, 86.54, 87.19, 85.96, 85.3, 83.73, 87.66, 86.27, 89.19, 87.04, 85.71, 86.77, 87.75, 87.58, 86.61, 88.06, 86.26, 85.7, 88.83, 85.98, 89.45, 84.77, 86.04, 87.04, 84.74, 87.27, 88.55, 84.7, 87.97, 87.2, 88.22, 88.29, 85.02, 88.18, 84.43, 87.39, 88.32, 85.81, 87.77, 87.28, 88.95, 86.82, 87.72, 87.67, 87.05, 87.84, 88.91, 83.67, 86.91, 88.06, 86.29, 85.29, 88.13, 88.7, 88.94, 85.76, 87.33, 87.54, 86.29, 88.32, 86.81, 87.02, 86.62, 88.42, 86.07, 88.33, 88.63, 89.4, 89.37, 87.02, 88.66, 87.65, 88.93, 88.08, 87.73, 88.51, 78.27, 88.53, 88.02, 89.43, 89.03, 88.57, 87.12, 87.7, 87.29, 87.43, 86.67, 87.34, 87.33, 89.49,