In [1]:
import os
import time

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchsummary import summary

### parameter ###
batchSize = 64    # mini-batch size used by both data loaders
setEpoch = 300    # number of epochs run by the main training loop
### parameter ###

### dataset ###

# Per-channel normalisation applied after ToTensor().
# NOTE(review): these look like the usual ImageNet statistics rather than
# CIFAR-10 ones - confirm that this is intended.
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Training pipeline: tensor conversion followed by normalisation only.
# A disabled alternative with augmentation was:
#     RandomCrop(32) -> RandomHorizontalFlip() -> ToTensor() -> normalize
transform_train = transforms.Compose([
    transforms.ToTensor(),
    normalize,
])

# Test pipeline: same steps as the training pipeline.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    normalize,
])

# CIFAR-10 train / test splits, downloaded into ../data when missing.
trainset = torchvision.datasets.CIFAR10(
    root='../data',
    train=True,
    download=True,
    transform=transform_train,
)
testset = torchvision.datasets.CIFAR10(
    root='../data',
    train=False,
    download=True,
    transform=transform_test,
)

# Loaders: only the training split is shuffled.
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=batchSize,
    shuffle=True,
    num_workers=2,
    pin_memory=True,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=batchSize,
    shuffle=False,
    num_workers=2,
    pin_memory=True,
)

# Human-readable class names, indexed by label id.
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

#import matplotlib.pyplot as plt
#import numpy as np


Files already downloaded and verified
Files already downloaded and verified


In [4]:
#define NN

def _weights_init(m):
    classname = m.__class__.__name__
    #print(classname)
    if isinstance(m, nn.Linear) or isinstance(m, nn.Conv2d):
        init.kaiming_normal_(m.weight)

class basicBlock(nn.Module):
    """Dense block made of ``layerDepth`` bottleneck layers.

    Every layer computes BN -> ReLU -> 1x1 conv (to ``4*growthRate`` channels)
    -> BN -> ReLU -> 3x3 conv (to ``growthRate`` channels) -> dropout, and the
    result is concatenated to the layer's input along the channel dimension.
    The block therefore outputs ``inChannel + layerDepth*growthRate`` channels.

    The sub-modules are stored flat in ``self.moduleList`` as groups of four:
    index ``4*k`` .. ``4*k + 3`` hold (BN, 1x1 conv, BN, 3x3 conv) of layer k.
    """

    def __init__(self, inChannel, growthRate, layerDepth, dropRate = 0.2):
        super().__init__()
        self.layerDepth = layerDepth
        self.dropRate = dropRate

        bottleneck = 4 * growthRate
        channels = inChannel
        stack = []
        for _ in range(layerDepth):
            stack += [
                nn.BatchNorm2d(channels),
                # 1x1 bottleneck convolution
                nn.Conv2d(channels, bottleneck, kernel_size=1, bias=False),
                nn.BatchNorm2d(bottleneck),
                # 3x3 convolution; padding keeps the spatial size
                nn.Conv2d(bottleneck, growthRate, kernel_size=3, padding=1, bias=False),
            ]
            # the next layer also sees the features produced by this one
            channels += growthRate
        self.moduleList = nn.ModuleList(stack)

    def forward(self, x):
        """Run all layers, growing ``x`` by ``growthRate`` channels per layer."""
        for layer in range(self.layerDepth):
            base = 4 * layer
            bn_in = self.moduleList[base]
            conv_reduce = self.moduleList[base + 1]
            bn_mid = self.moduleList[base + 2]
            conv_grow = self.moduleList[base + 3]

            feat = conv_reduce(F.relu(bn_in(x)))
            feat = conv_grow(F.relu(bn_mid(feat)))
            # dropout is only active while the module is in training mode
            feat = F.dropout(feat, p=self.dropRate, training=self.training)
            # dense connectivity: append the new features to the running input
            x = torch.cat((x, feat), dim=1)
        return x

class denseNet(nn.Module):
    """Densely connected CNN classifier with bottleneck layers and compression.

    Layout: 3x3 stem conv -> dense block 1 -> transition 1 -> dense block 2
    -> transition 2 -> dense block 3 -> global average pool -> linear.
    Each transition is BN -> ReLU -> 1x1 conv (channels scaled by ``theta``)
    -> 2x2 average pool, so the spatial size is halved twice overall.

    Args:
        inChannel: number of channels produced by the stem convolution.
        growthRate: channels added by every layer of a dense block.
        layerDepth: number of layers in each of the three dense blocks.
        theta: compression factor applied by the transition layers.
        numClasses: size of the output logits vector.

    The defaults reproduce the original fixed configuration
    (24 stem channels, growth rate 12, 16 layers per block, theta 0.5,
    10 classes).
    """

    def _theta(self,layerDepth):
        """Return the compressed channel count ``int(layerDepth * self.theta)``.

        Despite its name, the argument is a channel count at every call site.
        """
        return int(layerDepth*self.theta)

    def __init__(self, inChannel=24, growthRate=12, layerDepth=16, theta=0.5, numClasses=10):
        super(denseNet, self).__init__()
        # hyper-parameters
        self.inChannel = inChannel
        self.growthRate = growthRate
        self.layerDepth = layerDepth
        self.theta = theta

        # stem convolution; padding keeps the spatial size
        self.conv0 = nn.Conv2d(3, self.inChannel, kernel_size=3, padding=1, bias=False)

        # channels added by one full dense block
        blockGrowth = self.growthRate * self.layerDepth

        # transition layer 1
        TLdepth1 = self.inChannel + blockGrowth
        self.TLbn1 = nn.BatchNorm2d(TLdepth1)
        self.convTL1 = nn.Conv2d(TLdepth1, self._theta(TLdepth1), kernel_size=1, bias=False)
        self.avgPool1 = nn.AvgPool2d(2, stride=2)

        # transition layer 2
        TLdepth2 = self._theta(TLdepth1) + blockGrowth
        self.TLbn2 = nn.BatchNorm2d(TLdepth2)
        self.convTL2 = nn.Conv2d(TLdepth2, self._theta(TLdepth2), kernel_size=1, bias=False)
        self.avgPool2 = nn.AvgPool2d(2, stride=2)

        # dense blocks; all three use the same depth
        self.dense1 = basicBlock(self.inChannel, self.growthRate, self.layerDepth)
        self.dense2 = basicBlock(self._theta(TLdepth1), self.growthRate, self.layerDepth)
        self.dense3 = basicBlock(self._theta(TLdepth2), self.growthRate, self.layerDepth)

        # classification head
        depthClass = self._theta(TLdepth2) + blockGrowth
        # Adaptive pooling always reduces the feature map to 1x1. The previous
        # fixed AvgPool2d(16) only did so for 64x64 inputs and raised for
        # 32x32 inputs, whose final feature map is 8x8.
        self.GlobalAvgPool = nn.AdaptiveAvgPool2d(1)
        self.linear = nn.Linear(depthClass, numClasses)

        # Kaiming-normal init for every Conv2d / Linear weight
        self.apply(_weights_init)

    def forward(self, x):
        """Map a batch of 3-channel images to ``numClasses`` logits per image."""
        # stem conv
        x = self.conv0(x)

        # dense block 1
        x = self.dense1(x)
        # transition: BN -> ReLU -> 1x1 conv -> 2x2 average pool
        x = self.avgPool1(self.convTL1(F.relu(self.TLbn1(x))))

        # dense block 2
        x = self.dense2(x)
        # transition: BN -> ReLU -> 1x1 conv -> 2x2 average pool
        x = self.avgPool2(self.convTL2(F.relu(self.TLbn2(x))))

        # dense block 3
        x = self.dense3(x)

        # global average pool -> flatten -> linear classifier
        x = self.GlobalAvgPool(x)
        x = x.view(x.size(0), -1)
        x = self.linear(x)
        return x

In [5]:
net = denseNet()
# Use the first GPU when available; otherwise fall back to the CPU instead of
# failing on net.to(device).
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net.to(device)
summary(net,(3,32,32))

# loss function
criterion = nn.CrossEntropyLoss()
# optimizer
optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-4)
# LR schedule: multiply the learning rate by 0.1 at 50% and at 75% of setEpoch
# (0.1 -> 0.01 -> 0.001).
decay_epoch = [setEpoch // 2, (3 * setEpoch) // 4]
step_lr_scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=decay_epoch, gamma=0.1)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 24, 64, 64]             648
       BatchNorm2d-2           [-1, 24, 64, 64]              48
            Conv2d-3           [-1, 48, 64, 64]           1,152
       BatchNorm2d-4           [-1, 48, 64, 64]              96
            Conv2d-5           [-1, 12, 64, 64]           5,184
       BatchNorm2d-6           [-1, 36, 64, 64]              72
            Conv2d-7           [-1, 48, 64, 64]           1,728
       BatchNorm2d-8           [-1, 48, 64, 64]              96
            Conv2d-9           [-1, 12, 64, 64]           5,184
      BatchNorm2d-10           [-1, 48, 64, 64]              96
           Conv2d-11           [-1, 48, 64, 64]           2,304
      BatchNorm2d-12           [-1, 48, 64, 64]              96
           Conv2d-13           [-1, 12, 64, 64]           5,184
      BatchNorm2d-14           [-1, 60,

In [0]:

# Print the network's module hierarchy (every registered sub-module).
print(net)


In [0]:

# print all layers of the NN




In [0]:
testCorrect = []    # test accuracy (%) measured after every epoch
trainingLoss = []   # mean training loss per batch for every epoch
for epoch in range(setEpoch):
    start = time.time()
    net.train()
    running_loss = 0.0
    for inputs, labels in trainloader:
        # move the batch to the training device
        inputs, labels = inputs.to(device), labels.to(device)

        # forward pass
        outputs = net(inputs)
        loss = criterion(outputs, labels)

        # reset gradients, back-propagate, update the weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # accumulate statistics
        running_loss += loss.item()

    # advance the learning-rate schedule once per epoch
    step_lr_scheduler.step()

    # Average over the real number of batches. The previous divisor,
    # int(50000/batchSize), ignored the final partial batch.
    epoch_loss = running_loss / len(trainloader)
    print('epoch : %5d time: %0.4f loss : %.7f' %(epoch + 1, time.time() - start, epoch_loss))
    trainingLoss.append(epoch_loss)

    # evaluate on the test split after every epoch
    correct = 0
    total = 0
    net.eval()
    with torch.no_grad():
        for images, labels in testloader:
            images, labels = images.to(device), labels.to(device)
            outputs = net(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    testCorrect.append(100 * correct / total)
    # %.2f instead of %d so the reported accuracy is not truncated
    print('Accuracy of the network on the 10000 test images: %.2f %%' % (
        100 * correct / total))


In [0]:
# Overall accuracy on the test split.
correct = 0
total = 0
# Set eval mode explicitly so BatchNorm and dropout behave deterministically
# no matter which cell ran before this one.
net.eval()
with torch.no_grad():
    for images, labels in testloader:
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %f %%' % (
    100 * correct / total))


In [0]:
# Per-class accuracy on the test split.
class_correct = [0] * len(classes)
class_total = [0] * len(classes)
# Set eval mode explicitly so the result does not depend on earlier cells.
net.eval()
with torch.no_grad():
    for images, labels in testloader:
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        hits = (predicted == labels)
        # Count every sample of the batch. The previous `range(4)` only
        # looked at the first four samples of each batch.
        for label, hit in zip(labels.tolist(), hits.tolist()):
            class_correct[label] += int(hit)
            class_total[label] += 1


for i in range(len(classes)):
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))

In [0]:
# Save only the learned parameters (state_dict), not the whole module object.
PATH = '../model/denseNetBCCIFAR10Rev1.pth'
# torch.save does not create missing parent directories, so make sure the
# target directory exists first.
os.makedirs(os.path.dirname(PATH), exist_ok=True)
torch.save(net.state_dict(), PATH)

In [0]:
# Extra training epochs, labelled 201..300 in the log output. The loop reuses
# the existing optimizer / scheduler and appends to testCorrect / trainingLoss.
# NOTE(review): the epoch range is hard-coded; presumably this cell resumes a
# run that stopped after 200 epochs - confirm before running it after a
# complete main loop.
for epoch in range(200,300):
    start = time.time()
    net.train()
    running_loss = 0.0
    for inputs, labels in trainloader:
        # move the batch to the training device
        inputs, labels = inputs.to(device), labels.to(device)

        # forward pass
        outputs = net(inputs)
        loss = criterion(outputs, labels)

        # reset gradients, back-propagate, update the weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # accumulate statistics
        running_loss += loss.item()

    # advance the learning-rate schedule once per epoch
    step_lr_scheduler.step()

    # Average over the real number of batches. The previous divisor,
    # int(50000/batchSize), ignored the final partial batch.
    epoch_loss = running_loss / len(trainloader)
    print('epoch : %5d time: %0.4f loss : %.7f' %(epoch + 1, time.time() - start, epoch_loss))
    trainingLoss.append(epoch_loss)

    # evaluate on the test split after every epoch
    correct = 0
    total = 0
    net.eval()
    with torch.no_grad():
        for images, labels in testloader:
            images, labels = images.to(device), labels.to(device)
            outputs = net(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    testCorrect.append(100 * correct / total)
    # %.2f instead of %d so the reported accuracy is not truncated
    print('Accuracy of the network on the 10000 test images: %.2f %%' % (
        100 * correct / total))
In [0]:
# Export the per-epoch history as CSV: one row per epoch.
# The context manager closes the file even if a write raises.
with open("denseNetBC_accu.csv", 'w') as f:
    f.write("testAccu, trainLoss\n")
    for accuracy, loss_value in zip(testCorrect, trainingLoss):
        f.write("%2.2f, %.7f\n" % (accuracy, loss_value))

In [0]:
# Dump both per-epoch histories, one list per line.
print(testCorrect, trainingLoss, sep='\n')