# DenseNet

In [None]:
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim
import torch.utils.data
import torchvision.transforms as transforms
import torchvision.datasets as datasets

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [None]:
# Hyperparameters: mini-batch size, initial learning rate and total layer count.
batch_size = 64
learning_rate = 0.1
layers = 100

# Training images: random 32x32 crop (after padding by 4), random horizontal
# flip, then conversion to a tensor.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])

# Test images are only converted to tensors (no augmentation).
transform_test = transforms.Compose([transforms.ToTensor()])

# CIFAR-10 training split; download=True fetches the archive into ../data.
train_set = datasets.CIFAR10('../data', train=True, download=True, transform=transform_train)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)

# CIFAR-10 test split, read from the same ../data directory.
# NOTE(review): shuffle=True is kept as in the original; shuffling is not
# required for evaluation.
test_set = datasets.CIFAR10('../data', train=False, transform=transform_test)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=True)


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ../data/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ../data/cifar-10-python.tar.gz to ../data


In [None]:
# Basic (non-bottleneck) dense layer
class BasicBlock(nn.Module):
    """Dense layer made of BatchNorm -> ReLU -> 3x3 convolution.

    The convolution keeps the spatial size (stride 1, padding 1) and produces
    ``out_planes`` channels, which are concatenated to the input along the
    channel dimension (dim 1).

    Args:
        in_planes: number of input channels.
        out_planes: number of channels produced by the convolution.
        dropRate: dropout probability applied to the convolution output;
            dropout is skipped when it is not greater than 0.
    """

    def __init__(self, in_planes, out_planes, dropRate=0.0):
        super().__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(
            in_planes, out_planes, kernel_size=3, stride=1, padding=1, bias=False
        )
        self.droprate = dropRate

    def forward(self, x):
        """Return ``x`` concatenated with the newly computed feature maps."""
        activated = self.relu(self.bn1(x))
        new_features = self.conv1(activated)
        if self.droprate > 0:
            new_features = F.dropout(new_features, p=self.droprate, training=self.training)
        return torch.cat((x, new_features), dim=1)
        

# Bottleneck layer: uses a bottleneck structure like ResNet and Inception.
# As in those networks, a 1x1 convolution runs before the 3x3 convolution,
# but instead of restoring the input channel count afterwards, the 3x3
# convolution produces only growth-rate many feature maps.
# -> reduces the computational cost
class BottleneckBlock(nn.Module):
    """Dense layer: BN -> ReLU -> 1x1 conv -> BN -> ReLU -> 3x3 conv.

    The 1x1 convolution produces ``4 * out_planes`` intermediate channels and
    the 3x3 convolution produces ``out_planes`` channels, which are
    concatenated to the input along the channel dimension (dim 1).

    Args:
        in_planes: number of input channels.
        out_planes: number of new channels produced by the block.
        dropRate: dropout probability applied after each convolution;
            dropout is skipped when it is not greater than 0.
    """

    def __init__(self, in_planes, out_planes, dropRate=0.0):
        super(BottleneckBlock, self).__init__()
        inter_planes = out_planes * 4
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.relu = nn.ReLU(inplace=True)
        # The 1x1 convolution must not pad: padding here would add a ring of
        # artificial zero pixels that bn2 then normalises (skewing its batch
        # statistics and turning the ring into non-zero values).
        self.conv1 = nn.Conv2d(in_planes, inter_planes, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(inter_planes)
        # The 3x3 convolution pads by 1 so the spatial size is preserved and
        # the output can be concatenated with the input.
        self.conv2 = nn.Conv2d(inter_planes, out_planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.droprate = dropRate

    def forward(self, x):
        """Return ``x`` concatenated with the newly computed feature maps."""
        out = self.conv1(self.relu(self.bn1(x)))
        if self.droprate > 0:
            out = F.dropout(out, p=self.droprate, inplace=False, training=self.training)
        out = self.conv2(self.relu(self.bn2(out)))
        if self.droprate > 0:
            out = F.dropout(out, p=self.droprate, inplace=False, training=self.training)
        return torch.cat([x, out], 1)
        

In [None]:
class DenseBlock(nn.Module):
    """Sequential stack of ``nb_layers`` dense layers.

    Layer ``i`` is built as ``block(in_planes + i * growh_rate, growh_rate,
    dropRate)``, i.e. each layer's input width grows by ``growh_rate`` over
    the previous one.

    Args:
        nb_layers: number of layers in the stack.
        in_planes: number of channels entering the first layer.
        growh_rate: channel increment per layer, also passed to ``block`` as
            its output width.
        block: callable/class used to build each layer.
        dropRate: dropout rate forwarded to every layer.
    """

    def __init__(self, nb_layers, in_planes, growh_rate, block, dropRate=0.0):
        super().__init__()
        self.layer = self._make_layer(block, in_planes, growh_rate, nb_layers, dropRate)

    def _make_layer(self, block, in_planes, growh_rate, nb_layers, dropRate):
        """Build the layers in order and wrap them in ``nn.Sequential``."""
        stages = [
            block(in_planes + idx * growh_rate, growh_rate, dropRate)
            for idx in range(nb_layers)
        ]
        return nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through every layer in order."""
        return self.layer(x)


In [None]:
class TransitionBlock(nn.Module):
    """Transition between dense blocks: BN -> ReLU -> 1x1 conv -> 2x2 avg pool.

    The 1x1 convolution maps ``in_planes`` channels to ``out_planes`` and the
    average pooling (kernel size 2) halves the spatial resolution.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        dropRate: dropout probability applied to the convolution output;
            dropout is skipped when it is not greater than 0.
    """

    def __init__(self, in_planes, out_planes, dropRate=0.0):
        super().__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(
            in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False
        )
        self.droprate = dropRate

    def forward(self, x):
        """Return the channel-reduced, spatially downsampled feature maps."""
        activated = self.relu(self.bn1(x))
        reduced = self.conv1(activated)
        if self.droprate > 0:
            reduced = F.dropout(reduced, p=self.droprate, inplace=False, training=self.training)
        return F.avg_pool2d(reduced, 2)

In [None]:
class DenseNet(nn.Module):
    """DenseNet classifier with three dense blocks and two transitions.

    Pipeline: 3x3 conv -> dense block -> transition -> dense block ->
    transition -> dense block -> BN -> ReLU -> 8x8 average pool -> linear.

    Args:
        depth: total layer count used to derive the layers per dense block.
        num_classes: number of output classes of the final linear layer.
        growh_rate: number of channels each dense layer adds.
        reduction: factor applied to the channel count in each transition.
        bottleneck: use ``BottleneckBlock`` (and half as many layers per
            block) when true, otherwise ``BasicBlock``.
        dropRate: dropout rate forwarded to every block.
    """

    def __init__(self, depth, num_classes, growh_rate=12, reduction=0.5, bottleneck=True, dropRate=0.0):
        super(DenseNet, self).__init__()
        num_of_blocks = 3
        in_planes = 16
        # Layers per dense block: from the total depth drop the first conv,
        # the two transitions and the final linear layer
        # (= num_of_blocks + 1 layers), then split evenly over the blocks.
        n = (depth - num_of_blocks - 1) / num_of_blocks
        if reduction != 1:
            in_planes = 2 * growh_rate
        if bottleneck:
            in_planes = 2 * growh_rate
            # Each bottleneck layer contains two convolutions.
            n = n / 2
            block = BottleneckBlock
        else:
            block = BasicBlock

        n = int(n)
        self.conv1 = nn.Conv2d(3, in_planes, kernel_size=3, stride=1, padding=1, bias=False)

        # 1st block
        self.block1 = DenseBlock(n, in_planes, growh_rate, block, dropRate)
        in_planes = int(in_planes + n * growh_rate)
        self.trans1 = TransitionBlock(in_planes, int(math.floor(in_planes * reduction)), dropRate=dropRate)
        in_planes = int(math.floor(in_planes * reduction))

        # 2nd block
        self.block2 = DenseBlock(n, in_planes, growh_rate, block, dropRate)
        in_planes = int(in_planes + n * growh_rate)
        self.trans2 = TransitionBlock(in_planes, int(math.floor(in_planes * reduction)), dropRate=dropRate)
        in_planes = int(math.floor(in_planes * reduction))

        # 3rd block
        self.block3 = DenseBlock(n, in_planes, growh_rate, block, dropRate)
        in_planes = int(in_planes + n * growh_rate)

        self.bn1 = nn.BatchNorm2d(in_planes)
        self.relu = nn.ReLU(inplace=True)

        self.fc = nn.Linear(in_planes, num_classes)

        # Channel count entering the classifier; used to flatten in forward().
        self.in_planes = in_planes

        # Module initialisation
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # Normal init with std sqrt(2 / (kernel_h * kernel_w * out_channels)).
                fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / fan_out))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                # This branch had no body in the original (a SyntaxError);
                # start the classifier bias at zero.
                m.bias.data.zero_()

    def forward(self, x):
        """Return class scores; the shape comments assume 32x32 inputs."""
        # x : 32*32
        out = self.conv1(x)  # 32*32
        out = self.block1(out)  # 32*32
        out = self.trans1(out)  # 16*16
        out = self.block2(out)  # 16*16
        out = self.trans2(out)  # 8*8
        out = self.block3(out)  # 8*8
        out = self.relu(self.bn1(out))  # 8*8
        out = F.avg_pool2d(out, 8)  # 1*1
        out = out.view(-1, self.in_planes)
        return self.fc(out)



In [None]:

# DenseNet arguments: depth, num_classes (10 for CIFAR-10), growh_rate=12,
# reduction=0.5, bottleneck=True, dropRate=0.0

#model = torch.load('DenseNetModelSave.pt')
model = DenseNet(layers, 10, growh_rate=12, dropRate=0.0)

# Report the total number of scalar parameters in the model.
param_count = sum(p.data.nelement() for p in model.parameters())
print('Number of model parameters: {}'.format(param_count))

model = model.to(device)

# Cross-entropy loss, optimised with Nesterov-momentum SGD and weight decay.
criterion = nn.CrossEntropyLoss().to(device)
#optimizer = torch.optim.Adam(model.parameters(),lr = learning_rate)
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=learning_rate,
    momentum=0.9,
    nesterov=True,
    weight_decay=1e-4,
)


Number of model parameters: 769162


In [None]:
def train(train_loader, model, criterion, optimizer, epoch):
    """Train ``model`` for one pass over ``train_loader``.

    Each batch is moved to the module-level ``device``, the loss from
    ``criterion`` is back-propagated and ``optimizer`` takes one step.
    The current loss is printed every 20 steps.

    Args:
        train_loader: iterable yielding ``(input, target)`` batches.
        model: network to train; switched to training mode.
        criterion: loss function called as ``criterion(output, target)``.
        optimizer: optimizer updating the model parameters.
        epoch: epoch index, used only in the log message.
    """
    model.train()
    for step, (images, labels) in enumerate(train_loader):
        labels = labels.to(device)
        images = images.to(device)

        logits = model(images)
        loss = criterion(logits, labels)

        # Clear gradients left over from the previous step before backprop.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step % 20 == 0:
            print("loss in epoch %d , step %d : %f" % (epoch, step, loss.item()))




In [None]:
def test(test_loader, model, criterion, epoch):
    """Evaluate ``model`` on ``test_loader`` and print the accuracy in percent.

    Args:
        test_loader: iterable yielding ``(input, target)`` batches; its
            ``dataset`` length is the accuracy denominator.
        model: network to evaluate; switched to evaluation mode.
        criterion: unused; kept so existing callers keep working (the
            original computed a loss here but never used it).
        epoch: epoch index, used only in the log message.
    """
    model.eval()

    correct = 0

    # Evaluation needs no gradients; disabling autograd avoids building a
    # graph for every batch and lowers memory use.
    with torch.no_grad():
        for inputs, target in test_loader:
            target = target.to(device)
            inputs = inputs.to(device)

            output = model(inputs)

            # Index of the highest score per sample is the predicted class.
            pred = output.max(1, keepdim=True)[1]
            # Accumulate as a Python int so the count stays exact.
            correct += pred.eq(target.view_as(pred)).sum().item()

    print("Accuracy in epoch %d : %f" % (epoch, 100.0 * correct / len(test_loader.dataset)))


In [None]:
def adjust_lr(optimizer, epoch, learning_rate):
    """Drop the learning rate to a tenth of ``learning_rate`` at epoch 15.

    At any other epoch the optimizer is left untouched.

    Args:
        optimizer: object whose ``param_groups`` dicts get their ``'lr'`` set.
        epoch: current epoch index.
        learning_rate: base learning rate that is scaled by 0.1.
    """
    if epoch != 15:
        return
    decayed = learning_rate * 0.1
    for group in optimizer.param_groups:
        group['lr'] = decayed


In [None]:
# Train for 30 epochs; each epoch first applies the learning-rate schedule,
# then runs one training pass followed by an evaluation on the test set.
for epoch in range(30):
    adjust_lr(optimizer, epoch, learning_rate)
    train(train_loader, model, criterion, optimizer, epoch)
    test(test_loader, model, criterion, epoch)


loss in epoch 0 , step 0 : 2.304410
loss in epoch 0 , step 20 : 1.905923
loss in epoch 0 , step 40 : 2.070671
loss in epoch 0 , step 60 : 1.868646
loss in epoch 0 , step 80 : 1.808228
loss in epoch 0 , step 100 : 1.658661
loss in epoch 0 , step 120 : 1.531688
loss in epoch 0 , step 140 : 1.671514
loss in epoch 0 , step 160 : 1.429664
loss in epoch 0 , step 180 : 1.788417
loss in epoch 0 , step 200 : 1.362588
loss in epoch 0 , step 220 : 1.624362
loss in epoch 0 , step 240 : 1.582504
loss in epoch 0 , step 260 : 1.437093
loss in epoch 0 , step 280 : 1.611432
loss in epoch 0 , step 300 : 1.347045
loss in epoch 0 , step 320 : 1.378518
loss in epoch 0 , step 340 : 1.646602
loss in epoch 0 , step 360 : 1.180884
loss in epoch 0 , step 380 : 1.486677
loss in epoch 0 , step 400 : 1.282289
loss in epoch 0 , step 420 : 1.376388
loss in epoch 0 , step 440 : 1.471908
loss in epoch 0 , step 460 : 1.159019
loss in epoch 0 , step 480 : 1.431210
loss in epoch 0 , step 500 : 1.058682
loss in epoch 0 , 