In [21]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable
import torchvision.datasets as dset
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import torchvision.models as models
from torchvision import datasets, transforms
import time
import sys
import math

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
batch_size=64
learning_rate = 0.1
layers = 121
transform_train = transforms.Compose([ 
                                      transforms.RandomCrop(32, padding=4), 
                                      transforms.RandomHorizontalFlip(), 
                                      transforms.ToTensor(), 
                                      ]) 
transform_test = transforms.Compose([ 
                                     transforms.ToTensor(), 
                                     ]) 
train_loader = torch.utils.data.DataLoader( 
    datasets.CIFAR10('../data',train=True,download=True,transform=transform_train), 
    batch_size=batch_size,shuffle=True 
    ) 
test_loader = torch.utils.data.DataLoader( 
    datasets.CIFAR10('../data',train=False,transform=transform_test), 
    batch_size=batch_size,shuffle=True )


class BasicBlock(nn.Module):
    def __init__(self, in_planes, out_planes, dropRate=0.0):
        super(BasicBlock, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=1,
                               padding=1, bias=False)
        self.droprate = dropRate
    def forward(self, x):
        out = self.conv1(self.relu(self.bn1(x)))
        if self.droprate > 0:
            out = F.dropout(out, p=self.droprate, training=self.training)
        return torch.cat([x, out], 1)

class BottleneckBlock(nn.Module):
    def __init__(self, in_planes, out_planes, dropRate=0.0):
        super(BottleneckBlock, self).__init__()
        inter_planes = out_planes * 4
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(in_planes, inter_planes, kernel_size=1, stride=1,
                               padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(inter_planes)
        self.conv2 = nn.Conv2d(inter_planes, out_planes, kernel_size=3, stride=1,
                               padding=1, bias=False)
        self.droprate = dropRate
    def forward(self, x):
        out = self.conv1(self.relu(self.bn1(x)))
        if self.droprate > 0:
            out = F.dropout(out, p=self.droprate, inplace=False, training=self.training)
        out = self.conv2(self.relu(self.bn2(out)))
        if self.droprate > 0:
            out = F.dropout(out, p=self.droprate, inplace=False, training=self.training)
        return torch.cat([x, out], 1)

class TransitionBlock(nn.Module):
    def __init__(self, in_planes, out_planes, dropRate=0.0):
        super(TransitionBlock, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1,
                               padding=0, bias=False)
        self.droprate = dropRate
    def forward(self, x):
        out = self.conv1(self.relu(self.bn1(x)))
        if self.droprate > 0:
            out = F.dropout(out, p=self.droprate, inplace=False, training=self.training)
        return F.avg_pool2d(out, 2)

class DenseBlock(nn.Module):
    def __init__(self, nb_layers, in_planes, growth_rate, block, dropRate=0.0):
        super(DenseBlock, self).__init__()
        self.layer = self._make_layer(block, in_planes, growth_rate, nb_layers, dropRate)
    def _make_layer(self, block, in_planes, growth_rate, nb_layers, dropRate):
        layers = []
        for i in range(nb_layers):
            layers.append(block(in_planes+i*growth_rate, growth_rate, dropRate))
        return nn.Sequential(*layers)
    def forward(self, x):
        return self.layer(x)

class DenseNet(nn.Module):
    def __init__(self, depth, num_classes, growth_rate=12,
                 reduction=0.5, bottleneck=True, dropRate=0.0):
        super(DenseNet, self).__init__()
        in_planes = 2 * growth_rate
        n = (depth - 4) / 3
        if bottleneck == True:
            n = n/2
            block = BottleneckBlock
        else:
            block = BasicBlock
        n = int(n)
        # 1st conv before any dense block
        self.conv1 = nn.Conv2d(3, in_planes, kernel_size=3, stride=1,
                               padding=1, bias=False)
        # 1st block
        self.block1 = DenseBlock(n, in_planes, growth_rate, block, dropRate)
        in_planes = int(in_planes+n*growth_rate)
        self.trans1 = TransitionBlock(in_planes, int(math.floor(in_planes*reduction)), dropRate=dropRate)
        in_planes = int(math.floor(in_planes*reduction))
        # 2nd block
        self.block2 = DenseBlock(n, in_planes, growth_rate, block, dropRate)
        in_planes = int(in_planes+n*growth_rate)
        self.trans2 = TransitionBlock(in_planes, int(math.floor(in_planes*reduction)), dropRate=dropRate)
        in_planes = int(math.floor(in_planes*reduction))
        # 3rd block
        self.block3 = DenseBlock(n, in_planes, growth_rate, block, dropRate)
        in_planes = int(in_planes+n*growth_rate)
        # global average pooling and classifier
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.relu = nn.ReLU(inplace=True)
        self.fc = nn.Linear(in_planes, num_classes)
        self.in_planes = in_planes

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.bias.data.zero_()
    def forward(self, x):
        out = self.conv1(x)
        out = self.trans1(self.block1(out))
        out = self.trans2(self.block2(out))
        out = self.block3(out)
        out = self.relu(self.bn1(out))
        out = F.avg_pool2d(out, 8)
        out = out.view(-1, self.in_planes)
        return self.fc(out)


model = DenseNet(layers,10,growth_rate=12,dropRate = 0.0)
model = model.to(device)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.SGD(model.parameters(),lr = learning_rate,
                            momentum=0.9,nesterov=True,weight_decay=1e-4)

Files already downloaded and verified


In [22]:
def train(train_loader,model,criterion,optimizer,epoch):
    model.train()
    trn_loss = 0.0
    start = time.time()
    for i, (input,target) in enumerate(train_loader):
        target = target.to(device)
        input = input.to(device)
        
        output = model(input)
        loss = criterion(output,target)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        trn_loss += loss.data
        if(i%100 == 0):
            end=time.time()
            print("loss in epoch %d , step %d : %f time : %.3f 's" % (epoch, i,trn_loss/100, (end-start))) #loss.data[0]loss.item()
            start=time.time()
            trn_loss = 0.0

def test(test_loader,model,criterion,epoch):
    model.eval()
    
    correct = 0
    
    
    for i, (input,target) in enumerate(test_loader):
        target = target.to(device)
        input = input.to(device)
        
        output = model(input)
        loss = criterion(output,target)
        
        pred = output.data.max(1, keepdim=True)[1]
        correct += pred.eq(target.data.view_as(pred)).cpu().float().sum()
    
    print("Accuracy in epoch %d : %f " % (epoch,100.0*correct/len(test_loader.dataset)))


def adjust_lr(optimizer, epoch, learning_rate):
    if epoch==150 :
        learning_rate*=0.1
        for param_group in optimizer.param_groups:
            param_group['lr'] = learning_rate
fstart = time.time()
for epoch in range(0,20):
    adjust_lr(optimizer,epoch,learning_rate)
    train(train_loader,model,criterion,optimizer,epoch)
    test(test_loader,model,criterion,epoch)
fend = time.time()
print("%.3f 's"% (fend - fstart))

loss in epoch 0 , step 0 : 0.023152 time : 0.155 's
loss in epoch 0 , step 100 : 1.959336 time : 14.569 's
loss in epoch 0 , step 200 : 1.664730 time : 14.582 's
loss in epoch 0 , step 300 : 1.486180 time : 14.581 's
loss in epoch 0 , step 400 : 1.365689 time : 14.586 's
loss in epoch 0 , step 500 : 1.229280 time : 14.585 's
loss in epoch 0 , step 600 : 1.145364 time : 14.577 's
loss in epoch 0 , step 700 : 1.094166 time : 14.579 's
Accuracy in epoch 0 : 59.330002 
loss in epoch 1 , step 0 : 0.011738 time : 0.148 's
loss in epoch 1 , step 100 : 0.985535 time : 14.557 's
loss in epoch 1 , step 200 : 0.930653 time : 14.583 's
loss in epoch 1 , step 300 : 0.923074 time : 14.583 's
loss in epoch 1 , step 400 : 0.862954 time : 14.570 's
loss in epoch 1 , step 500 : 0.843441 time : 14.589 's
loss in epoch 1 , step 600 : 0.811860 time : 14.582 's
loss in epoch 1 , step 700 : 0.791787 time : 14.587 's
Accuracy in epoch 1 : 67.599998 
loss in epoch 2 , step 0 : 0.008283 time : 0.147 's
loss in 