In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.backends.cudnn as cudnn
from torch.autograd import Variable

import torchvision
import torchvision.transforms as transforms

In [2]:
lr           = 0.1

start_epoch  = 1
num_epochs   = 200
batch_size   = 256

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
is_use_cuda = torch.cuda.is_available()
torch.backends.cudnn.benchmark = True
best_acc    = 0.

In [3]:
# Data Preprocess
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])

transform_test  = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])

train_dataset = torchvision.datasets.CIFAR10(root='./data', transform=transform_train, train=True, download=True)
test_dataset  = torchvision.datasets.CIFAR10(root='./data', transform=transform_test, train=False, download=True)
train_loader  = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, num_workers=8, shuffle=True)
test_loader   = torch.utils.data.DataLoader(test_dataset, batch_size=80, num_workers=8, shuffle=False)

Files already downloaded and verified
Files already downloaded and verified


In [4]:
import torch.nn.init as init

class ShuffleBlock(nn.Module):
    def __init__(self, groups):
        super(ShuffleBlock, self).__init__()
        self.groups = groups
        
    def forward(self, input):
        n, c, h, w = input.size()
        G          = self.groups
        output     = input.view(n, G, c // G, h, w).permute(0, 2, 1, 3, 4).contiguous().view(n, c, h, w)
        return output

class IGCVBlock(nn.Module):
    def __init__(self, in_channel, out_channel, L, M, stride):
        super(IGCVBlock, self).__init__()
        if out_channel != in_channel:
            M_t = out_channel // L
        else:
            M_t = M
        
        self.conv1    = nn.Conv2d(in_channel, out_channel, 3, stride, 1, groups=L, bias=True)
        self.bn1      = nn.BatchNorm2d(out_channel)
        self.shuffle1 = ShuffleBlock(L)
        self.conv2    = nn.Conv2d(out_channel, out_channel, 1, groups=M_t, bias=True)
        self.bn2      = nn.BatchNorm2d(out_channel)
        self.shuffle2 = ShuffleBlock(M_t)
        
        self.shortcut = nn.Sequential()
        if 1 != stride or in_channel != out_channel:
            self.shortcut = nn.Sequential(
                            nn.Conv2d(in_channel, out_channel, 1, stride, bias=True),
                            nn.BatchNorm2d(out_channel)
            )
        
    def forward(self, input):
        feat = F.relu(self.bn1(self.conv1(input)))
        feat = self.shuffle1(feat)
        feat = self.bn2(self.conv2(feat))
        feat = self.shuffle2(feat)
        feat += self.shortcut(input)
        feat = F.relu(feat)
        return feat

class IGCV_V1(nn.Module):
    def __init__(self, L, M, D, is_L_twice, num_classes):
        super(IGCV_V1, self).__init__()
        
        assert (D - 2) % 3 == 0, 'D must be equival to 3B + 2'
        B = (D - 2) // 3
        
        self.in_channel = L * M
        self.conv1  = nn.Conv2d(3, self.in_channel, 3, 1, 1, bias=True)
        self.bn1    = nn.BatchNorm2d(self.in_channel)
        self.stage1 = self._make_layers(self.in_channel, L, M, B)
        self.in_channel *= 2
        if is_L_twice:
            L *= 2
        else:
            M *= 2
        self.stage2 = self._make_layers(self.in_channel, L, M, B)
        self.in_channel *= 2
        if is_L_twice:
            L *= 2
        else:
            M *= 2
        self.stage3 = self._make_layers(self.in_channel, L, M, B)
        self.linear = nn.Linear(self.in_channel * 2, num_classes)
        
    def _make_layers(self, in_channel, L, M, B):
        strides = [2] + [1] * (B - 1)
        layers = []
        out_channel = self.in_channel
        for i, _stride in enumerate(strides):
            if i == len(strides) - 1:
                out_channel *= 2
            layers.append(IGCVBlock(self.in_channel, out_channel, L, M, _stride))
        return nn.Sequential(*layers)
    
    def forward(self, input):
        feat = F.relu(self.bn1(self.conv1(input)))
        feat = self.stage1(feat)
        feat = self.stage2(feat)
        feat = self.stage3(feat)
        feat = F.avg_pool2d(feat, 4)
        feat = feat.view(feat.size(0), -1)
        out  = self.linear(feat)
        return out
    
def IGCV_L24M2(num_classes):
    return IGCV_V1(24, 2, 20, False, num_classes)

net = IGCV_L24M2(10)
x   = Variable(torch.randn(1, 3, 32, 32))
y   = net(x)
print(y, type(y), y.size())

Variable containing:
 0.3589 -0.1375  0.0552 -0.0505  0.0053  0.3871  0.1123  0.3889  0.0840 -0.1020
[torch.FloatTensor of size 1x10]
 <class 'torch.autograd.variable.Variable'> torch.Size([1, 10])


In [5]:
import numpy as np
def conv_init(m):
    class_name = m.__class__.__name__
    if class_name.find('Conv') != -1:
        init.xavier_uniform(m.weight, gain=np.sqrt(2))
        init.constant(m.bias, 0)
    elif class_name.find('BatchNorm') != -1:
        init.constant(m.weight, 1)
        init.constant(m.bias, 0)
        
net = IGCV_L24M2(10)
net.apply(conv_init)
if is_use_cuda:
    net.cuda()
    net = nn.DataParallel(net, device_ids=range(torch.cuda.device_count()))
    cudnn.benchmark = True
criterion = nn.CrossEntropyLoss()

In [6]:
import math
import os
import sys

def lr_schedule(lr, epoch):
    optim_factor = 0
    if epoch > 160:
        optim_factor = 3
    elif epoch > 120:
        optim_factor = 2
    elif epoch > 60:
        optim_factor = 1
        
    return lr * math.pow(0.2, optim_factor)

def train(epoch):
    net.train()
    train_loss = 0
    correct    = 0
    total      = 0
    optimizer  = optim.SGD(net.parameters(), lr=lr_schedule(lr, epoch), momentum=0.9, weight_decay=5e-4)
    
    print('Training Epoch: #%d, LR: %.4f'%(epoch, lr_schedule(lr, epoch)))
    for idx, (inputs, labels) in enumerate(train_loader):
        if is_use_cuda:
            inputs, labels = inputs.cuda(), labels.cuda()
        optimizer.zero_grad()
        inputs, labels = Variable(inputs), Variable(labels)
        outputs        = net(inputs)
        loss           = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        train_loss += loss.data[0]
        _, predict = torch.max(outputs.data, 1)
        total      += labels.size(0)
        correct    += predict.eq(labels.data).cpu().sum()
        
        sys.stdout.write('\r')
        sys.stdout.write('[%s] Training Epoch [%d/%d] Iter[%d/%d]\t\tLoss: %.4f Acc@1: %.3f'
                        % (time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())),
                           epoch, num_epochs, idx, len(train_dataset) // batch_size, 
                          train_loss / (batch_size * (idx + 1)), correct / total))
        sys.stdout.flush()
        
def test(epoch):
    global best_acc
    net.eval()
    test_loss = 0
    correct   = 0
    total     = 0
    for idx, (inputs, labels) in enumerate(test_loader):
        if is_use_cuda:
            inputs, labels = inputs.cuda(), labels.cuda()
        inputs, labels = Variable(inputs, volatile=True), Variable(labels)
        outputs        = net(inputs)
        loss           = criterion(outputs, labels)
        
        test_loss  += loss.data[0]
        _, predict = torch.max(outputs.data, 1)
        total      += labels.size(0)
        correct    += predict.eq(labels.data).cpu().sum()
        
        sys.stdout.write('\r')
        sys.stdout.write('[%s] Testing Epoch [%d/%d] Iter[%d/%d]\t\tLoss: %.4f Acc@1: %.3f'
                        % (time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())),
                           epoch, num_epochs, idx, len(test_dataset) // 80, 
                          test_loss / (100 * (idx + 1)), correct / total))
        sys.stdout.flush()
        
    if correct / total > best_acc:
        print()
        print('Saving Model...')
        state = {
            'net': net.module if is_use_cuda else net,
            'acc': correct / total,
            'epoch': epoch
        }
        if not os.path.isdir('./checkpoint/IGCV_L24M2'):
            os.makedirs('./checkpoint/IGCV_L24M2')
        torch.save(state, './checkpoint/IGCV_L24M2/IGCV_L24M2_Cifar10.ckpt')
        best_acc = correct / total

In [7]:
import time

for _epoch in range(start_epoch, start_epoch + num_epochs):
    start_time = time.time()
    train(_epoch)
    print()
    test(_epoch)
    print()
    end_time   = time.time()
    print('Epoch #%d Cost %ds' % (_epoch, end_time - start_time))
    
print('Best Acc@1: %.4f' % (best_acc * 100))

Training Epoch: #1, LR: 0.1000
[2018-06-12 17:45:50] Training Epoch [1/200] Iter[195/195]		Loss: 0.0068 Acc@1: 0.354
[2018-06-12 17:45:59] Testing Epoch [1/200] Iter[124/125]		Loss: 0.0146 Acc@1: 0.474
Saving Model...

Epoch #1 Cost 56s
Training Epoch: #2, LR: 0.1000


  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


[2018-06-12 17:46:42] Training Epoch [2/200] Iter[195/195]		Loss: 0.0051 Acc@1: 0.521
[2018-06-12 17:46:51] Testing Epoch [2/200] Iter[124/125]		Loss: 0.0158 Acc@1: 0.451
Epoch #2 Cost 52s
Training Epoch: #3, LR: 0.1000
[2018-06-12 17:47:34] Training Epoch [3/200] Iter[195/195]		Loss: 0.0043 Acc@1: 0.608
[2018-06-12 17:47:43] Testing Epoch [3/200] Iter[124/125]		Loss: 0.0105 Acc@1: 0.621
Saving Model...

Epoch #3 Cost 51s
Training Epoch: #4, LR: 0.1000
[2018-06-12 17:48:26] Training Epoch [4/200] Iter[195/195]		Loss: 0.0037 Acc@1: 0.662
[2018-06-12 17:48:34] Testing Epoch [4/200] Iter[124/125]		Loss: 0.0119 Acc@1: 0.601
Epoch #4 Cost 51s
Training Epoch: #5, LR: 0.1000
[2018-06-12 17:49:17] Training Epoch [5/200] Iter[195/195]		Loss: 0.0033 Acc@1: 0.705
[2018-06-12 17:49:26] Testing Epoch [5/200] Iter[124/125]		Loss: 0.0093 Acc@1: 0.682
Saving Model...

Epoch #5 Cost 51s
Training Epoch: #6, LR: 0.1000
[2018-06-12 17:50:09] Training Epoch [6/200] Iter[195/195]		Loss: 0.0029 Acc@1: 0.737


[2018-06-12 18:17:43] Training Epoch [38/200] Iter[195/195]		Loss: 0.0017 Acc@1: 0.847
[2018-06-12 18:17:51] Testing Epoch [38/200] Iter[124/125]		Loss: 0.0069 Acc@1: 0.777
Epoch #38 Cost 50s
Training Epoch: #39, LR: 0.1000
[2018-06-12 18:18:33] Training Epoch [39/200] Iter[195/195]		Loss: 0.0017 Acc@1: 0.849
[2018-06-12 18:18:42] Testing Epoch [39/200] Iter[124/125]		Loss: 0.0065 Acc@1: 0.791
Epoch #39 Cost 50s
Training Epoch: #40, LR: 0.1000
[2018-06-12 18:19:24] Training Epoch [40/200] Iter[195/195]		Loss: 0.0017 Acc@1: 0.850
[2018-06-12 18:19:33] Testing Epoch [40/200] Iter[124/125]		Loss: 0.0062 Acc@1: 0.789
Epoch #40 Cost 50s
Training Epoch: #41, LR: 0.1000
[2018-06-12 18:20:15] Training Epoch [41/200] Iter[195/195]		Loss: 0.0017 Acc@1: 0.853
[2018-06-12 18:20:24] Testing Epoch [41/200] Iter[124/125]		Loss: 0.0068 Acc@1: 0.777
Epoch #41 Cost 51s
Training Epoch: #42, LR: 0.1000
[2018-06-12 18:21:06] Training Epoch [42/200] Iter[195/195]		Loss: 0.0017 Acc@1: 0.850
[2018-06-12 18:21

[2018-06-12 18:48:11] Training Epoch [74/200] Iter[195/195]		Loss: 0.0008 Acc@1: 0.931
[2018-06-12 18:48:20] Testing Epoch [74/200] Iter[124/125]		Loss: 0.0038 Acc@1: 0.879
Epoch #74 Cost 51s
Training Epoch: #75, LR: 0.0200
[2018-06-12 18:49:01] Training Epoch [75/200] Iter[195/195]		Loss: 0.0007 Acc@1: 0.935
[2018-06-12 18:49:09] Testing Epoch [75/200] Iter[124/125]		Loss: 0.0034 Acc@1: 0.889
Epoch #75 Cost 49s
Training Epoch: #76, LR: 0.0200
[2018-06-12 18:49:51] Training Epoch [76/200] Iter[195/195]		Loss: 0.0008 Acc@1: 0.933
[2018-06-12 18:50:00] Testing Epoch [76/200] Iter[124/125]		Loss: 0.0036 Acc@1: 0.885
Epoch #76 Cost 50s
Training Epoch: #77, LR: 0.0200
[2018-06-12 18:50:41] Training Epoch [77/200] Iter[195/195]		Loss: 0.0007 Acc@1: 0.933
[2018-06-12 18:50:50] Testing Epoch [77/200] Iter[124/125]		Loss: 0.0037 Acc@1: 0.881
Epoch #77 Cost 50s
Training Epoch: #78, LR: 0.0200
[2018-06-12 18:51:31] Training Epoch [78/200] Iter[195/195]		Loss: 0.0008 Acc@1: 0.933
[2018-06-12 18:51

[2018-06-12 19:18:47] Testing Epoch [110/200] Iter[124/125]		Loss: 0.0037 Acc@1: 0.882
Epoch #110 Cost 50s
Training Epoch: #111, LR: 0.0200
[2018-06-12 19:19:28] Training Epoch [111/200] Iter[195/195]		Loss: 0.0007 Acc@1: 0.934
[2018-06-12 19:19:37] Testing Epoch [111/200] Iter[124/125]		Loss: 0.0035 Acc@1: 0.887
Epoch #111 Cost 50s
Training Epoch: #112, LR: 0.0200
[2018-06-12 19:20:19] Training Epoch [112/200] Iter[195/195]		Loss: 0.0007 Acc@1: 0.932
[2018-06-12 19:20:28] Testing Epoch [112/200] Iter[124/125]		Loss: 0.0043 Acc@1: 0.871
Epoch #112 Cost 50s
Training Epoch: #113, LR: 0.0200
[2018-06-12 19:21:10] Training Epoch [113/200] Iter[195/195]		Loss: 0.0007 Acc@1: 0.933
[2018-06-12 19:21:19] Testing Epoch [113/200] Iter[124/125]		Loss: 0.0043 Acc@1: 0.868
Epoch #113 Cost 51s
Training Epoch: #114, LR: 0.0200
[2018-06-12 19:22:02] Training Epoch [114/200] Iter[195/195]		Loss: 0.0007 Acc@1: 0.935
[2018-06-12 19:22:10] Testing Epoch [114/200] Iter[124/125]		Loss: 0.0049 Acc@1: 0.846
E

[2018-06-12 19:49:06] Training Epoch [146/200] Iter[195/195]		Loss: 0.0002 Acc@1: 0.983
[2018-06-12 19:49:15] Testing Epoch [146/200] Iter[124/125]		Loss: 0.0033 Acc@1: 0.909
Epoch #146 Cost 50s
Training Epoch: #147, LR: 0.0040
[2018-06-12 19:49:57] Training Epoch [147/200] Iter[195/195]		Loss: 0.0002 Acc@1: 0.983
[2018-06-12 19:50:05] Testing Epoch [147/200] Iter[124/125]		Loss: 0.0033 Acc@1: 0.907
Epoch #147 Cost 50s
Training Epoch: #148, LR: 0.0040
[2018-06-12 19:50:48] Training Epoch [148/200] Iter[195/195]		Loss: 0.0002 Acc@1: 0.984
[2018-06-12 19:50:57] Testing Epoch [148/200] Iter[124/125]		Loss: 0.0034 Acc@1: 0.910
Epoch #148 Cost 51s
Training Epoch: #149, LR: 0.0040
[2018-06-12 19:51:38] Training Epoch [149/200] Iter[195/195]		Loss: 0.0002 Acc@1: 0.984
[2018-06-12 19:51:47] Testing Epoch [149/200] Iter[124/125]		Loss: 0.0033 Acc@1: 0.911
Epoch #149 Cost 50s
Training Epoch: #150, LR: 0.0040
[2018-06-12 19:52:29] Training Epoch [150/200] Iter[195/195]		Loss: 0.0002 Acc@1: 0.983


[2018-06-12 20:18:55] Testing Epoch [181/200] Iter[124/125]		Loss: 0.0033 Acc@1: 0.916
Saving Model...

Epoch #181 Cost 50s
Training Epoch: #182, LR: 0.0008
[2018-06-12 20:19:38] Training Epoch [182/200] Iter[195/195]		Loss: 0.0001 Acc@1: 0.994
[2018-06-12 20:19:46] Testing Epoch [182/200] Iter[124/125]		Loss: 0.0033 Acc@1: 0.914
Epoch #182 Cost 51s
Training Epoch: #183, LR: 0.0008
[2018-06-12 20:20:29] Training Epoch [183/200] Iter[195/195]		Loss: 0.0001 Acc@1: 0.994
[2018-06-12 20:20:38] Testing Epoch [183/200] Iter[124/125]		Loss: 0.0033 Acc@1: 0.915
Epoch #183 Cost 51s
Training Epoch: #184, LR: 0.0008
[2018-06-12 20:21:20] Training Epoch [184/200] Iter[195/195]		Loss: 0.0001 Acc@1: 0.994
[2018-06-12 20:21:29] Testing Epoch [184/200] Iter[124/125]		Loss: 0.0033 Acc@1: 0.915
Epoch #184 Cost 51s
Training Epoch: #185, LR: 0.0008
[2018-06-12 20:22:10] Training Epoch [185/200] Iter[195/195]		Loss: 0.0001 Acc@1: 0.994
[2018-06-12 20:22:19] Testing Epoch [185/200] Iter[124/125]		Loss: 0.00