In [11]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.backends.cudnn as cudnn
from torch.autograd import Variable

import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets

In [12]:
lr           = 0.1

start_epoch  = 1
num_epochs   = 200
batch_size   = 96

is_use_cuda = torch.cuda.is_available()
torch.backends.cudnn.benchmark = True
best_acc    = 0.

In [13]:
import os
# Data Preprocess
transform_train = transforms.Compose([
    transforms.RandomCrop(64, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.48024579, 0.44807218, 0.39754775), (0.27698641, 0.26906449, 0.28208191))
])

transform_test  = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.48236438, 0.44950216, 0.39812628), (0.27701401, 0.26931673, 0.2829424))
])

train_dataset = datasets.ImageFolder(os.path.join('E:\\WorkPlace\\pytorch_tiny_imagenet\\data', 'train'), transform_train)
test_dataset  = datasets.ImageFolder(os.path.join('E:\\WorkPlace\\pytorch_tiny_imagenet\\data', 'val'), transform_test)
train_loader  = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, num_workers=8, shuffle=True)
test_loader   = torch.utils.data.DataLoader(test_dataset, batch_size=80, num_workers=8, shuffle=False)
class_name    = train_dataset.classes
#print(class_name)

In [15]:
import torch.nn.init as init

class ShuffleBlock(nn.Module):
    def __init__(self, groups):
        super(ShuffleBlock, self).__init__()
        self.groups = groups
        
    def forward(self, input):
        n, c, h, w = input.size()
        G          = self.groups
        output     = input.view(n, G, c // G, h, w).permute(0, 2, 1, 3, 4).contiguous().view(n, c, h, w)
        return output

class IGCVBlock(nn.Module):
    def __init__(self, in_channel, out_channel, L, M, stride):
        super(IGCVBlock, self).__init__()
        if out_channel != in_channel:
            M_t = out_channel // L
        else:
            M_t = M
        
        self.conv1    = nn.Conv2d(in_channel, out_channel, 3, stride, 1, groups=L, bias=True)
        self.bn1      = nn.BatchNorm2d(out_channel)
        self.shuffle1 = ShuffleBlock(L)
        self.conv2    = nn.Conv2d(out_channel, out_channel, 1, groups=M_t, bias=True)
        self.bn2      = nn.BatchNorm2d(out_channel)
        self.shuffle2 = ShuffleBlock(M_t)
        
        self.shortcut = nn.Sequential()
        if 1 != stride or in_channel != out_channel:
            self.shortcut = nn.Sequential(
                            nn.Conv2d(in_channel, out_channel, 1, stride, bias=True),
                            nn.BatchNorm2d(out_channel)
            )
        
    def forward(self, input):
        feat = F.relu(self.bn1(self.conv1(input)))
        feat = self.shuffle1(feat)
        feat = self.bn2(self.conv2(feat))
        feat = self.shuffle2(feat)
        feat += self.shortcut(input)
        feat = F.relu(feat)
        return feat

class IGCV_V1(nn.Module):
    def __init__(self, L, M, D, is_L_twice, num_classes):
        super(IGCV_V1, self).__init__()
        
        assert (D - 2) % 3 == 0, 'D must be equival to 3B + 2'
        B = (D - 2) // 3
        
        self.in_channel = L * M
        self.conv1  = nn.Conv2d(3, self.in_channel, 3, 1, 1, bias=True)
        self.bn1    = nn.BatchNorm2d(self.in_channel)
        self.stage1 = self._make_layers(self.in_channel, L, M, B)
        self.in_channel *= 2
        if is_L_twice:
            L *= 2
        else:
            M *= 2
        self.stage2 = self._make_layers(self.in_channel, L, M, B)
        self.in_channel *= 2
        if is_L_twice:
            L *= 2
        else:
            M *= 2
        self.stage3 = self._make_layers(self.in_channel, L, M, B)
        self.linear = nn.Linear(self.in_channel * 2, num_classes)
        
    def _make_layers(self, in_channel, L, M, B):
        strides = [2] + [1] * (B - 1)
        layers = []
        out_channel = self.in_channel
        for i, _stride in enumerate(strides):
            if i == len(strides) - 1:
                out_channel *= 2
            layers.append(IGCVBlock(self.in_channel, out_channel, L, M, _stride))
        return nn.Sequential(*layers)
    
    def forward(self, input):
        feat = F.relu(self.bn1(self.conv1(input)))
        feat = self.stage1(feat)
        feat = self.stage2(feat)
        feat = self.stage3(feat)
        feat = F.avg_pool2d(feat, 8)
        feat = feat.view(feat.size(0), -1)
        out  = self.linear(feat)
        return out
    
def IGCV_L24M2(num_classes):
    return IGCV_V1(24, 2, 20, False, num_classes)

net = IGCV_L24M2(200)
x   = Variable(torch.randn(1, 3, 64, 64))
y   = net(x)
print(y, type(y), y.size())

Variable containing:

Columns 0 to 9 
 0.1813 -0.0679  0.0274 -0.3348  0.1392 -0.1380  0.1955 -0.2704  0.1824 -0.1123

Columns 10 to 19 
 0.0124  0.0228 -0.2162 -0.0095 -0.1695 -0.3106  0.0397 -0.0540 -0.0636 -0.0091

Columns 20 to 29 
-0.2761  0.0735 -0.0585 -0.0479  0.0324  0.2498  0.0870  0.0452  0.2367 -0.0895

Columns 30 to 39 
-0.0943 -0.0118  0.2668  0.1365  0.0023 -0.0902 -0.1897 -0.4248  0.2003 -0.1374

Columns 40 to 49 
-0.2398  0.6363 -0.1924 -0.0227 -0.3340  0.0776  0.2507 -0.1656 -0.0578 -0.0871

Columns 50 to 59 
-0.0788 -0.2369 -0.1225 -0.1602 -0.3462 -0.1089  0.2045 -0.2057  0.1805  0.0797

Columns 60 to 69 
-0.3822 -0.2182  0.1988  0.0181 -0.0246 -0.3010  0.4239  0.1257  0.0499  0.1942

Columns 70 to 79 
 0.0482 -0.0360  0.3120 -0.2579  0.1448  0.0282 -0.0545 -0.1435 -0.2621 -0.2400

Columns 80 to 89 
 0.1645 -0.1296 -0.0147  0.0152 -0.0167  0.1685 -0.2259  0.0058 -0.1662 -0.0057

Columns 90 to 99 
 0.0900 -0.0200  0.1237  0.2340 -0.1799  0.1032 -0.2695  0.0700  0.0798

In [16]:
import numpy as np
def conv_init(m):
    class_name = m.__class__.__name__
    if class_name.find('Conv') != -1:
        init.xavier_uniform(m.weight, gain=np.sqrt(2))
        init.constant(m.bias, 0)
    elif class_name.find('BatchNorm') != -1:
        init.constant(m.weight, 1)
        init.constant(m.bias, 0)
        
net = IGCV_L24M2(200)
net.apply(conv_init)
if is_use_cuda:
    net.cuda()
    net = nn.DataParallel(net, device_ids=range(torch.cuda.device_count()))
    cudnn.benchmark = True
criterion = nn.CrossEntropyLoss()

In [17]:
import math
import os
import sys

def lr_schedule(lr, epoch):
    optim_factor = 0
    if epoch > 160:
        optim_factor = 3
    elif epoch > 120:
        optim_factor = 2
    elif epoch > 60:
        optim_factor = 1
        
    return lr * math.pow(0.2, optim_factor)

def train(epoch):
    net.train()
    train_loss = 0
    correct    = 0
    total      = 0
    optimizer  = optim.SGD(net.parameters(), lr=lr_schedule(lr, epoch), momentum=0.9, weight_decay=5e-4)
    
    print('Training Epoch: #%d, LR: %.4f'%(epoch, lr_schedule(lr, epoch)))
    for idx, (inputs, labels) in enumerate(train_loader):
        if is_use_cuda:
            inputs, labels = inputs.cuda(), labels.cuda()
        optimizer.zero_grad()
        inputs, labels = Variable(inputs), Variable(labels)
        outputs        = net(inputs)
        loss           = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        train_loss += loss.data[0]
        _, predict = torch.max(outputs.data, 1)
        total      += labels.size(0)
        correct    += predict.eq(labels.data).cpu().sum()
        
        sys.stdout.write('\r')
        sys.stdout.write('[%s] Training Epoch [%d/%d] Iter[%d/%d]\t\tLoss: %.4f Acc@1: %.3f'
                        % (time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())),
                           epoch, num_epochs, idx, len(train_dataset) // batch_size, 
                          train_loss / (batch_size * (idx + 1)), correct / total))
        sys.stdout.flush()
        
def test(epoch):
    global best_acc
    net.eval()
    test_loss = 0
    correct   = 0
    total     = 0
    for idx, (inputs, labels) in enumerate(test_loader):
        if is_use_cuda:
            inputs, labels = inputs.cuda(), labels.cuda()
        inputs, labels = Variable(inputs, volatile=True), Variable(labels)
        outputs        = net(inputs)
        loss           = criterion(outputs, labels)
        
        test_loss  += loss.data[0]
        _, predict = torch.max(outputs.data, 1)
        total      += labels.size(0)
        correct    += predict.eq(labels.data).cpu().sum()
        
        sys.stdout.write('\r')
        sys.stdout.write('[%s] Testing Epoch [%d/%d] Iter[%d/%d]\t\tLoss: %.4f Acc@1: %.3f'
                        % (time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())),
                           epoch, num_epochs, idx, len(test_dataset) // 80, 
                          test_loss / (100 * (idx + 1)), correct / total))
        sys.stdout.flush()
        
    if correct / total > best_acc:
        print()
        print('Saving Model...')
        state = {
            'net': net.module if is_use_cuda else net,
            'acc': correct / total,
            'epoch': epoch
        }
        if not os.path.isdir('./checkpoint/IGCV_L24M2'):
            os.makedirs('./checkpoint/IGCV_L24M2')
        torch.save(state, './checkpoint/IGCV_L24M2/IGCV_L24M2_Tiny_ImageNet.ckpt')
        best_acc = correct / total

In [18]:
import time

for _epoch in range(start_epoch, start_epoch + num_epochs):
    start_time = time.time()
    train(_epoch)
    print()
    test(_epoch)
    print()
    end_time   = time.time()
    print('Epoch #%d Cost %ds' % (_epoch, end_time - start_time))
    
print('Best Acc@1: %.4f' % (best_acc * 100))

Training Epoch: #1, LR: 0.1000
[2018-06-12 22:01:27] Training Epoch [1/200] Iter[1041/1041]		Loss: 0.0471 Acc@1: 0.069
[2018-06-12 22:01:42] Testing Epoch [1/200] Iter[124/125]		Loss: 0.0414 Acc@1: 0.112
Saving Model...

Epoch #1 Cost 254s
Training Epoch: #2, LR: 0.1000


  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


[2018-06-12 22:05:32] Training Epoch [2/200] Iter[1041/1041]		Loss: 0.0380 Acc@1: 0.178
[2018-06-12 22:05:42] Testing Epoch [2/200] Iter[124/125]		Loss: 0.0350 Acc@1: 0.211
Saving Model...

Epoch #2 Cost 240s
Training Epoch: #3, LR: 0.1000
[2018-06-12 22:09:32] Training Epoch [3/200] Iter[1041/1041]		Loss: 0.0346 Acc@1: 0.234
[2018-06-12 22:09:42] Testing Epoch [3/200] Iter[124/125]		Loss: 0.0361 Acc@1: 0.195
Epoch #3 Cost 239s
Training Epoch: #4, LR: 0.1000
[2018-06-12 22:13:32] Training Epoch [4/200] Iter[1041/1041]		Loss: 0.0329 Acc@1: 0.264
[2018-06-12 22:13:42] Testing Epoch [4/200] Iter[124/125]		Loss: 0.0346 Acc@1: 0.237
Saving Model...

Epoch #4 Cost 240s
Training Epoch: #5, LR: 0.1000
[2018-06-12 22:17:33] Training Epoch [5/200] Iter[1041/1041]		Loss: 0.0318 Acc@1: 0.285
[2018-06-12 22:17:43] Testing Epoch [5/200] Iter[124/125]		Loss: 0.0354 Acc@1: 0.244
Saving Model...

Epoch #5 Cost 240s
Training Epoch: #6, LR: 0.1000
[2018-06-12 22:21:33] Training Epoch [6/200] Iter[1041/10

[2018-06-13 00:25:22] Training Epoch [37/200] Iter[1041/1041]		Loss: 0.0266 Acc@1: 0.386
[2018-06-13 00:25:32] Testing Epoch [37/200] Iter[124/125]		Loss: 0.0286 Acc@1: 0.329
Epoch #37 Cost 240s
Training Epoch: #38, LR: 0.1000
[2018-06-13 00:29:22] Training Epoch [38/200] Iter[1041/1041]		Loss: 0.0266 Acc@1: 0.387
[2018-06-13 00:29:32] Testing Epoch [38/200] Iter[124/125]		Loss: 0.0302 Acc@1: 0.300
Epoch #38 Cost 239s
Training Epoch: #39, LR: 0.1000
[2018-06-13 00:33:22] Training Epoch [39/200] Iter[1041/1041]		Loss: 0.0266 Acc@1: 0.388
[2018-06-13 00:33:32] Testing Epoch [39/200] Iter[124/125]		Loss: 0.0316 Acc@1: 0.284
Epoch #39 Cost 239s
Training Epoch: #40, LR: 0.1000
[2018-06-13 00:37:21] Training Epoch [40/200] Iter[1041/1041]		Loss: 0.0266 Acc@1: 0.388
[2018-06-13 00:37:32] Testing Epoch [40/200] Iter[124/125]		Loss: 0.0296 Acc@1: 0.326
Epoch #40 Cost 239s
Training Epoch: #41, LR: 0.1000
[2018-06-13 00:41:20] Training Epoch [41/200] Iter[1041/1041]		Loss: 0.0266 Acc@1: 0.388
[20

Training Epoch: #73, LR: 0.0200
[2018-06-13 02:49:23] Training Epoch [73/200] Iter[1041/1041]		Loss: 0.0197 Acc@1: 0.528
[2018-06-13 02:49:33] Testing Epoch [73/200] Iter[124/125]		Loss: 0.0222 Acc@1: 0.469
Epoch #73 Cost 240s
Training Epoch: #74, LR: 0.0200
[2018-06-13 02:53:24] Training Epoch [74/200] Iter[1041/1041]		Loss: 0.0195 Acc@1: 0.532
[2018-06-13 02:53:34] Testing Epoch [74/200] Iter[124/125]		Loss: 0.0215 Acc@1: 0.480
Epoch #74 Cost 240s
Training Epoch: #75, LR: 0.0200
[2018-06-13 02:57:24] Training Epoch [75/200] Iter[1041/1041]		Loss: 0.0195 Acc@1: 0.532
[2018-06-13 02:57:34] Testing Epoch [75/200] Iter[124/125]		Loss: 0.0217 Acc@1: 0.475
Epoch #75 Cost 240s
Training Epoch: #76, LR: 0.0200
[2018-06-13 03:01:24] Training Epoch [76/200] Iter[1041/1041]		Loss: 0.0195 Acc@1: 0.531
[2018-06-13 03:01:34] Testing Epoch [76/200] Iter[124/125]		Loss: 0.0225 Acc@1: 0.469
Epoch #76 Cost 239s
Training Epoch: #77, LR: 0.0200
[2018-06-13 03:05:25] Training Epoch [77/200] Iter[1041/1041

[2018-06-13 05:13:33] Training Epoch [109/200] Iter[1041/1041]		Loss: 0.0189 Acc@1: 0.543
[2018-06-13 05:13:43] Testing Epoch [109/200] Iter[124/125]		Loss: 0.0213 Acc@1: 0.483
Epoch #109 Cost 241s
Training Epoch: #110, LR: 0.0200
[2018-06-13 05:17:34] Training Epoch [110/200] Iter[1041/1041]		Loss: 0.0188 Acc@1: 0.545
[2018-06-13 05:17:44] Testing Epoch [110/200] Iter[124/125]		Loss: 0.0216 Acc@1: 0.481
Epoch #110 Cost 240s
Training Epoch: #111, LR: 0.0200
[2018-06-13 05:21:35] Training Epoch [111/200] Iter[1041/1041]		Loss: 0.0189 Acc@1: 0.544
[2018-06-13 05:21:45] Testing Epoch [111/200] Iter[124/125]		Loss: 0.0209 Acc@1: 0.493
Epoch #111 Cost 240s
Training Epoch: #112, LR: 0.0200
[2018-06-13 05:25:34] Training Epoch [112/200] Iter[1041/1041]		Loss: 0.0188 Acc@1: 0.545
[2018-06-13 05:25:44] Testing Epoch [112/200] Iter[124/125]		Loss: 0.0213 Acc@1: 0.479
Epoch #112 Cost 239s
Training Epoch: #113, LR: 0.0200
[2018-06-13 05:29:35] Training Epoch [113/200] Iter[1041/1041]		Loss: 0.0188

[2018-06-13 07:33:27] Training Epoch [144/200] Iter[1041/1041]		Loss: 0.0140 Acc@1: 0.652
[2018-06-13 07:33:37] Testing Epoch [144/200] Iter[124/125]		Loss: 0.0185 Acc@1: 0.552
Epoch #144 Cost 240s
Training Epoch: #145, LR: 0.0040
[2018-06-13 07:37:27] Training Epoch [145/200] Iter[1041/1041]		Loss: 0.0140 Acc@1: 0.653
[2018-06-13 07:37:38] Testing Epoch [145/200] Iter[124/125]		Loss: 0.0183 Acc@1: 0.551
Epoch #145 Cost 240s
Training Epoch: #146, LR: 0.0040
[2018-06-13 07:41:28] Training Epoch [146/200] Iter[1041/1041]		Loss: 0.0140 Acc@1: 0.651
[2018-06-13 07:41:38] Testing Epoch [146/200] Iter[124/125]		Loss: 0.0183 Acc@1: 0.554
Epoch #146 Cost 240s
Training Epoch: #147, LR: 0.0040
[2018-06-13 07:45:28] Training Epoch [147/200] Iter[1041/1041]		Loss: 0.0139 Acc@1: 0.653
[2018-06-13 07:45:38] Testing Epoch [147/200] Iter[124/125]		Loss: 0.0184 Acc@1: 0.555
Epoch #147 Cost 240s
Training Epoch: #148, LR: 0.0040
[2018-06-13 07:49:28] Training Epoch [148/200] Iter[1041/1041]		Loss: 0.0140

[2018-06-13 09:53:45] Training Epoch [179/200] Iter[1041/1041]		Loss: 0.0110 Acc@1: 0.726
[2018-06-13 09:53:55] Testing Epoch [179/200] Iter[124/125]		Loss: 0.0175 Acc@1: 0.576
Epoch #179 Cost 243s
Training Epoch: #180, LR: 0.0008
[2018-06-13 09:57:48] Training Epoch [180/200] Iter[1041/1041]		Loss: 0.0110 Acc@1: 0.726
[2018-06-13 09:57:59] Testing Epoch [180/200] Iter[124/125]		Loss: 0.0175 Acc@1: 0.577
Epoch #180 Cost 243s
Training Epoch: #181, LR: 0.0008
[2018-06-13 10:01:49] Training Epoch [181/200] Iter[1041/1041]		Loss: 0.0110 Acc@1: 0.725
[2018-06-13 10:01:59] Testing Epoch [181/200] Iter[124/125]		Loss: 0.0176 Acc@1: 0.574
Epoch #181 Cost 240s
Training Epoch: #182, LR: 0.0008
[2018-06-13 10:05:49] Training Epoch [182/200] Iter[1041/1041]		Loss: 0.0110 Acc@1: 0.725
[2018-06-13 10:05:59] Testing Epoch [182/200] Iter[124/125]		Loss: 0.0176 Acc@1: 0.575
Epoch #182 Cost 239s
Training Epoch: #183, LR: 0.0008
[2018-06-13 10:09:49] Training Epoch [183/200] Iter[1041/1041]		Loss: 0.0109