In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

import torchvision
import torchvision.transforms as transforms

In [2]:
lr           = 0.1

start_epoch  = 1
num_epochs   = 200
batch_size   = 128

is_use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if is_use_cuda else "cpu")
best_acc    = 0.

In [3]:
# Data Preprocess
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])

transform_test  = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])

train_dataset = torchvision.datasets.CIFAR10(root='../../env_pytorch/IGCV_v1/data', transform=transform_train, train=True, download=True)
test_dataset  = torchvision.datasets.CIFAR10(root='../../env_pytorch/IGCV_v1/data', transform=transform_test, train=False, download=True)
train_loader  = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, num_workers=8, shuffle=True)
test_loader   = torch.utils.data.DataLoader(test_dataset, batch_size=80, num_workers=8, shuffle=False)

Files already downloaded and verified
Files already downloaded and verified


In [4]:
class BasicBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride):
        super(BasicBlock, self).__init__()
        reduction = 0.5
        if 2 == stride:
            reduction = 1
        elif in_channels > out_channels:
            reduction = 0.25
            
        self.conv1 = nn.Conv2d(in_channels, int(in_channels * reduction), 1, stride, bias=True)
        self.bn1   = nn.BatchNorm2d(int(in_channels * reduction))
        self.conv2 = nn.Conv2d(int(in_channels * reduction), int(in_channels * reduction * 0.5), 1, 1, bias=True)
        self.bn2   = nn.BatchNorm2d(int(in_channels * reduction * 0.5))
        self.conv3 = nn.Conv2d(int(in_channels * reduction * 0.5), int(in_channels * reduction), (1, 3), 1, (0, 1), bias=True)
        self.bn3   = nn.BatchNorm2d(int(in_channels * reduction))
        self.conv4 = nn.Conv2d(int(in_channels * reduction), int(in_channels * reduction), (3, 1), 1, (1, 0), bias=True)
        self.bn4   = nn.BatchNorm2d(int(in_channels * reduction))
        self.conv5 = nn.Conv2d(int(in_channels * reduction), out_channels, 1, 1, bias=True)
        self.bn5   = nn.BatchNorm2d(out_channels)
        
        self.shortcut = nn.Sequential()
        if 2 == stride or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                            nn.Conv2d(in_channels, out_channels, 1, stride, bias=True),
                            nn.BatchNorm2d(out_channels)
            )
            
    def forward(self, input):
        output = F.relu(self.bn1(self.conv1(input)))
        output = F.relu(self.bn2(self.conv2(output)))
        output = F.relu(self.bn3(self.conv3(output)))
        output = F.relu(self.bn4(self.conv4(output)))
        output = F.relu(self.bn5(self.conv5(output)))
        output += F.relu(self.shortcut(input))
        output = F.relu(output)
        return output
    
class SqueezeNext(nn.Module):
    def __init__(self, width_x, blocks, num_classes):
        super(SqueezeNext, self).__init__()
        self.in_channels = 64
        
        self.conv1  = nn.Conv2d(3, int(width_x * self.in_channels), 3, 1, 1, bias=True)     # For Cifar10
        #self.conv1  = nn.Conv2d(3, int(width_x * self.in_channels), 3, 2, 1, bias=True)     # For Tiny-ImageNet
        self.bn1    = nn.BatchNorm2d(int(width_x * self.in_channels))
        self.stage1 = self._make_layer(blocks[0], width_x, 32, 1)
        self.stage2 = self._make_layer(blocks[1], width_x, 64, 2)
        self.stage3 = self._make_layer(blocks[2], width_x, 128, 2)
        self.stage4 = self._make_layer(blocks[3], width_x, 256, 2)
        self.conv2  = nn.Conv2d(int(width_x * self.in_channels), int(width_x * 128), 1, 1, bias=True)
        self.bn2    = nn.BatchNorm2d(int(width_x * 128))
        self.linear = nn.Linear(int(width_x * 128), num_classes)
        
    def _make_layer(self, num_block, width_x, out_channels, stride):
        strides = [stride] + [1] * (num_block - 1)
        layers  = []
        for _stride in strides:
            layers.append(BasicBlock(int(width_x * self.in_channels), int(width_x * out_channels), _stride))
            self.in_channels = out_channels
        return nn.Sequential(*layers)
    
    def forward(self, input):
        output = F.relu(self.bn1(self.conv1(input)))
        output = self.stage1(output)
        output = self.stage2(output)
        output = self.stage3(output)
        output = self.stage4(output)
        output = F.relu(self.bn2(self.conv2(output)))
        output = F.avg_pool2d(output, 4)
        output = output.view(output.size(0), -1)
        output = self.linear(output)
        return output
    
def SqNxt_23_1x(num_classes):
    return SqueezeNext(1.0, [6, 6, 8, 1], num_classes)

def SqNxt_23_1x_v5(num_classes):
    return SqueezeNext(1.0, [2, 4, 14, 1], num_classes)

def SqNxt_23_2x(num_classes):
    return SqueezeNext(2.0, [6, 6, 8, 1], num_classes)

def SqNxt_23_2x_v5(num_classes):
    return SqueezeNext(2.0, [2, 4, 14, 1], num_classes)

net = SqNxt_23_1x(10)
tmp = torch.randn(1, 3, 32, 32)
y   = net(tmp)
print(y, type(y), y.size())

tensor([[ 0.1680,  0.1822, -0.0994, -0.0778, -0.2032, -0.0169, -0.2081,
         -0.0093,  0.0043, -0.1735]]) <class 'torch.Tensor'> torch.Size([1, 10])


In [5]:
import torch.nn.init as init
import numpy as np

def conv_init(m):
    class_name = m.__class__.__name__
    if class_name.find('Conv') != -1:
        init.xavier_uniform_(m.weight, gain=np.sqrt(2))
        init.constant_(m.bias, 0)
    elif class_name.find('BatchNorm') != -1:
        init.constant_(m.weight, 1)
        init.constant_(m.bias, 0)
        
net = SqNxt_23_1x(10)
net.apply(conv_init)
if is_use_cuda:
    net.to(device)
    net = nn.DataParallel(net, device_ids=range(torch.cuda.device_count()))
criterion = nn.CrossEntropyLoss()

In [8]:
import math
import sys
import time
import os

def lr_schedule(lr, epoch):
    optim_factor = 0
    if epoch > 160:
        optim_factor = 3
    elif epoch > 120:
        optim_factor = 2
    elif epoch > 60:
        optim_factor = 1
        
    return lr * math.pow(0.2, optim_factor)

def train(epoch):
    net.train()
    train_loss = 0
    correct    = 0
    total      = 0
    optimizer  = optim.SGD(net.parameters(), lr=lr_schedule(lr, epoch), momentum=0.9, weight_decay=5e-4)
    
    print('Training Epoch: #%d, LR: %.4f'%(epoch, lr_schedule(lr, epoch)))
    for idx, (inputs, labels) in enumerate(train_loader):
        if is_use_cuda:
            inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs        = net(inputs)
        loss           = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
        _, predict = torch.max(outputs, 1)
        total      += labels.size(0)
        correct    += predict.eq(labels).cpu().sum().double()
        
        sys.stdout.write('\r')
        sys.stdout.write('[%s] Training Epoch [%d/%d] Iter[%d/%d]\t\tLoss: %.4f Acc@1: %.3f'
                        % (time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())),
                           epoch, num_epochs, idx, len(train_dataset) // batch_size, 
                          train_loss / (batch_size * (idx + 1)), correct / total))
        sys.stdout.flush()
        
def test(epoch):
    global best_acc
    net.eval()
    test_loss = 0
    correct   = 0
    total     = 0
    for idx, (inputs, labels) in enumerate(test_loader):
        if is_use_cuda:
            inputs, labels = inputs.to(device), labels.to(device)
        outputs        = net(inputs)
        loss           = criterion(outputs, labels)
        
        test_loss  += loss.item()
        _, predict = torch.max(outputs, 1)
        total      += labels.size(0)
        correct    += predict.eq(labels).cpu().sum().double()
        
        sys.stdout.write('\r')
        sys.stdout.write('[%s] Testing Epoch [%d/%d] Iter[%d/%d]\t\tLoss: %.4f Acc@1: %.3f'
                        % (time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())),
                           epoch, num_epochs, idx, len(test_dataset) // 80, 
                          test_loss / (100 * (idx + 1)), correct / total))
        sys.stdout.flush()
        
    if correct / total > best_acc:
        print()
        print('Saving Model...')
        state = {
            'net': net.module if is_use_cuda else net,
            'acc': correct / total,
            'epoch': epoch
        }
        if not os.path.isdir('./checkpoint/SqNxt_23_1x'):
            os.makedirs('./checkpoint/SqNxt_23_1x')
        torch.save(state, './checkpoint/SqNxt_23_1x/SqNxt_23_1x_Cifar10.ckpt')
        best_acc = correct / total

In [9]:
import time

for _epoch in range(start_epoch, start_epoch + num_epochs):
    start_time = time.time()
    train(_epoch)
    print()
    test(_epoch)
    print()
    print()
    end_time   = time.time()
    print('Epoch #%d Cost %ds' % (_epoch, end_time - start_time))
    
print('Best Acc@1: %.4f' % (best_acc * 100))

Training Epoch: #1, LR: 0.1000
[2018-06-20 21:04:28] Training Epoch [1/200] Iter[390/390]		Loss: 0.0121 Acc@1: 0.433
[2018-06-20 21:04:46] Testing Epoch [1/200] Iter[124/125]		Loss: 0.0150 Acc@1: 0.465
Saving Model...


Epoch #1 Cost 257s
Training Epoch: #2, LR: 0.1000


  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


[2018-06-20 21:09:48] Training Epoch [2/200] Iter[390/390]		Loss: 0.0108 Acc@1: 0.497
[2018-06-20 21:10:13] Testing Epoch [2/200] Iter[124/125]		Loss: 0.0132 Acc@1: 0.517
Saving Model...


Epoch #2 Cost 326s
Training Epoch: #3, LR: 0.1000
[2018-06-20 21:16:44] Training Epoch [3/200] Iter[390/390]		Loss: 0.0097 Acc@1: 0.553
[2018-06-20 21:17:10] Testing Epoch [3/200] Iter[124/125]		Loss: 0.0127 Acc@1: 0.559
Saving Model...


Epoch #3 Cost 416s
Training Epoch: #4, LR: 0.1000
[2018-06-20 21:23:33] Training Epoch [4/200] Iter[390/390]		Loss: 0.0090 Acc@1: 0.590
[2018-06-20 21:23:59] Testing Epoch [4/200] Iter[124/125]		Loss: 0.0131 Acc@1: 0.547

Epoch #4 Cost 408s
Training Epoch: #5, LR: 0.1000
[2018-06-20 21:30:10] Training Epoch [5/200] Iter[390/390]		Loss: 0.0083 Acc@1: 0.622
[2018-06-20 21:30:35] Testing Epoch [5/200] Iter[124/125]		Loss: 0.0117 Acc@1: 0.580
Saving Model...


Epoch #5 Cost 396s
Training Epoch: #6, LR: 0.1000
[2018-06-20 21:36:45] Training Epoch [6/200] Iter[390/390]		L

[2018-06-21 04:59:11] Training Epoch [73/200] Iter[390/390]		Loss: 0.0025 Acc@1: 0.887
[2018-06-21 04:59:36] Testing Epoch [73/200] Iter[124/125]		Loss: 0.0040 Acc@1: 0.864

Epoch #73 Cost 396s
Training Epoch: #74, LR: 0.0200
[2018-06-21 05:05:43] Training Epoch [74/200] Iter[390/390]		Loss: 0.0026 Acc@1: 0.885
[2018-06-21 05:06:08] Testing Epoch [74/200] Iter[124/125]		Loss: 0.0048 Acc@1: 0.839

Epoch #74 Cost 392s
Training Epoch: #75, LR: 0.0200
[2018-06-21 05:12:17] Training Epoch [75/200] Iter[390/390]		Loss: 0.0025 Acc@1: 0.887
[2018-06-21 05:12:43] Testing Epoch [75/200] Iter[124/125]		Loss: 0.0045 Acc@1: 0.848

Epoch #75 Cost 394s
Training Epoch: #76, LR: 0.0200
[2018-06-21 05:18:52] Training Epoch [76/200] Iter[390/390]		Loss: 0.0026 Acc@1: 0.886
[2018-06-21 05:19:17] Testing Epoch [76/200] Iter[124/125]		Loss: 0.0045 Acc@1: 0.848

Epoch #76 Cost 394s
Training Epoch: #77, LR: 0.0200
[2018-06-21 05:25:26] Training Epoch [77/200] Iter[390/390]		Loss: 0.0026 Acc@1: 0.885
[2018-06-

[2018-06-21 12:51:33] Training Epoch [144/200] Iter[390/390]		Loss: 0.0008 Acc@1: 0.966
[2018-06-21 12:51:59] Testing Epoch [144/200] Iter[124/125]		Loss: 0.0031 Acc@1: 0.907

Epoch #144 Cost 398s
Training Epoch: #145, LR: 0.0040
[2018-06-21 12:58:09] Training Epoch [145/200] Iter[390/390]		Loss: 0.0008 Acc@1: 0.967
[2018-06-21 12:58:33] Testing Epoch [145/200] Iter[124/125]		Loss: 0.0031 Acc@1: 0.906

Epoch #145 Cost 394s
Training Epoch: #146, LR: 0.0040
[2018-06-21 13:04:42] Training Epoch [146/200] Iter[390/390]		Loss: 0.0007 Acc@1: 0.968
[2018-06-21 13:05:07] Testing Epoch [146/200] Iter[124/125]		Loss: 0.0032 Acc@1: 0.905

Epoch #146 Cost 393s
Training Epoch: #147, LR: 0.0040
[2018-06-21 13:11:14] Training Epoch [147/200] Iter[390/390]		Loss: 0.0008 Acc@1: 0.968
[2018-06-21 13:11:39] Testing Epoch [147/200] Iter[124/125]		Loss: 0.0032 Acc@1: 0.905

Epoch #147 Cost 392s
Training Epoch: #148, LR: 0.0040
[2018-06-21 13:17:47] Training Epoch [148/200] Iter[390/390]		Loss: 0.0008 Acc@1