# ShuffleNet

## 0. Paper

### Info
* Title: ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices
* Authors: Xiangyu Zhang, Xinyu Zhou, Mengxiao Lin, Jian Sun
* Task: Image Classification
* Link: https://arxiv.org/abs/1707.01083


### Features
* Dataset: CIFAR-10


### Reference
* https://github.com/kuangliu/pytorch-cifar


## 1. Setting

In [1]:
# Libraries
import os
import sys
import time
from glob import glob

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torchsummary import torchsummary

In [2]:
# Hyperparameters and paths shared by the cells below.
CONFIG = dict(
    lr=0.1,             # initial learning rate handed to SGD
    momentum=0.9,       # SGD momentum
    weight_decay=5e-4,  # weight decay handed to SGD
    batch_size=128,     # mini-batch size used by every DataLoader
    epoch_size=200,     # number of training epochs; also the cosine-annealing T_max
    # NOTE(review): base_dir is not referenced by any cell visible here.
    base_dir='/content/drive/Shared drives/Yoon/Project/Doing/Deep Learning Paper Implementation',
)

## 2. Data

In [3]:
def create_dataset():
    """Build the CIFAR-10 train / validation / test datasets.

    The 50,000 official training images are randomly divided into 45,000
    training and 5,000 validation samples. Only the training subset is
    augmented (random crop + horizontal flip); the validation and test
    subsets are just converted to tensors and normalized, so evaluation is
    deterministic for a given set of weights.

    Returns:
        A ``(train_dataset, val_dataset, test_dataset)`` tuple. The first two
        are ``torch.utils.data.Subset`` objects over the training images, the
        third is the CIFAR-10 test set.
    """
    mean, std = (0.4914, 0.4822, 0.4465), (0.2470, 0.2439, 0.2616)

    train_transform = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ])

    eval_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ])

    # Two views of the same training images: a transform is attached to the
    # dataset object, so the validation subset needs its own un-augmented view.
    # Splitting a single augmented dataset would apply random crops and flips
    # to the validation images as well.
    augmented_train = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=train_transform)
    plain_train = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=eval_transform)
    test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=eval_transform)

    # One shared permutation keeps the two subsets disjoint.
    train_size = 45000
    indices = torch.randperm(len(augmented_train)).tolist()
    train_dataset = torch.utils.data.Subset(augmented_train, indices[:train_size])
    val_dataset = torch.utils.data.Subset(plain_train, indices[train_size:])
    return train_dataset, val_dataset, test_dataset


def create_dataloader(batch_size):
    """Wrap the three datasets from ``create_dataset`` in DataLoaders.

    Args:
        batch_size: Number of samples per batch for all three loaders.

    Returns:
        A ``(train_loader, val_loader, test_loader)`` tuple. Only the training
        loader shuffles its samples.
    """
    train_set, val_set, test_set = create_dataset()
    make_loader = torch.utils.data.DataLoader
    return (
        make_loader(train_set, batch_size=batch_size, shuffle=True),
        make_loader(val_set, batch_size=batch_size, shuffle=False),
        make_loader(test_set, batch_size=batch_size, shuffle=False),
    )

In [4]:
# Build the three loaders, then pull one training batch to inspect its shapes.
train_loader, val_loader, test_loader = create_dataloader(batch_size=CONFIG['batch_size'])
inputs, targets = next(iter(train_loader))
(inputs.shape, targets.shape)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=0.0, max=170498071.0), HTML(value='')))


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


(torch.Size([128, 3, 32, 32]), torch.Size([128]))

## 3. Model

In [5]:

class Shuffle(nn.Module):
    """Channel shuffle: interleave the channels of ``g`` consecutive groups.

    The channel axis is viewed as ``(g, C // g)``, the two sub-axes are
    swapped, and the result is flattened back to ``C`` channels.

    Args:
        g: Number of channel groups; the input channel count must be a
            multiple of it for the reshape to succeed.
    """

    def __init__(self, g):
        super().__init__()
        self.g = g

    def forward(self, x):
        """Return ``x`` (shape ``(B, C, H, W)``) with its channels shuffled."""
        batch, channels, height, width = x.size()
        per_group = channels // self.g
        grouped = x.view(batch, self.g, per_group, height, width)
        # Swapping the group axis with the within-group axis produces the
        # interleaving; contiguous() is required before the final view.
        interleaved = grouped.transpose(1, 2).contiguous()
        return interleaved.view(batch, channels, height, width)


class Bottleneck(nn.Module):
    """ShuffleNet unit: grouped 1x1 conv -> channel shuffle -> depthwise 3x3 -> grouped 1x1 conv.

    With ``stride == 1`` the input is added to the branch output, so ``in_C``
    must equal ``out_C``. With ``stride > 1`` the input is average-pooled and
    concatenated to the branch output along the channel axis, so the block
    emits ``out_C + in_C`` channels.

    Args:
        in_C: Number of input channels.
        out_C: Number of channels produced by the convolutional branch.
        groups: Group count for both 1x1 convolutions and for the shuffle.
        stride: Stride of the depthwise convolution (and of the pooling
            shortcut when it is greater than 1).
    """

    # The branch works at out_C // expansion channels internally.
    expansion = 4

    def __init__(self, in_C, out_C, groups, stride):
        super().__init__()
        self.stride = stride
        mid_C = out_C // self.expansion

        # Pooling shortcut, only needed when the block downsamples.
        if stride > 1:
            self.pool = nn.AvgPool2d(kernel_size=3, stride=stride, padding=1)

        # Grouped pointwise reduction followed by the channel shuffle.
        self.conv1 = nn.Conv2d(in_C, mid_C, kernel_size=1, groups=groups, bias=False)
        self.bn1 = nn.BatchNorm2d(mid_C)
        self.relu1 = nn.ReLU()
        self.shuffle = Shuffle(groups)

        # Depthwise 3x3 (one filter per channel) carrying the stride.
        self.conv2 = nn.Conv2d(mid_C, mid_C, kernel_size=3, stride=stride, padding=1, groups=mid_C, bias=False)
        self.bn2 = nn.BatchNorm2d(mid_C)
        self.relu2 = nn.ReLU()

        # Grouped pointwise expansion back to out_C channels.
        self.conv3 = nn.Conv2d(mid_C, out_C, kernel_size=1, groups=groups, bias=False)
        self.bn3 = nn.BatchNorm2d(out_C)
        self.relu3 = nn.ReLU()

    def forward(self, x):
        """Run the unit on ``x`` and return the activated, merged output."""
        branch = self.conv1(x)
        branch = self.relu1(self.bn1(branch))
        branch = self.shuffle(branch)

        branch = self.conv2(branch)
        branch = self.relu2(self.bn2(branch))

        branch = self.bn3(self.conv3(branch))

        if self.stride > 1:
            # Downsampling block: concatenate the pooled input.
            merged = torch.cat([branch, self.pool(x)], dim=1)
        else:
            # Identity shortcut, added in place.
            branch += x
            merged = branch
        return self.relu3(merged)


class ShuffleNet(nn.Module):
    """ShuffleNet classifier: a 3x3 conv stem, three stages of Bottleneck units, and a linear head.

    Args:
        cfg: Dict with the keys ``'groups'`` (group count for every unit),
            ``'in_C'`` (stem output channels), ``'out_C'`` (output channels of
            each of the three stages) and ``'num_blocks'`` (units per stage).
        num_classes: Size of the output logits vector.
    """

    def __init__(self, cfg, num_classes=10):
        super().__init__()
        self.groups = cfg['groups']
        # self.in_C tracks the channel count entering the next unit; it is
        # advanced by _make_layer as the stages are built.
        self.in_C = cfg['in_C']

        stem_conv = nn.Conv2d(3, self.in_C, kernel_size=3, stride=1, padding=1, bias=False)
        self.head = nn.Sequential(stem_conv, nn.BatchNorm2d(self.in_C), nn.ReLU())

        stage_channels = cfg['out_C']
        stage_depths = cfg['num_blocks']
        self.layer1 = self._make_layer(stage_channels[0], stage_depths[0])
        self.layer2 = self._make_layer(stage_channels[1], stage_depths[1])
        self.layer3 = self._make_layer(stage_channels[2], stage_depths[2])

        # By now self.in_C equals the last stage's output width.
        self.classifier = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.Linear(self.in_C, num_classes),
        )

    def _make_layer(self, out_C, num_blocks):
        """Build one stage of ``num_blocks`` units ending with ``out_C`` channels.

        The first unit uses stride 2 and concatenates its input, so its branch
        only has to produce ``out_C - self.in_C`` channels; the remaining
        units use stride 1 and keep ``out_C`` channels.
        """
        units = []
        for position in range(num_blocks):
            is_first = position == 0
            unit_stride = 2 if is_first else 1
            branch_C = out_C - self.in_C if is_first else out_C
            units.append(Bottleneck(self.in_C, branch_C, self.groups, unit_stride))
            self.in_C = out_C
        return nn.Sequential(*units)

    def forward(self, x):
        """Return class logits for a batch of images ``x``."""
        for stage in (self.head, self.layer1, self.layer2, self.layer3, self.classifier):
            x = stage(x)
        return x

def ShuffleNet50():
    """Return a ShuffleNet with 2 groups, a 64-channel stem and stages of 4/8/4 units at 200/400/800 channels."""
    return ShuffleNet(dict(
        groups=2,
        in_C=64,
        out_C=[200, 400, 800],
        num_blocks=[4, 8, 4],
    ))

In [6]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("Device: ", device)
model = ShuffleNet50().to(device)

# SGD with momentum and weight decay; the learning rate follows a cosine
# schedule whose period is the full training run.
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=CONFIG['lr'],
    momentum=CONFIG['momentum'],
    weight_decay=CONFIG['weight_decay'],
)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=CONFIG['epoch_size'])

# Layer-by-layer summary for one input of the shape seen in the sample batch.
torchsummary.summary(model, input_size=inputs.size()[1:], device=device)

Device:  cuda
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 32, 32]           1,728
       BatchNorm2d-2           [-1, 64, 32, 32]             128
              ReLU-3           [-1, 64, 32, 32]               0
            Conv2d-4           [-1, 34, 32, 32]           1,088
       BatchNorm2d-5           [-1, 34, 32, 32]              68
              ReLU-6           [-1, 34, 32, 32]               0
           Shuffle-7           [-1, 34, 32, 32]               0
            Conv2d-8           [-1, 34, 16, 16]             306
       BatchNorm2d-9           [-1, 34, 16, 16]              68
             ReLU-10           [-1, 34, 16, 16]               0
           Conv2d-11          [-1, 136, 16, 16]           2,312
      BatchNorm2d-12          [-1, 136, 16, 16]             272
        AvgPool2d-13           [-1, 64, 16, 16]               0
             ReLU-14     

## 4. Experiment

In [7]:
class AverageMeter:
    """Keeps a weighted running average of one named quantity.

    Attributes:
        name: Label printed in front of the average.
        sum: Weighted sum of all values seen since the last reset.
        count: Total weight seen since the last reset.
        avg: ``sum / count`` after the most recent update (0 after a reset).
    """

    def __init__(self, name):
        self.name = name
        self.reset()

    def reset(self):
        """Clear the accumulated statistics."""
        self.sum = self.count = self.avg = 0

    def update(self, val, n=1):
        """Add ``val`` with weight ``n`` (e.g. a batch mean and the batch size)."""
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count

    def __str__(self):
        # Name left-aligned in a 10-character column, average with 3 decimals.
        return '{:10s} {:.3f}'.format(self.name, self.avg)


class ProgressMeter:
    """Groups several AverageMeters and prints them on one self-overwriting line.

    After each ``update`` the current average of every meter is also exposed
    as an attribute named after that meter (e.g. ``progress.val_acc``).

    Args:
        meters: Names of the quantities to track, in the order their values
            will be passed to ``update``.
        loader_length: Total number of batches, shown as the denominator.
        prefix: Text printed at the start of the line.
    """

    def __init__(self, meters, loader_length, prefix=""):
        self.meters = [AverageMeter(name) for name in meters]
        self.loader_length = loader_length
        self.prefix = prefix

    def reset(self):
        """Reset every tracked meter."""
        for meter in self.meters:
            meter.reset()

    def update(self, values, n=1):
        """Feed one value per meter (same order as at construction), each weighted by ``n``."""
        for meter, value in zip(self.meters, values):
            meter.update(value, n)
            # Mirror the running average onto the instance for direct access.
            setattr(self, meter.name, meter.avg)

    def display(self, batch_idx, postfix=""):
        """Print the status line for the zero-based ``batch_idx``, followed by ``postfix``."""
        header = f'{self.prefix} [{batch_idx+1:03d}/{self.loader_length:03d}]'
        line = ' | '.join([header, *map(str, self.meters)])

        # The carriage return moves the cursor back so the line overwrites the previous one.
        sys.stdout.write('\r')
        sys.stdout.write(line + postfix)
        sys.stdout.flush()


def accuracy(logits, targets):
    """Return the fraction of rows whose highest-scoring class equals the target.

    Args:
        logits: Tensor of per-class scores with the class axis at dim 1.
        targets: Tensor of class indices, one per row of ``logits``.

    Returns:
        The accuracy as a Python float.
    """
    predicted = logits.max(dim=1).indices
    hits = (predicted == targets).float()
    return hits.mean().item()

def criterion(logits, targets):
    """Return the cross-entropy loss between ``logits`` and class-index ``targets``."""
    loss = F.cross_entropy(input=logits, target=targets)
    return loss

In [8]:
class Trainer(object):
    """Runs training, validation (with best-checkpoint saving) and testing for one model.

    Args:
        model: The network to optimize; expected to already live on ``device``.
        optimizer: Optimizer stepping ``model``'s parameters.
        device: Device that every batch is moved to before the forward pass.
        scheduler: Optional learning-rate scheduler, stepped once per epoch.
        ckpt_path: File the best checkpoint is written to and read back from.

    Attributes:
        best_epoch: Epoch index of the best validation accuracy so far.
        best_acc: Best validation accuracy so far.
    """

    def __init__(self, model, optimizer, device, scheduler=None, ckpt_path='ckpt.pt'):
        self.model = model
        self.optimizer = optimizer
        self.scheduler = scheduler
        self.device = device
        self.ckpt_path = ckpt_path
        self.best_epoch, self.best_acc = 0, 0

    def train(self, train_loader, epoch):
        """Run one optimization pass over ``train_loader`` and print running loss/accuracy."""
        progress = ProgressMeter(["train_loss", "train_acc"], len(train_loader), prefix=f'EP  {epoch:03d}')
        self.model.train()

        start_time = time.time()
        for idx, (inputs, targets) in enumerate(train_loader):
            inputs, targets = inputs.to(self.device), targets.to(self.device)
            outputs = self.model(inputs)
            loss = criterion(outputs, targets)

            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            progress.update([loss.item(), accuracy(outputs, targets)], n=inputs.size(0))
            if idx % 20 == 0:
                # display() converts the zero-based index to a one-based counter itself.
                progress.display(idx)

        if self.scheduler is not None:
            self.scheduler.step()
        epoch_time = time.time() - start_time
        progress.display(len(train_loader) - 1, f' | {epoch_time:.0f}s' + '\n')

    def validate(self, val_loader, epoch):
        """Evaluate on ``val_loader``; save a checkpoint when validation accuracy improves."""
        progress = ProgressMeter(["val_loss", "val_acc"], len(val_loader), prefix=f'VAL {epoch:03d}')
        self._evaluate(val_loader, progress)

        if progress.val_acc > self.best_acc:
            # Record the new best first so the checkpoint metadata describes
            # the weights that are being saved.
            self.best_epoch = epoch
            self.best_acc = progress.val_acc
            ckpt = {
                'best_epoch': self.best_epoch,
                'best_acc': self.best_acc,
                'model_state_dict': self.model.state_dict()
            }
            torch.save(ckpt, self.ckpt_path)

        progress.display(len(val_loader) - 1, '\n')

    def test(self, test_loader):
        """Reload the best checkpoint and report loss/accuracy on ``test_loader``."""
        progress = ProgressMeter(["test_loss", "test_acc"], len(test_loader), prefix=f'TEST')
        # map_location lets a checkpoint written on one device be read on another.
        ckpt = torch.load(self.ckpt_path, map_location=self.device)
        self.model.load_state_dict(ckpt['model_state_dict'])
        self._evaluate(test_loader, progress)
        progress.display(len(test_loader) - 1, '\n')

    def _evaluate(self, loader, progress):
        """Feed every batch of ``loader`` through the model in eval mode, updating ``progress``."""
        self.model.eval()
        with torch.no_grad():
            for inputs, targets in loader:
                inputs, targets = inputs.to(self.device), targets.to(self.device)
                outputs = self.model(inputs)
                # .item() keeps the meters on plain Python floats, as in train().
                loss = criterion(outputs, targets).item()
                progress.update([loss, accuracy(outputs, targets)], n=inputs.size(0))

In [9]:
# Bundle the model, optimizer and LR scheduler into a single training driver.
trainer = Trainer(model=model, optimizer=optimizer, device=device, scheduler=scheduler)

In [10]:
# Full run: each epoch is one training pass followed by one validation pass.
for ep in range(CONFIG['epoch_size']):
    print('-' * 65)
    trainer.train(train_loader=train_loader, epoch=ep)
    trainer.validate(val_loader=val_loader, epoch=ep)

-----------------------------------------------------------------
EP  000 [352/352] | train_loss 3.589 | train_acc  0.164 | 28s
VAL 000 [040/040] | val_loss   2.006 | val_acc    0.182
-----------------------------------------------------------------
EP  001 [352/352] | train_loss 1.895 | train_acc  0.240 | 28s
VAL 001 [040/040] | val_loss   2.182 | val_acc    0.252
-----------------------------------------------------------------
EP  002 [352/352] | train_loss 1.673 | train_acc  0.350 | 28s
VAL 002 [040/040] | val_loss   1.765 | val_acc    0.349
-----------------------------------------------------------------
EP  003 [352/352] | train_loss 1.468 | train_acc  0.455 | 29s
VAL 003 [040/040] | val_loss   1.436 | val_acc    0.464
-----------------------------------------------------------------
EP  004 [352/352] | train_loss 1.306 | train_acc  0.526 | 28s
VAL 004 [040/040] | val_loss   1.550 | val_acc    0.470
-----------------------------------------------------------------
EP  005 [352/3

In [11]:
# Score the checkpoint saved during validation on the held-out test set.
trainer.test(test_loader=test_loader)

TEST [079/079] | test_loss  0.340 | test_acc   0.913
