### Import libraries

In [1]:
from __future__ import print_function
import os
import argparse

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable

from models import vgg

### Set hyperparameters

In [None]:
# When True, train() calls updateBN() after every backward pass.
SPARSITY_REGULARIZATION = True
# λ (balance factor): scales the sign of the BatchNorm2d weights in updateBN().
LAMBDA = 1e-4

# Seed handed to torch.cuda.manual_seed when CUDA is available.
SEED = 1
# Batch sizes for the training and test DataLoaders.
TRAIN_BATCH_SIZE = 100
TEST_BATCH_SIZE = 1000
# Upper bound of the epoch loop; the learning rate is multiplied by 0.1 at
# 0.5 * EPOCHS and 0.75 * EPOCHS.
EPOCHS = 120
# Arguments for the SGD optimizer.
LEARNING_RATE = 0.1
MOMENTUM = 0.9
WEIGHT_DECAY = 1e-4
# train() prints the loss every LOG_INTERVAL batches.
LOG_INTERVAL = 100
# Placeholder only: overwritten by the torch.cuda.is_available() check below.
CUDA = True

# When True, model/optimizer state is loaded from WEIGHT_PATH before training.
RESUME = False
# First epoch index of the training loop; replaced by the checkpoint's value on resume.
START_EPOCH = 0

# File the best checkpoint is saved to (and resumed from).
WEIGHT_PATH = 'model_best.pth'


In [3]:
# Seed the default (CPU) generator as well: it drives DataLoader shuffling and
# the random augmentations, so seeding only CUDA leaves runs non-reproducible
# (and leaves SEED completely unused on CPU-only machines).
torch.manual_seed(SEED)

# Detect the device once; CUDA is read later by the model, train() and test().
CUDA = torch.cuda.is_available()
if CUDA:
    # Extra DataLoader options that only make sense when copying batches to a GPU.
    kwargs = {'num_workers': 1, 'pin_memory': True}
    torch.cuda.manual_seed(SEED)
else:
    kwargs = {}


### Download dataset


The training set is augmented (4-pixel padding, random 32×32 crop, random horizontal flip); the test set is only converted to tensors and normalized.

In [4]:
#### Datasets ####
# Both splits map each RGB channel with mean 0.5 / std 0.5.
normalize = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))

# Training split: pad + random crop + random flip, then tensor conversion and normalisation.
train_transform = transforms.Compose([
    transforms.Pad(4),
    transforms.RandomCrop(32),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize,
])
train_set = datasets.CIFAR10('./data', train=True, download=True,
                             transform=train_transform)
train_loader = torch.utils.data.DataLoader(
    train_set, batch_size=TRAIN_BATCH_SIZE, shuffle=True, **kwargs)

# Test split: no augmentation, only tensor conversion and normalisation.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    normalize,
])
test_set = datasets.CIFAR10('./data', train=False, transform=test_transform)
test_loader = torch.utils.data.DataLoader(
    test_set, batch_size=TEST_BATCH_SIZE, shuffle=True, **kwargs)

Files already downloaded and verified


### Define model and load trained weight

In [5]:
# Build the network and, when a GPU was detected, move its parameters there.
# nn.Module.cuda() returns the module itself, so rebinding keeps the same object.
model = vgg()
model = model.cuda() if CUDA else model

### Set Optimizer
Here we use Stochastic Gradient Descent (SGD) with momentum and weight decay.

In [6]:
# SGD over every model parameter, configured from the hyperparameter cell.
optimizer = optim.SGD(
    model.parameters(),
    lr=LEARNING_RATE,
    momentum=MOMENTUM,
    weight_decay=WEIGHT_DECAY,
)

### Update parameters using the sparse method in the paper

In [7]:
def updateBN(net=None, penalty=None):
    """Add the L1 sparsity subgradient to every BatchNorm2d scale gradient.

    For each ``nn.BatchNorm2d`` layer, ``penalty * sign(weight)`` is added to
    ``weight.grad``. Call this after ``loss.backward()`` and before
    ``optimizer.step()`` so the optimizer applies the penalty together with
    the task gradient (scaled by the learning rate and momentum).

    The weights themselves are deliberately NOT modified here: stepping them
    directly in addition to the gradient term would apply the penalty twice,
    the second time bypassing the learning rate.

    Args:
        net: module whose BatchNorm2d layers are regularised. Defaults to the
            notebook-level ``model``.
        penalty: L1 balance factor. Defaults to the notebook-level ``LAMBDA``.
    """
    target = model if net is None else net
    strength = LAMBDA if penalty is None else penalty

    for m in target.modules():
        if not isinstance(m, nn.BatchNorm2d):
            continue
        gamma = m.weight
        # affine=False layers have no weight; frozen/unused ones have no grad.
        if gamma is None or gamma.grad is None:
            continue
        # detach() keeps autograd from recording the in-place update on .grad.
        gamma.grad.add_(strength * torch.sign(gamma.detach()))

### Load predefined models and parameters

In [None]:
if RESUME:
    # Load onto the CPU first so a checkpoint written on a GPU machine can
    # still be resumed on a CPU-only one; load_state_dict then copies the
    # values into the existing parameters on whatever device they live on.
    checkpoint = torch.load(WEIGHT_PATH, map_location='cpu')
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    # Continue from the epoch after the one that produced the checkpoint.
    START_EPOCH = checkpoint['epoch']
    best_prec1 = checkpoint['best_prec1']
    print(f'RESUME MODEL @EPOCH={START_EPOCH}, BEST_PREC1={best_prec1}')

### Define training and testing functions

In [None]:
#### Train function ####
def train(epoch):
    """Run one training epoch over ``train_loader``.

    For every batch: forward pass, cross-entropy loss, backward pass, optional
    BatchNorm sparsity penalty via ``updateBN()``, then an optimizer step.
    The loss is printed every ``LOG_INTERVAL`` batches.

    Args:
        epoch: epoch index, used only in the progress message.
    """
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        if CUDA:
            data, target = data.cuda(), target.cuda()
        # Tensors are passed to the model directly; the deprecated
        # torch.autograd.Variable wrapper is a no-op and is no longer used.
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        if SPARSITY_REGULARIZATION:
            # Must run between backward() and step() so the penalty is part
            # of the gradient the optimizer applies.
            updateBN()
        optimizer.step()
        if batch_idx % LOG_INTERVAL == 0:
            print('Train Epoch: {} [{}/{} ({:.1f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
#### Test function ####
def test():
    """Evaluate ``model`` on ``test_loader`` and print a summary line.

    Returns:
        float: fraction of correctly classified test samples, in [0, 1].
        A plain Python float is returned (not a tensor) so the value can be
        compared, stored in a checkpoint and formatted without depending on
        tensor division semantics.
    """
    model.eval()
    test_loss = 0.
    correct = 0

    with torch.no_grad():
        for data, target in test_loader:
            if CUDA:
                data, target = data.cuda(), target.cuda()
            output = model(data)
            # Sum (not mean) per batch so dividing by the dataset size below
            # gives the true per-sample average even if the last batch is smaller.
            test_loss += F.cross_entropy(output, target, reduction='sum').item()
            pred = output.argmax(dim=1)
            # .item() keeps the running count a Python int.
            correct += pred.eq(target).sum().item()

    num_samples = len(test_loader.dataset)
    test_loss /= num_samples
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.1f}%)\n'.format(
        test_loss, correct, num_samples,
        100. * correct / num_samples))
    return correct / float(num_samples)

# Keep the best accuracy restored by the resume cell; start from zero otherwise.
# (Unconditionally resetting it would let the first resumed epoch overwrite a
# better checkpoint.)
best_prec1 = best_prec1 if RESUME else 0.

# Epochs at which the learning rate is divided by 10 (50% and 75% of training).
# Integer milestones so the match also works when EPOCHS is not divisible by 4.
lr_milestones = (int(EPOCHS * 0.5), int(EPOCHS * 0.75))

for epoch in range(START_EPOCH, EPOCHS):
    if epoch in lr_milestones:
        for param_group in optimizer.param_groups:
            param_group['lr'] *= 0.1
    train(epoch)
    prec1 = test()

    # Store model weights for subsequent pruning and training.
    if prec1 > best_prec1:
        # Update first so the checkpoint records the accuracy of the weights
        # it actually contains rather than the previous best.
        best_prec1 = prec1
        torch.save({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
            'optimizer': optimizer.state_dict(),
        }, WEIGHT_PATH)







Test set: Average loss: 1.2778, Accuracy: 5366/10000 (53.7%)


Test set: Average loss: 0.9895, Accuracy: 6586/10000 (65.9%)


Test set: Average loss: 0.8607, Accuracy: 7226/10000 (72.3%)


Test set: Average loss: 0.9778, Accuracy: 6912/10000 (69.1%)


Test set: Average loss: 0.7613, Accuracy: 7439/10000 (74.4%)


Test set: Average loss: 0.6815, Accuracy: 7712/10000 (77.1%)


Test set: Average loss: 0.6133, Accuracy: 7995/10000 (79.9%)


Test set: Average loss: 0.7936, Accuracy: 7557/10000 (75.6%)


Test set: Average loss: 0.7950, Accuracy: 7603/10000 (76.0%)


Test set: Average loss: 0.6969, Accuracy: 7752/10000 (77.5%)


Test set: Average loss: 0.6014, Accuracy: 8075/10000 (80.8%)


Test set: Average loss: 0.6152, Accuracy: 7984/10000 (79.8%)


Test set: Average loss: 0.6798, Accuracy: 7932/10000 (79.3%)


Test set: Average loss: 0.6189, Accuracy: 8041/10000 (80.4%)


Test set: Average loss: 0.6053, Accuracy: 7988/10000 (79.9%)


Test set: Average loss: 0.6922, Accuracy: 7943/10000 (