In [1]:
# https://github.com/pytorch/vision/blob/master/torchvision/models/__init__.py
import argparse
import os,sys
import shutil
import pdb, time
from collections import OrderedDict

import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim
import torch.utils.data
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models
from utils import convert_secs2time, time_string, time_file_str
# from models import print_log
import models
import random
import numpy as np
import copy

model_names = sorted(name for name in models.__dict__
                     if name.islower() and not name.startswith("__")
                     and callable(models.__dict__[name]))

In [2]:
# python ./utils/get_small_model.py 
# /home/hongky/datasets/imagenet 
# -a resnet101  --workers 12 
# --resume  ./0810_resnet101/resnet101-rate-0.7/best.resnet101.2020-10-08-2702.pth.tar 
# --save_dir ./0810_resnet101/resnet101-rate-0.7/infer_small_model/ 
# --batch-size 64 
# --rate 0.7 --get_small

from dotmap import DotMap

args = DotMap()
args.data = '/home/hongky/datasets/imagenet'
args.save_dir = './0810_resnet101/resnet101-rate-0.7/infer_small_model/'
args.arch = 'resnet101'
args.workers = 12
args.batch_size = 64
args.lr = 0.1
args.print_freq = 200
args.resume = './0810_resnet101/resnet101-rate-0.7/best.resnet101.2020-10-08-2702.pth.tar'
args.rate = 0.7
args.layer_begin = 3
args.layer_end = 3
args.layer_inter = 1
args.epoch_prune = 1
args.skip_downsample = 1
args.get_small = True 
args.use_cuda = True

args.prefix = time_file_str()

In [21]:
def validate(val_loader, model, criterion, log, is_cuda=False):
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    for i, (input, target) in enumerate(val_loader):
        # target = target.cuda(async=True)
        if is_cuda:
            input = input.cuda()
        target = target.cuda(non_blocking=True)
        input_var = torch.autograd.Variable(input, volatile=True)
        target_var = torch.autograd.Variable(target, volatile=True)

        # compute output
        output = model(input_var)
        loss = criterion(output, target_var)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.data.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print_log('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                i, len(val_loader), batch_time=batch_time, loss=losses,
                top1=top1, top5=top5), log)

    print_log(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f} Error@1 {error1:.3f}'.format(top1=top1, top5=top5,
                                                                                           error1=100 - top1.avg), log)

    return top1.avg


def save_checkpoint(state, is_best, filename, bestname):
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, bestname)


def print_log(print_string, log):
    print("{}".format(print_string))
    log.write('{}\n'.format(print_string))
    log.flush()


class AverageMeter(object):
    """Computes and stores the average and current value"""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k"""
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res


def remove_module_dict(state_dict):
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        name = k[7:]  # remove `module.`
        new_state_dict[name] = v
    return new_state_dict

In [4]:
best_prec1 = 0

if not os.path.isdir(args.save_dir):
    os.makedirs(args.save_dir)
log = open(os.path.join(args.save_dir, 'gpu-time.{}.{}.log'.format(args.arch, args.prefix)), 'w')

# create model
print_log("=> creating model '{}'".format(args.arch), log)
model = models.__dict__[args.arch](pretrained=False)
print_log("=> Model : {}".format(model), log)
print_log("=> parameter : {}".format(args), log)
print_log("Compress Rate: {}".format(args.rate), log)
print_log("Layer Begin: {}".format(args.layer_begin), log)
print_log("Layer End: {}".format(args.layer_end), log)
print_log("Layer Inter: {}".format(args.layer_inter), log)
print_log("Epoch prune: {}".format(args.epoch_prune), log)
print_log("Skip downsample : {}".format(args.skip_downsample), log)

# optionally resume from a checkpoint
if args.resume:
    if os.path.isfile(args.resume):
        print_log("=> loading checkpoint '{}'".format(args.resume), log)
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']
        best_prec1 = checkpoint['best_prec1']
        state_dict = checkpoint['state_dict']
        state_dict = remove_module_dict(state_dict)
        model.load_state_dict(state_dict)
        print_log("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']), log)
    else:
        print_log("=> no checkpoint found at '{}'".format(args.resume), log)

cudnn.benchmark = True

# Data loading code
valdir = os.path.join(args.data, 'val')
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

val_loader = torch.utils.data.DataLoader(
    datasets.ImageFolder(valdir, transforms.Compose([
        # transforms.Scale(256),
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ])),
    batch_size=args.batch_size, shuffle=False,
    num_workers=args.workers, pin_memory=True)

criterion = nn.CrossEntropyLoss().cuda()

=> creating model 'resnet101'
=> Model : ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d

=> loaded checkpoint './0810_resnet101/resnet101-rate-0.7/best.resnet101.2020-10-08-2702.pth.tar' (epoch 94)


In [5]:
def check_channel(tensor):
    #print('DEBUG: tensor-size')
    #print(tensor.size())
    #print(len(tensor.size()))
#     if len(tensor.size()) == 0:
#         return 0, 0
    size_0 = tensor.size()[0]
    size_1 = tensor.size()[1] * tensor.size()[2] * tensor.size()[3]
    tensor_resize = tensor.view(size_0, -1)
    # indicator: if the channel contain all zeros
    channel_if_zero = np.zeros(size_0)
    for x in range(0, size_0, 1):
        channel_if_zero[x] = np.count_nonzero(tensor_resize[x].cpu().numpy()) != 0
    # indices = (torch.LongTensor(channel_if_zero) != 0 ).nonzero().view(-1)

    indices_nonzero = torch.LongTensor((channel_if_zero != 0).nonzero()[0])
    # indices_nonzero = torch.LongTensor((channel_if_zero != 0).nonzero()[0])

    zeros = (channel_if_zero == 0).nonzero()[0]
    indices_zero = torch.LongTensor(zeros) if zeros != [] else []

    return indices_zero, indices_nonzero

In [6]:
big_model = model

item = list(big_model.state_dict().items())

print("length of state dict is", len(item))


#print('big_model: ', big_model)
#print('big_model.state_dict():', big_model.state_dict())

# indices_list = []
kept_index_per_layer = {}
kept_filter_per_layer = {}
pruned_index_per_layer = {}

#print('item[0]: ', item[0])
#     param_names = [(it[0], it[1].size()) for it in item]
#     for v in param_names:
#         print(v)

#     print('\n\n')

for x in range(0, len(item), 1):
    # print(item[x][0])
    if(len(item[x][1].size()) == 4):
        print('\n\n')
        print(item[x][0], type(item[x][1]))
        print('++')
        indices_zero, indices_nonzero = check_channel(item[x][1])
        print('shape: ', item[x][1].size())
        print('indices_zero {} ++ indices_nonzero {}'.format(len(indices_zero), len(indices_nonzero)))
        # indices_list.append(indices_nonzero)
        pruned_index_per_layer[item[x][0]] = indices_zero
        kept_index_per_layer[item[x][0]] = indices_nonzero
        kept_filter_per_layer[item[x][0]] = indices_nonzero.shape[0]
        
    else:
        print(item[x][0], type(item[x][1]), ' :: ', item[x][1].size())
        
        
        

length of state dict is 626



conv1.weight <class 'torch.Tensor'>
++
shape:  torch.Size([64, 3, 7, 7])
indices_zero 19 ++ indices_nonzero 45
bn1.weight <class 'torch.Tensor'>  ::  torch.Size([64])
bn1.bias <class 'torch.Tensor'>  ::  torch.Size([64])
bn1.running_mean <class 'torch.Tensor'>  ::  torch.Size([64])
bn1.running_var <class 'torch.Tensor'>  ::  torch.Size([64])
bn1.num_batches_tracked <class 'torch.Tensor'>  ::  torch.Size([])



layer1.0.conv1.weight <class 'torch.Tensor'>
++
shape:  torch.Size([64, 64, 1, 1])
indices_zero 19 ++ indices_nonzero 45
layer1.0.bn1.weight <class 'torch.Tensor'>  ::  torch.Size([64])
layer1.0.bn1.bias <class 'torch.Tensor'>  ::  torch.Size([64])
layer1.0.bn1.running_mean <class 'torch.Tensor'>  ::  torch.Size([64])
layer1.0.bn1.running_var <class 'torch.Tensor'>  ::  torch.Size([64])
layer1.0.bn1.num_batches_tracked <class 'torch.Tensor'>  ::  torch.Size([])



layer1.0.conv2.weight <class 'torch.Tensor'>
++
shape:  torch.Size([64, 64, 3, 3])
ind



shape:  torch.Size([256, 256, 3, 3])
indices_zero 76 ++ indices_nonzero 180
layer3.6.bn2.weight <class 'torch.Tensor'>  ::  torch.Size([256])
layer3.6.bn2.bias <class 'torch.Tensor'>  ::  torch.Size([256])
layer3.6.bn2.running_mean <class 'torch.Tensor'>  ::  torch.Size([256])
layer3.6.bn2.running_var <class 'torch.Tensor'>  ::  torch.Size([256])
layer3.6.bn2.num_batches_tracked <class 'torch.Tensor'>  ::  torch.Size([])



layer3.6.conv3.weight <class 'torch.Tensor'>
++
shape:  torch.Size([1024, 256, 1, 1])
indices_zero 307 ++ indices_nonzero 717
layer3.6.bn3.weight <class 'torch.Tensor'>  ::  torch.Size([1024])
layer3.6.bn3.bias <class 'torch.Tensor'>  ::  torch.Size([1024])
layer3.6.bn3.running_mean <class 'torch.Tensor'>  ::  torch.Size([1024])
layer3.6.bn3.running_var <class 'torch.Tensor'>  ::  torch.Size([1024])
layer3.6.bn3.num_batches_tracked <class 'torch.Tensor'>  ::  torch.Size([])



layer3.7.conv1.weight <class 'torch.Tensor'>
++
shape:  torch.Size([256, 1024, 1, 1])
indi

shape:  torch.Size([1024, 256, 1, 1])
indices_zero 307 ++ indices_nonzero 717
layer3.17.bn3.weight <class 'torch.Tensor'>  ::  torch.Size([1024])
layer3.17.bn3.bias <class 'torch.Tensor'>  ::  torch.Size([1024])
layer3.17.bn3.running_mean <class 'torch.Tensor'>  ::  torch.Size([1024])
layer3.17.bn3.running_var <class 'torch.Tensor'>  ::  torch.Size([1024])
layer3.17.bn3.num_batches_tracked <class 'torch.Tensor'>  ::  torch.Size([])



layer3.18.conv1.weight <class 'torch.Tensor'>
++
shape:  torch.Size([256, 1024, 1, 1])
indices_zero 76 ++ indices_nonzero 180
layer3.18.bn1.weight <class 'torch.Tensor'>  ::  torch.Size([256])
layer3.18.bn1.bias <class 'torch.Tensor'>  ::  torch.Size([256])
layer3.18.bn1.running_mean <class 'torch.Tensor'>  ::  torch.Size([256])
layer3.18.bn1.running_var <class 'torch.Tensor'>  ::  torch.Size([256])
layer3.18.bn1.num_batches_tracked <class 'torch.Tensor'>  ::  torch.Size([])



layer3.18.conv2.weight <class 'torch.Tensor'>
++
shape:  torch.Size([256, 256,

shape:  torch.Size([2048, 512, 1, 1])
indices_zero 614 ++ indices_nonzero 1434
layer4.2.bn3.weight <class 'torch.Tensor'>  ::  torch.Size([2048])
layer4.2.bn3.bias <class 'torch.Tensor'>  ::  torch.Size([2048])
layer4.2.bn3.running_mean <class 'torch.Tensor'>  ::  torch.Size([2048])
layer4.2.bn3.running_var <class 'torch.Tensor'>  ::  torch.Size([2048])
layer4.2.bn3.num_batches_tracked <class 'torch.Tensor'>  ::  torch.Size([])
fc.weight <class 'torch.Tensor'>  ::  torch.Size([1000, 2048])
fc.bias <class 'torch.Tensor'>  ::  torch.Size([1000])


In [7]:
def prune_conv_bn(conv1, bn1, inplanes, inplanes_indices=None, kernel_size=1, stride=1, padding=0, bias=False):
    indices_zero, indices_nonzero = check_channel(conv1.weight.detach())
    print(len(indices_zero), len(indices_nonzero))
    n_outplanes = len(indices_nonzero)

    n_conv1 = nn.Conv2d(inplanes, n_outplanes, kernel_size=kernel_size, stride=stride, padding=padding,
                                   bias=bias)

#     print('-------------\n conv1')
    state_dict = {}
    for k in conv1.state_dict().keys():

        vals = conv1.state_dict()[k]
#         print('param: ', k, type(vals), vals.size())
        state_dict[k] = torch.index_select(vals, 0, indices_nonzero)
        if inplanes_indices != None:
            state_dict[k] = torch.index_select(state_dict[k], 1, inplanes_indices)
    n_conv1.load_state_dict(state_dict)


#     print('-------------\n bn1')
    state_dict = {}
    
    n_bn1 = nn.BatchNorm2d(len(indices_nonzero))
    for k in bn1.state_dict().keys():
        vals = bn1.state_dict()[k]
#         print('param: ', k, type(vals), vals.size())
        if(len(vals.size()) > 0):
            state_dict[k] = torch.index_select(vals, 0, indices_nonzero)
        else:
            state_dict[k] = vals

    n_bn1.load_state_dict(state_dict)
    
    return n_conv1, n_bn1, n_outplanes, indices_nonzero


def prune_inplane_conv_bn(conv1, bn1, inplanes, inplanes_indices=None, kernel_size=1, stride=1, padding=0, bias=False):
    n_outplanes = conv1.weight.size()[0]

    n_conv1 = nn.Conv2d(inplanes, n_outplanes, kernel_size=kernel_size, stride=stride, padding=padding,
                                   bias=bias)

#     print('-------------\n conv1')
    state_dict = {}
    for k in conv1.state_dict().keys():

        vals = conv1.state_dict()[k]
        if inplanes_indices != None:
            state_dict[k] = torch.index_select(vals, 1, inplanes_indices)
        else:
            state_dict[k] = vals
    n_conv1.load_state_dict(state_dict)


    n_bn1 = bn1
    
    indices_nonzero = None
    
    return n_conv1, n_bn1, n_outplanes, indices_nonzero



class PrunedBottleneck(nn.Module):
    expansion = 4

    def __init__(self, origin_block, inplanes, inplanes_indices, stride=1, downsample=None):
        super(PrunedBottleneck, self).__init__()
        
        
        conv1, bn1, next_inplanes, next_inplanes_indices = prune_conv_bn(origin_block.conv1, origin_block.bn1, 
                                   inplanes, inplanes_indices, 
                                   kernel_size=1, bias=False)
        self.conv1 = conv1
        self.bn1 = bn1
        
        
        conv2, bn2, next_inplanes, next_inplanes_indices = prune_conv_bn(origin_block.conv2, origin_block.bn2,
                               next_inplanes, next_inplanes_indices,  
                               kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.conv2 = conv2
        self.bn2 = bn2
        
        
        conv3, bn3, next_inplanes, next_inplanes_indices = prune_inplane_conv_bn(origin_block.conv3, origin_block.bn3,
                               next_inplanes, next_inplanes_indices,  
                               kernel_size=1, bias=False)
        self.conv3 = conv3
        self.bn3 = bn3
        
        
        self.next_inplanes = next_inplanes
        self.next_inplanes_indices = next_inplanes_indices
        
        
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

        
    def forward(self, x):
        residual = x
#         print('block-0:', x.size())

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
#         print('block-1:', out.size())

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)
#         print('block-2:', out.size())

        out = self.conv3(out)
        out = self.bn3(out)
#         print('block-3:', out.size())

        if self.downsample is not None:
            residual = self.downsample(x)
#             print('residual:', residual.size())

        out += residual
        out = self.relu(out)

        return out
    
    
    
    




class CloneBottleneck(nn.Module):

    def __init__(self, origin_block, inplanes, inplanes_indices, stride=1, downsample=None):
        super(CloneBottleneck, self).__init__()
        
        
        conv1, bn1, next_inplanes, next_inplanes_indices = prune_inplane_conv_bn(origin_block.conv1, origin_block.bn1, 
                                   inplanes, inplanes_indices, 
                                   kernel_size=1, bias=False)
        self.conv1 = conv1
        self.bn1 = bn1
        
        
        conv2, bn2, next_inplanes, next_inplanes_indices = prune_inplane_conv_bn(origin_block.conv2, origin_block.bn2,
                               next_inplanes, next_inplanes_indices,  
                               kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.conv2 = conv2
        self.bn2 = bn2
        
        
        conv3, bn3, next_inplanes, next_inplanes_indices = prune_inplane_conv_bn(origin_block.conv3, origin_block.bn3,
                               next_inplanes, next_inplanes_indices,  
                               kernel_size=1, bias=False)
        self.conv3 = conv3
        self.bn3 = bn3
        
        
        self.next_inplanes = next_inplanes
        self.next_inplanes_indices = next_inplanes_indices
        
        
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

        
    def forward(self, x):
        residual = x
        #print('block-0:', x.size())

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        #print('block-1:', out.size())

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)
        #print('block-2:', out.size())

        out = self.conv3(out)
        out = self.bn3(out)
        #print('block-3:', out.size())

        if self.downsample is not None:
            residual = self.downsample(x)
            #print('residual:', residual.size())

        out += residual
        out = self.relu(out)

        return out

In [8]:
def make_downsample(origin_downsample, conv3, inplanes, inplanes_indices, stride):
    indices_zero, indices_nonzero = check_channel(conv3.weight.detach())
    print(len(indices_zero), len(indices_nonzero))
    n_outplanes = len(indices_nonzero)
    
    
    conv1 = origin_downsample[0]
    bn1 = origin_downsample[1]
    
    n_conv1 = nn.Conv2d(inplanes, n_outplanes,
                        kernel_size=1, stride=stride, bias=False)
                
            
    #     print('-------------\n conv1')
    state_dict = {}
    for k in conv1.state_dict().keys():

        vals = conv1.state_dict()[k]
#         print('param: ', k, type(vals), vals.size())
        state_dict[k] = torch.index_select(vals, 0, indices_nonzero)
        if inplanes_indices != None:
            state_dict[k] = torch.index_select(state_dict[k], 1, inplanes_indices)
    n_conv1.load_state_dict(state_dict)


#     print('-------------\n bn1')
    state_dict = {}
    
    n_bn1 = nn.BatchNorm2d(n_outplanes)
    for k in bn1.state_dict().keys():
        vals = bn1.state_dict()[k]
#         print('param: ', k, type(vals), vals.size())
        if(len(vals.size()) > 0):
            state_dict[k] = torch.index_select(vals, 0, indices_nonzero)
        else:
            state_dict[k] = vals

    n_bn1.load_state_dict(state_dict)
    
    n_downsample = nn.Sequential(n_conv1, n_bn1)
    return n_downsample


def make_normal_downsample(origin_downsample, inplanes, inplanes_indices, stride):
    conv1 = origin_downsample[0]
    bn1 = origin_downsample[1]
    
    n_outplanes = conv1.weight.size()[0]
    
    n_conv1 = nn.Conv2d(inplanes, n_outplanes,
                        kernel_size=1, stride=stride, bias=False)
                
    state_dict = {}
    for k in conv1.state_dict().keys():
        vals = conv1.state_dict()[k]
        if inplanes_indices != None:
            state_dict[k] = torch.index_select(vals, 1, inplanes_indices)
        else:
            state_dict[k] = vals
    n_conv1.load_state_dict(state_dict)

    n_bn1 = bn1
    
    n_downsample = nn.Sequential(n_conv1, n_bn1)
    return n_downsample
            




class PruneResNet101(nn.Module):

    def __init__(self, origin_model, layers=[3,4,23,3], num_classes=1000):
        super(PruneResNet101, self).__init__()
        
        
        conv1, bn1, next_inplanes, next_inplanes_indices = prune_conv_bn(origin_model.conv1, origin_model.bn1, 
                                   inplanes=3, inplanes_indices=None, 
                                   kernel_size=7, stride=2, padding=3, bias=False)
        
        self.conv1 = conv1
        self.bn1 = bn1

#         self.conv1 = origin_model.conv1
#         self.bn1 = origin_model.bn1
#         next_inplanes = 64
#         next_inplanes_indices = None
        
        
        
        
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        
        
        
        
        self.layer1, next_inplanes, next_inplanes_indices = self._make_layer(
                                    origin_model.layer1,
                                    next_inplanes, next_inplanes_indices, 
                                    layers[0])
        print('origin_layer1::')
        print(origin_model.layer1)
        print('======')
        print('layer1::')
        print(self.layer1)
        print('---\n\n')
        
        self.layer2, next_inplanes, next_inplanes_indices = self._make_layer(
                                    origin_model.layer2,
                                    next_inplanes, next_inplanes_indices,  
                                    layers[1], stride=2)
        print('origin_layer2::')
        print(origin_model.layer2)
        print('======')
        print('layer2::')
        print(self.layer2)
        print('---\n\n')
        
        self.layer3, next_inplanes, next_inplanes_indices = self._make_layer(
                                    origin_model.layer3,
                                    next_inplanes, next_inplanes_indices,  
                                    layers[2], stride=2)
        print('origin_layer3::')
        print(origin_model.layer3)
        print('======')
        print('layer3::')
        print(self.layer3)
        print('---\n\n')
        
        self.layer4, next_inplanes, next_inplanes_indices = self._make_layer(
                                    origin_model.layer4,
                                    next_inplanes, next_inplanes_indices,  
                                    layers[3], stride=2)
        print('origin_layer4::')
        print(origin_model.layer4)
        print('======')
        print('layer4::')
        print(self.layer4)
        print('---\n\n')
        
        self.avgpool = nn.AvgPool2d(7, stride=1)
        
        
        fc = nn.Linear(next_inplanes, num_classes)
        o_fc = origin_model.fc
        state_dict = {}
        for k in o_fc.state_dict().keys():
            vals = o_fc.state_dict()[k]
            if(len(vals.size()) > 1):
                state_dict[k] = torch.index_select(vals, 1, next_inplanes_indices)
            else:
                state_dict[k] = vals
                
        fc.load_state_dict(state_dict)
        
        
        self.fc = fc

        

    def _make_layer(self, origin_layer, inplanes, inplanes_indices, blocks, stride=1):
        print('blocks: ', blocks)
        layers = []
        
        block0 = origin_layer[0]
        downsample = make_downsample(block0.downsample, block0.conv3, inplanes, inplanes_indices, stride)
        
        
        # origin_block, inplanes, inplanes_indices, stride=1, downsample=None
        new_block0 = PrunedBottleneck(block0, inplanes, inplanes_indices, stride, downsample)
        inplanes = new_block0.next_inplanes
        inplanes_indices = new_block0.next_inplanes_indices
        
        layers.append(new_block0)
        
        
        for i in range(1, blocks):
            blocki = origin_layer[i]
            new_blocki = PrunedBottleneck(blocki, inplanes, inplanes_indices, downsample=None)
            inplanes = new_blocki.next_inplanes
            inplanes_indices = new_blocki.next_inplanes_indices
            layers.append(new_blocki)

        return nn.Sequential(*layers), inplanes, inplanes_indices
    
    

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        
        
#         print('0: ',x.size())

        x = self.layer1(x)
#         print('1: ',x.size())
        
        x = self.layer2(x)
#         print('2: ',x.size())
        
        x = self.layer3(x)
#         print('3: ',x.size())
        
        x = self.layer4(x)
#         print('4: ',x.size())

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x
    


In [15]:

class CloneResNet101(nn.Module):

    def __init__(self, origin_model, layers=[3,4,23,3], num_classes=1000):
        super(CloneResNet101, self).__init__()
        
        
#         conv1, bn1, next_inplanes, next_inplanes_indices = prune_conv_bn(origin_model.conv1, origin_model.bn1, 
#                                    inplanes=3, inplanes_indices=None, 
#                                    kernel_size=7, stride=2, padding=3, bias=False)
        
#         self.conv1 = conv1
#         self.bn1 = bn1

        self.conv1 = origin_model.conv1
        self.bn1 = origin_model.bn1
        next_inplanes = 64
        next_inplanes_indices = None
        
        
        
        
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        
        
        
        
        self.layer1, next_inplanes, next_inplanes_indices = self._make_normal_layer(
                                    origin_model.layer1,
                                    next_inplanes, next_inplanes_indices, 
                                    layers[0])
#         print('origin_layer1::')
#         print(origin_model.layer1)
        print('======')
        print('layer1::')
        print(self.layer1)
        print('---\n\n')
        
        self.layer2, next_inplanes, next_inplanes_indices = self._make_normal_layer(
                                    origin_model.layer2,
                                    next_inplanes, next_inplanes_indices,  
                                    layers[1], stride=2)
#         print('origin_layer2::')
#         print(origin_model.layer2)
        print('======')
        print('layer2::')
        print(self.layer2)
        print('---\n\n')
        
        self.layer3, next_inplanes, next_inplanes_indices = self._make_mix_layer(
                                    origin_model.layer3,
                                    next_inplanes, next_inplanes_indices,  
                                    layers[2], stride=2)
#         print('origin_layer3::')
#         print(origin_model.layer3)
        print('======')
        print('layer3::')
        print(self.layer3)
        print('---\n\n')
        
        self.layer4, next_inplanes, next_inplanes_indices = self._make_normal_layer(
                                    origin_model.layer4,
                                    next_inplanes, next_inplanes_indices,  
                                    layers[3], stride=2)
        print('origin_layer4::')
        print(origin_model.layer4)
        print('======')
        print('layer4::')
        print(self.layer4)
        print('---\n\n')
        
        self.avgpool = nn.AvgPool2d(7, stride=1)
        
        
#         fc = nn.Linear(next_inplanes, num_classes)
#         o_fc = origin_model.fc
#         state_dict = {}
#         for k in o_fc.state_dict().keys():
#             vals = o_fc.state_dict()[k]
#             if(len(vals.size()) > 1):
#                 state_dict[k] = torch.index_select(vals, 1, next_inplanes_indices)
#             else:
#                 state_dict[k] = vals
                
#         fc.load_state_dict(state_dict)
        
        
        self.fc = origin_model.fc

        

    def _make_layer(self, origin_layer, inplanes, inplanes_indices, blocks, stride=1):
        print('blocks: ', blocks)
        layers = []
        
        block0 = origin_layer[0]
        downsample = make_downsample(block0.downsample, block0.conv3, inplanes, inplanes_indices, stride)
        
        
        # origin_block, inplanes, inplanes_indices, stride=1, downsample=None
        new_block0 = PrunedBottleneck(block0, inplanes, inplanes_indices, stride, downsample)
        inplanes = new_block0.next_inplanes
        inplanes_indices = new_block0.next_inplanes_indices
        
        layers.append(new_block0)
        
        
        for i in range(1, blocks):
            blocki = origin_layer[i]
            new_blocki = PrunedBottleneck(blocki, inplanes, inplanes_indices, downsample=None)
            inplanes = new_blocki.next_inplanes
            inplanes_indices = new_blocki.next_inplanes_indices
            layers.append(new_blocki)

        return nn.Sequential(*layers), inplanes, inplanes_indices
    
    
    def _make_normal_layer(self, origin_layer, inplanes, inplanes_indices, blocks, stride=1):
        print('blocks: ', blocks)
        layers = []
        
        block0 = origin_layer[0]
        downsample = make_normal_downsample(block0.downsample, inplanes, inplanes_indices, stride)
        
        
        # origin_block, inplanes, inplanes_indices, stride=1, downsample=None
        new_block0 = CloneBottleneck(block0, inplanes, inplanes_indices, stride, downsample)
        inplanes = new_block0.next_inplanes
        inplanes_indices = new_block0.next_inplanes_indices
        
        layers.append(new_block0)
        
        
        for i in range(1, blocks):
            blocki = origin_layer[i]
            new_blocki = CloneBottleneck(blocki, inplanes, inplanes_indices, downsample=None)
            inplanes = new_blocki.next_inplanes
            inplanes_indices = new_blocki.next_inplanes_indices
            layers.append(new_blocki)

        return nn.Sequential(*layers), inplanes, inplanes_indices
    
    
    def _make_mix_layer(self, origin_layer, inplanes, inplanes_indices, blocks, stride=1):
        print('blocks: ', blocks)
        layers = []
        
        block0 = origin_layer[0]
        downsample = make_normal_downsample(block0.downsample, inplanes, inplanes_indices, stride)
        
        
        # origin_block, inplanes, inplanes_indices, stride=1, downsample=None
        new_block0 = PrunedBottleneck(block0, inplanes, inplanes_indices, stride, downsample)
        inplanes = new_block0.next_inplanes
        inplanes_indices = new_block0.next_inplanes_indices
        
        layers.append(new_block0)
        
        
        for i in range(1, blocks-1):
            blocki = origin_layer[i]
            new_blocki = PrunedBottleneck(blocki, inplanes, inplanes_indices, downsample=None)
            inplanes = new_blocki.next_inplanes
            inplanes_indices = new_blocki.next_inplanes_indices
            layers.append(new_blocki)
        

        return nn.Sequential(*layers), inplanes, inplanes_indices
    

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        
        
#         print('0: ',x.size())

        x = self.layer1(x)
#         print('1: ',x.size())
        
        x = self.layer2(x)
#         print('2: ',x.size())
        
        x = self.layer3(x)
#         print('3: ',x.size())
        
        x = self.layer4(x)
#         print('4: ',x.size())

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x
    


In [16]:

#cpu_model = model.cpu()
# pruned_network = CloneResNet101(model)
# pruned_network = pruned_network.cuda()
# example = torch.rand(1, 3, 224, 224).cuda()
# out = pruned_network(example)
#print(pruned_network)


In [17]:
pruned_network = CloneResNet101(model.cpu())
small_model = torch.nn.DataParallel(pruned_network).cuda()
print('evaluate: small')
print('small model accu', validate(val_loader, small_model, criterion, log))

blocks:  3
layer1::
Sequential(
  (0): CloneBottleneck(
    (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (downsample): Sequential(
      (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (1): CloneBottleneck(
    (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)




76 180
76 180
76 180
76 180
76 180
76 180
76 180
76 180
76 180
76 180
76 180
76 180
76 180
76 180
76 180
76 180
76 180
76 180
76 180
76 180
76 180
76 180
76 180
76 180
76 180
76 180
76 180
layer3::
Sequential(
  (0): PrunedBottleneck(
    (conv1): Conv2d(512, 180, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn1): BatchNorm2d(180, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(180, 180, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(180, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv3): Conv2d(180, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (downsample): Sequential(
      (0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)
      (1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )


origin_layer4::
Sequential(
  (0): Bottleneck(
    (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (downsample): Sequential(
      (0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False)
      (1): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (1): Bottleneck(
    (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=Tru

  from ipykernel import kernelapp as app
  app.launch_new_instance()


Test: [0/782]	Time 6.773 (6.773)	Loss 0.6370 (0.6370)	Prec@1 87.500 (87.500)	Prec@5 95.312 (95.312)
Test: [200/782]	Time 0.229 (0.222)	Loss 0.7767 (0.7203)	Prec@1 84.375 (81.281)	Prec@5 93.750 (95.421)
Test: [400/782]	Time 0.188 (0.207)	Loss 0.7671 (0.8439)	Prec@1 78.125 (78.581)	Prec@5 95.312 (94.373)
Test: [600/782]	Time 0.189 (0.201)	Loss 0.9634 (0.9555)	Prec@1 84.375 (76.201)	Prec@5 90.625 (92.993)
 * Prec@1 75.154 Prec@5 92.414 Error@1 24.846
small model accu 75.154


In [18]:
torch.save(small_model, 'pruned_layer3_resnet101_0.7.pth')

  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


In [None]:
loaded_model = torch.load('pruned_layer3_resnet101_0.7.pth')
small_load_model = loaded_model.cuda() #torch.nn.DataParallel(loaded_model).cuda()
print('evaluate: small')
print('small model accu', validate(val_loader, small_load_model, criterion, log, is_cuda=True))

evaluate: small


  app.launch_new_instance()


Test: [0/782]	Time 6.753 (6.753)	Loss 0.6370 (0.6370)	Prec@1 87.500 (87.500)	Prec@5 95.312 (95.312)
Test: [200/782]	Time 0.233 (0.233)	Loss 0.7767 (0.7203)	Prec@1 84.375 (81.281)	Prec@5 93.750 (95.421)
Test: [400/782]	Time 0.172 (0.216)	Loss 0.7671 (0.8439)	Prec@1 78.125 (78.581)	Prec@5 95.312 (94.373)
Test: [600/782]	Time 0.173 (0.209)	Loss 0.9634 (0.9555)	Prec@1 84.375 (76.201)	Prec@5 90.625 (92.993)


In [14]:
args.lr = 0.1
args.lr_adjust=30
args.momentum = 0.9
args.weight_decay = 1e-4
args.print_freq = 200


def adjust_learning_rate(optimizer, epoch):
    """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
    lr = args.lr * (0.1 ** (epoch // args.lr_adjust))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

def train(train_loader, model, criterion, optimizer, epoch, log):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        
        #input = input.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)
        input_var = torch.autograd.Variable(input)
        target_var = torch.autograd.Variable(target)

        # compute output
        output = model(input_var)
        loss = criterion(output, target_var)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.data.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print_log('Epoch: [{0}][{1}/{2}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                epoch, i, len(train_loader), batch_time=batch_time,
                data_time=data_time, loss=losses, top1=top1, top5=top5), log)

In [18]:
# Data loading code
traindir = os.path.join(args.data, 'train')
valdir = os.path.join(args.data, 'val')
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
    
    
train_dataset = datasets.ImageFolder(
    traindir,
    transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ]))

train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=256, shuffle=True,
    num_workers=8, pin_memory=True, sampler=None)

In [19]:
# define loss function (criterion) and optimizer
criterion = nn.CrossEntropyLoss().cuda()






optimizer = torch.optim.SGD(model.parameters(), args.lr,
                            momentum=args.momentum,
                            weight_decay=args.weight_decay,
                            nesterov=True)


# val_acc_1 = validate(val_loader, small_model, criterion, log)

# print(">>>>> accu before is: {:}".format(val_acc_1))


start_time = time.time()
epoch_time = AverageMeter()

epoches = 2
best_prec1 = 0

for epoch in range(0, epoches):
    adjust_learning_rate(optimizer, epoch)


    # train for one epoch
    train(train_loader, small_model, criterion, optimizer, epoch, log)
    # evaluate on validation set
    val_acc_1 = validate(val_loader, small_model, criterion, log)

    
    epoch_time.update(time.time() - start_time)
    start_time = time.time()

Epoch: [0][0/5005]	Time 17.759 (17.759)	Data 13.745 (13.745)	Loss 1.3956 (1.3956)	Prec@1 65.625 (65.625)	Prec@5 87.500 (87.500)


KeyboardInterrupt: 

In [20]:
print('evaluate: small')
print('small model accu', validate(val_loader, small_model, criterion, log))

evaluate: small


  from ipykernel import kernelapp as app
  app.launch_new_instance()


RuntimeError: CUDA out of memory. Tried to allocate 50.00 MiB (GPU 0; 31.72 GiB total capacity; 30.24 GiB already allocated; 27.88 MiB free; 30.71 GiB reserved in total by PyTorch)

In [11]:
cpu_pruned = pruned_network.cpu()




#out = cpu_pruned.forward(example)


cpu_model = model.cpu()


x1 = cpu_pruned.conv1(example)

x2 = cpu_model.conv1(example)

print(x1)

print('\n\n')
print(x2)


# print(x1-x2)

tensor([[[[ 0.1832,  0.4630,  0.3385,  ..., -0.2549, -0.3382, -0.7052],
          [-0.2600, -0.3938, -0.5602,  ...,  0.4342,  0.0499,  0.6193],
          [ 0.1438, -0.0319, -0.2912,  ..., -0.9251, -0.2181, -0.3713],
          ...,
          [ 0.3281, -0.8527, -1.1207,  ..., -0.6123, -0.2748,  0.0983],
          [-0.3462,  0.2096,  1.0418,  ..., -0.2365,  0.8906,  0.8099],
          [-0.2473, -0.2383, -0.3965,  ...,  0.3147, -0.6539, -0.7158]],

         [[-0.5738,  0.0835,  2.3208,  ...,  0.2214,  0.6061,  0.4176],
          [ 0.3679, -0.0086, -1.8610,  ...,  0.2497, -0.2142,  0.4031],
          [ 0.0830,  0.2935,  0.4378,  ...,  0.1226, -0.9952,  0.8878],
          ...,
          [ 0.4524, -0.6929, -0.4924,  ...,  0.4848,  0.0381, -0.3537],
          [-0.4654,  0.1049,  0.0948,  ..., -0.3540,  0.0705,  0.2736],
          [ 0.6062, -0.1947,  0.9414,  ...,  0.6538,  0.7172, -0.0060]],

         [[-0.9819, -0.4133, -0.0793,  ..., -0.8533,  0.2454, -1.2887],
          [-2.0572, -0.2565,  

In [15]:
big_model = model.cuda()
print('evaluate: small')
print('small model accu', validate(val_loader, big_model, criterion, log))

evaluate: small


  from ipykernel import kernelapp as app
  app.launch_new_instance()


Test: [0/782]	Time 1.752 (1.752)	Loss 0.4534 (0.4534)	Prec@1 93.750 (93.750)	Prec@5 95.312 (95.312)
Test: [200/782]	Time 0.099 (0.109)	Loss 0.6202 (0.6282)	Prec@1 85.938 (83.629)	Prec@5 93.750 (96.494)
Test: [400/782]	Time 0.100 (0.105)	Loss 0.3566 (0.7328)	Prec@1 92.188 (81.312)	Prec@5 98.438 (95.577)
Test: [600/782]	Time 0.101 (0.103)	Loss 0.6925 (0.8386)	Prec@1 87.500 (79.149)	Prec@5 96.875 (94.374)
 * Prec@1 78.220 Prec@5 93.968 Error@1 21.780
small model accu 78.22


In [18]:
big_model.conv1.weight.size()

AttributeError: 'NoneType' object has no attribute 'size'

In [None]:
epoches-99 :: 0.9-keep :: acc 78.22
epoches-99 :: 0.7-keep :: acc 77.684
epoches-55 :: 0.6-keep :: acc 72.430
epoches-51 :: 0.5-keep :: acc 60.704
epoches-33 :: 0.4-keep :: acc 17.450