In [None]:
!ls 

In [1]:
import argparse
import os, sys
import shutil
import time

import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim
import torch.utils.data
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models
from utils import convert_secs2time, time_string, time_file_str
# from models import print_log
import models
import random
import numpy as np
from collections import OrderedDict

model_names = sorted(name for name in models.__dict__
                     if name.islower() and not name.startswith("__")
                     and callable(models.__dict__[name]))

model_names

['alexnet',
 'caffe_cifar',
 'preresnet110',
 'preresnet20',
 'preresnet32',
 'preresnet44',
 'preresnet56',
 'resnet101',
 'resnet101_small',
 'resnet110',
 'resnet152',
 'resnet152_small',
 'resnet18',
 'resnet18_small',
 'resnet20',
 'resnet32',
 'resnet34',
 'resnet34_small',
 'resnet44',
 'resnet50',
 'resnet50_small',
 'resnet56',
 'vgg11',
 'vgg11_bn',
 'vgg13',
 'vgg13_bn',
 'vgg16',
 'vgg16_bn',
 'vgg19',
 'vgg19_bn']

In [2]:
from dotmap import DotMap

args = DotMap()

args.data = '/home/hongky/datasets/imagenet'
args.save_dir = '0310_resnet101/resnet101-rate-0.7/'
if not os.path.exists(args.save_dir):
    os.makedirs(args.save_dir)
    
args.arch = 'resnet101'
args.workers = 8
args.epochs = 10
args.start_epoch = 0
args.batch_size = 256
args.lr = 0.1
args.momentum = 0.9
args.weight_decay = 1e-4
args.print_freq = 200
args.resume=''

args.evaluate = False 
args.use_pretrain = True

# python pruning_train.py 
# -a resnet101 
# --save_dir ./snapshots/resnet101-rate-0.7 
# --rate 0.7 
# --layer_begin 0 
# --layer_end 309 
# --layer_inter 3  
# /path/to/Imagenet2012

# compress-rate
args.rate = 0.7
args.layer_begin = 0
args.layer_end = 309
args.layer_inter = 3

args.epoch_prune=1
args.skip_downsample=1
args.use_sparse=False 
args.sparse=''
args.lr_adjust=30


args.use_cuda = torch.cuda.is_available()
args.prefix = time_file_str()

print(args)

DotMap(data='/home/hongky/datasets/imagenet', save_dir='0310_resnet101/resnet101-rate-0.7/', arch='resnet101', workers=8, epochs=10, start_epoch=0, batch_size=256, lr=0.1, momentum=0.9, weight_decay=0.0001, print_freq=200, resume='', evaluate=False, use_pretrain=True, rate=0.7, layer_begin=0, layer_end=309, layer_inter=3, epoch_prune=1, skip_downsample=1, use_sparse=False, sparse='', lr_adjust=30, use_cuda=True, prefix='2020-10-03-416')


In [3]:

def import_sparse(model):
    checkpoint = torch.load(args.sparse)
    new_state_dict = OrderedDict()
    for k, v in checkpoint['state_dict'].items():
        name = k[7:]  # remove `module.`
        new_state_dict[name] = v
    model.load_state_dict(new_state_dict)
    print("sparse_model_loaded")
    return model


def train(train_loader, model, criterion, optimizer, epoch, log):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        target = target.cuda(non_blocking=True)
        input_var = torch.autograd.Variable(input)
        target_var = torch.autograd.Variable(target)

        # compute output
        output = model(input_var)
        loss = criterion(output, target_var)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.data.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print_log('Epoch: [{0}][{1}/{2}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                epoch, i, len(train_loader), batch_time=batch_time,
                data_time=data_time, loss=losses, top1=top1, top5=top5), log)


def validate(val_loader, model, criterion, log):
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    for i, (input, target) in enumerate(val_loader):
        target = target.cuda(non_blocking=True)
        input_var = torch.autograd.Variable(input, volatile=True)
        target_var = torch.autograd.Variable(target, volatile=True)

        # compute output
        output = model(input_var)
        loss = criterion(output, target_var)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.data.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print_log('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                i, len(val_loader), batch_time=batch_time, loss=losses,
                top1=top1, top5=top5), log)

    print_log(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f} Error@1 {error1:.3f}'.format(top1=top1, top5=top5,
                                                                                           error1=100 - top1.avg), log)

    return top1.avg


def save_checkpoint(state, is_best, filename, bestname):
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, bestname)


def print_log(print_string, log):
    print("{:}".format(print_string))
    log.write('{:}\n'.format(print_string))
    log.flush()


class AverageMeter(object):
    """Computes and stores the average and current value"""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def adjust_learning_rate(optimizer, epoch):
    """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
    lr = args.lr * (0.1 ** (epoch // args.lr_adjust))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr


def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k"""
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res


class Mask:
    def __init__(self, model):
        self.model_size = {}
        self.model_length = {}
        self.compress_rate = {}
        self.mat = {}
        self.model = model
        self.mask_index = []

    def get_codebook(self, weight_torch, compress_rate, length):
        weight_vec = weight_torch.view(length)
        weight_np = weight_vec.cpu().numpy()

        weight_abs = np.abs(weight_np)
        weight_sort = np.sort(weight_abs)

        threshold = weight_sort[int(length * (1 - compress_rate))]
        weight_np[weight_np <= -threshold] = 1
        weight_np[weight_np >= threshold] = 1
        weight_np[weight_np != 1] = 0

        print("codebook done")
        return weight_np

    def get_filter_codebook(self, weight_torch, compress_rate, length):
        codebook = np.ones(length)
        if len(weight_torch.size()) == 4:
            filter_pruned_num = int(weight_torch.size()[0] * (1 - compress_rate))
            weight_vec = weight_torch.view(weight_torch.size()[0], -1)
            # norm1 = torch.norm(weight_vec, 1, 1)
            # norm1_np = norm1.cpu().numpy()
            norm2 = torch.norm(weight_vec, 2, 1)
            norm2_np = norm2.cpu().numpy()
            filter_index = norm2_np.argsort()[:filter_pruned_num]
            #            norm1_sort = np.sort(norm1_np)
            #            threshold = norm1_sort[int (weight_torch.size()[0] * (1-compress_rate) )]
            kernel_length = weight_torch.size()[1] * weight_torch.size()[2] * weight_torch.size()[3]
            for x in range(0, len(filter_index)):
                codebook[filter_index[x] * kernel_length: (filter_index[x] + 1) * kernel_length] = 0

            print("filter codebook done")
        else:
            pass
        return codebook

    def convert2tensor(self, x):
        x = torch.FloatTensor(x)
        return x

    def init_length(self):
        for index, item in enumerate(self.model.parameters()):
            self.model_size[index] = item.size()

        for index1 in self.model_size:
            for index2 in range(0, len(self.model_size[index1])):
                if index2 == 0:
                    self.model_length[index1] = self.model_size[index1][0]
                else:
                    self.model_length[index1] *= self.model_size[index1][index2]

    def init_rate(self, layer_rate):
        if 'vgg' in args.arch:
            cfg_5x = [24, 22, 41, 51, 108, 89, 111, 184, 276, 228, 512, 512, 512]
            cfg_official = [64, 64, 128, 128, 256, 256, 256, 512, 512, 512, 512, 512, 512]
            # cfg = [32, 64, 128, 128, 256, 256, 256, 256, 256, 256, 256, 256, 256]
            cfg_index = 0
            pre_cfg = True
            for index, item in enumerate(self.model.named_parameters()):
                self.compress_rate[index] = 1
                if len(item[1].size()) == 4:
                    print(item[1].size())
                    if not pre_cfg:
                        self.compress_rate[index] = layer_rate
                        self.mask_index.append(index)
                        print(item[0], "self.mask_index", self.mask_index)
                    else:
                        self.compress_rate[index] =  1 - cfg_5x[cfg_index] / item[1].size()[0]
                        self.mask_index.append(index)
                        print(item[0], "self.mask_index", self.mask_index, cfg_index, cfg_5x[cfg_index], item[1].size()[0],
                               )
                        cfg_index += 1
        elif "resnet" in args.arch:
            for index, item in enumerate(self.model.parameters()):
                self.compress_rate[index] = 1
            for key in range(args.layer_begin, args.layer_end + 1, args.layer_inter):
                self.compress_rate[key] = layer_rate
            if args.arch == 'resnet18':
                # last index include last fc layer
                last_index = 60
                skip_list = [21, 36, 51]
            elif args.arch == 'resnet34':
                last_index = 108
                skip_list = [27, 54, 93]
            elif args.arch == 'resnet50':
                last_index = 159
                skip_list = [12, 42, 81, 138]
            elif args.arch == 'resnet101':
                last_index = 312
                skip_list = [12, 42, 81, 291]
            elif args.arch == 'resnet152':
                last_index = 465
                skip_list = [12, 42, 117, 444]
            self.mask_index = [x for x in range(0, last_index, 3)]
            # skip downsample layer
            if args.skip_downsample == 1:
                for x in skip_list:
                    self.compress_rate[x] = 1
                    self.mask_index.remove(x)
                    print(self.mask_index)
            else:
                pass

    def init_mask(self, layer_rate):
        self.init_rate(layer_rate)
        for index, item in enumerate(self.model.parameters()):
            if (index in self.mask_index):
                self.mat[index] = self.get_filter_codebook(item.data, self.compress_rate[index],
                                                           self.model_length[index])
                self.mat[index] = self.convert2tensor(self.mat[index])
                if args.use_cuda:
                    self.mat[index] = self.mat[index].cuda()
        print("mask Ready")

    def do_mask(self):
        for index, item in enumerate(self.model.parameters()):
            if (index in self.mask_index):
                a = item.data.view(self.model_length[index])
                b = a * self.mat[index]
                item.data = b.view(self.model_size[index])
        print("mask Done")

    def if_zero(self):
        for index, item in enumerate(self.model.parameters()):
            #            if(index in self.mask_index):
            if index in [x for x in range(args.layer_begin, args.layer_end + 1, args.layer_inter)]:
                a = item.data.view(self.model_length[index])
                b = a.cpu().numpy()

                print("layer: %d, number of nonzero weight is %d, zero is %d" % (
                    index, np.count_nonzero(b), len(b) - np.count_nonzero(b)))


In [6]:

best_prec1 = 0

if not os.path.isdir(args.save_dir):
    os.makedirs(args.save_dir)
log = open(os.path.join(args.save_dir, '{}.{}.log'.format(args.arch, args.prefix)), 'w')

# version information
print_log("PyThon  version : {}".format(sys.version.replace('\n', ' ')), log)
print_log("PyTorch version : {}".format(torch.__version__), log)
print_log("cuDNN   version : {}".format(torch.backends.cudnn.version()), log)
print_log("Vision  version : {}".format(torchvision.__version__), log)



PyThon  version : 3.7.0 (default, Oct  9 2018, 10:31:47)  [GCC 7.3.0]
PyTorch version : 1.6.0+cu101
cuDNN   version : 7603
Vision  version : 0.7.0+cu101


In [7]:


# create model
print_log("=> creating model '{}'".format(args.arch), log)
model = models.__dict__[args.arch](pretrained=True)
if args.use_sparse:
    model = import_sparse(model)
print_log("=> Model : {}".format(model), log)
print_log("=> parameter : {}".format(args), log)
print_log("Compress Rate: {}".format(args.rate), log)
print_log("Layer Begin: {}".format(args.layer_begin), log)
print_log("Layer End: {}".format(args.layer_end), log)
print_log("Layer Inter: {}".format(args.layer_inter), log)
print_log("Epoch prune: {}".format(args.epoch_prune), log)
print_log("Skip downsample : {}".format(args.skip_downsample), log)
print_log("Workers         : {}".format(args.workers), log)
print_log("Learning-Rate   : {}".format(args.lr), log)
print_log("Use Pre-Trained : {}".format(args.use_pretrain), log)
print_log("lr adjust : {}".format(args.lr_adjust), log)

if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
    model.features = torch.nn.DataParallel(model.features)
    model.cuda()
else:
    model = torch.nn.DataParallel(model).cuda()
    
print('Model:: ', model)


=> creating model 'resnet101'
=> Model : ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d

In [8]:

# define loss function (criterion) and optimizer
criterion = nn.CrossEntropyLoss().cuda()

optimizer = torch.optim.SGD(model.parameters(), args.lr,
                            momentum=args.momentum,
                            weight_decay=args.weight_decay,
                            nesterov=True)

# optionally resume from a checkpoint
if args.resume:
    if os.path.isfile(args.resume):
        print_log("=> loading checkpoint '{}'".format(args.resume), log)
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']
        best_prec1 = checkpoint['best_prec1']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print_log("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']), log)
    else:
        print_log("=> no checkpoint found at '{}'".format(args.resume), log)

cudnn.benchmark = True

In [9]:
# Data loading code
traindir = os.path.join(args.data, 'train')
valdir = os.path.join(args.data, 'val')
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

train_dataset = datasets.ImageFolder(
    traindir,
    transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ]))

train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=args.batch_size, shuffle=True,
    num_workers=args.workers, pin_memory=True, sampler=None)

val_loader = torch.utils.data.DataLoader(
    datasets.ImageFolder(valdir, transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ])),
    batch_size=args.batch_size, shuffle=False,
    num_workers=args.workers, pin_memory=True)


    


In [10]:

filename = os.path.join(args.save_dir, 'checkpoint.{:}.{:}.pth.tar'.format(args.arch, args.prefix))
bestname = os.path.join(args.save_dir, 'best.{:}.{:}.pth.tar'.format(args.arch, args.prefix))

m = Mask(model)

m.init_length()
print("-" * 10 + "one epoch begin" + "-" * 10)
print("the compression rate now is {:}".format(args.rate))

val_acc_1 = validate(val_loader, model, criterion, log)

print(">>>>> Accuracy_origin_model: {:}".format(val_acc_1))

m.model = model

m.init_mask(args.rate)
# m.if_zero()
m.do_mask()
model = m.model
# m.if_zero()
if args.use_cuda:
    model = model.cuda()
val_acc_2 = validate(val_loader, model, criterion, log)
print(">>>>> Accuracy_masked_model: {:}".format(val_acc_2))




----------one epoch begin----------
the compression rate now is 0.7




Test: [0/196]	Time 12.922 (12.922)	Loss 0.4331 (0.4331)	Prec@1 87.891 (87.891)	Prec@5 98.047 (98.047)
 * Prec@1 77.374 Prec@5 93.546 Error@1 22.626
>>>>> Accuracy_origin_model: 77.374
[0, 3, 6, 9, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45, 48, 51, 54, 57, 60, 63, 66, 69, 72, 75, 78, 81, 84, 87, 90, 93, 96, 99, 102, 105, 108, 111, 114, 117, 120, 123, 126, 129, 132, 135, 138, 141, 144, 147, 150, 153, 156, 159, 162, 165, 168, 171, 174, 177, 180, 183, 186, 189, 192, 195, 198, 201, 204, 207, 210, 213, 216, 219, 222, 225, 228, 231, 234, 237, 240, 243, 246, 249, 252, 255, 258, 261, 264, 267, 270, 273, 276, 279, 282, 285, 288, 291, 294, 297, 300, 303, 306, 309]
[0, 3, 6, 9, 15, 18, 21, 24, 27, 30, 33, 36, 39, 45, 48, 51, 54, 57, 60, 63, 66, 69, 72, 75, 78, 81, 84, 87, 90, 93, 96, 99, 102, 105, 108, 111, 114, 117, 120, 123, 126, 129, 132, 135, 138, 141, 144, 147, 150, 153, 156, 159, 162, 165, 168, 171, 174, 177, 180, 183, 186, 189, 192, 195, 198, 201, 204, 207, 210, 213, 216, 219, 222, 225, 22

In [None]:
start_time = time.time()
epoch_time = AverageMeter()
for epoch in range(args.start_epoch, args.epochs):
    adjust_learning_rate(optimizer, epoch)

    need_hour, need_mins, need_secs = convert_secs2time(epoch_time.val * (args.epochs - epoch))
    need_time = '[Need: {:02d}:{:02d}:{:02d}]'.format(need_hour, need_mins, need_secs)
    print_log(
        ' [{:s}] :: {:3d}/{:3d} ----- [{:s}] {:s}'.format(
            args.arch, epoch, args.epochs, time_string(), need_time),
        log)

    
    # 1. train for one epoch
    train(train_loader, model, criterion, optimizer, epoch, log)
    

    # 2. evaluate on validation set
    val_acc_1 = validate(val_loader, model, criterion, log)
    print('\n\n>>>>> Accuracy_model: ', val_acc_1)
    
    # 3. prune trained model - filter again after 1 training epoch
    if (epoch % args.epoch_prune == 0 or epoch == args.epochs - 1):
        #        if (random.randint(1,args.epoch_prune)==1 or epoch == args.epochs-1):
        m.model = model
        m.if_zero()
        m.init_mask(args.rate)
        m.do_mask()
        m.if_zero()
        model = m.model
        if args.use_cuda:
            model = model.cuda()

    # 4. validate pruned model
    val_acc_2 = validate(val_loader, model, criterion, log)
    print('\n\n>>>>> Accuracy_pruned: ', val_acc_2)
    
    
    # remember best prec@1 and save checkpoint
    is_best = val_acc_2 > best_prec1
    best_prec1 = max(val_acc_2, best_prec1)
    save_checkpoint({
        'epoch': epoch + 1,
        'arch': args.arch,
        'state_dict': model.state_dict(),
        'best_prec1': best_prec1,
        'optimizer': optimizer.state_dict(),
    }, is_best, filename, bestname)
    # measure elapsed time
    epoch_time.update(time.time() - start_time)
    start_time = time.time()
log.close()

 [resnet101] ::   0/ 10 ----- [[2020-10-03 06:10:45]] [Need: 00:00:00]
Epoch: [0][0/5005]	Time 39.941 (39.941)	Data 39.561 (39.561)	Loss 3.0013 (3.0013)	Prec@1 48.828 (48.828)	Prec@5 70.703 (70.703)
Epoch: [0][200/5005]	Time 16.031 (3.066)	Data 15.661 (2.706)	Loss 3.4957 (4.0360)	Prec@1 26.953 (20.171)	Prec@5 55.078 (42.671)
Epoch: [0][400/5005]	Time 11.497 (2.753)	Data 11.148 (2.393)	Loss 3.2045 (3.6638)	Prec@1 31.250 (25.375)	Prec@5 56.250 (49.606)
Epoch: [0][600/5005]	Time 5.994 (2.699)	Data 5.645 (2.340)	Loss 2.9800 (3.4551)	Prec@1 37.109 (28.572)	Prec@5 62.500 (53.475)
Epoch: [0][800/5005]	Time 2.770 (2.706)	Data 2.423 (2.346)	Loss 3.0512 (3.3140)	Prec@1 35.156 (30.789)	Prec@5 62.109 (56.055)
Epoch: [0][1000/5005]	Time 4.086 (2.679)	Data 3.737 (2.319)	Loss 2.6420 (3.2096)	Prec@1 42.188 (32.476)	Prec@5 65.625 (57.899)
Epoch: [0][1200/5005]	Time 8.221 (2.674)	Data 7.870 (2.314)	Loss 2.6672 (3.1249)	Prec@1 38.281 (33.876)	Prec@5 67.969 (59.396)
Epoch: [0][1400/5005]	Time 2.269 (2.689



Epoch: [0][4600/5005]	Time 10.827 (2.621)	Data 10.392 (2.214)	Loss 2.2570 (2.5846)	Prec@1 48.438 (43.501)	Prec@5 74.219 (68.628)
Epoch: [0][4800/5005]	Time 14.158 (2.614)	Data 13.723 (2.205)	Loss 2.2274 (2.5709)	Prec@1 53.516 (43.755)	Prec@5 73.438 (68.853)
Epoch: [0][5000/5005]	Time 10.839 (2.610)	Data 10.313 (2.200)	Loss 2.1723 (2.5571)	Prec@1 50.391 (44.013)	Prec@5 74.219 (69.084)




Test: [0/196]	Time 8.641 (8.641)	Loss 1.3524 (1.3524)	Prec@1 66.016 (66.016)	Prec@5 87.891 (87.891)
 * Prec@1 51.786 Prec@5 77.592 Error@1 48.214


>>>>> Accuracy_model:  51.786
layer: 0, number of nonzero weight is 6615, zero is 2793
layer: 3, number of nonzero weight is 2880, zero is 1216
layer: 6, number of nonzero weight is 26325, zero is 10539
layer: 9, number of nonzero weight is 13774, zero is 2610
layer: 12, number of nonzero weight is 16384, zero is 0
layer: 15, number of nonzero weight is 11520, zero is 4864
layer: 18, number of nonzero weight is 26253, zero is 10611
layer: 21, number of nonzero weight is 14204, zero is 2180
layer: 24, number of nonzero weight is 11520, zero is 4864
layer: 27, number of nonzero weight is 27135, zero is 9729
layer: 30, number of nonzero weight is 15168, zero is 1216
layer: 33, number of nonzero weight is 23296, zero is 9472
layer: 36, number of nonzero weight is 121698, zero is 25758
layer: 39, number of nonzero weight is 55904, zero is 9632
l

layer: 60, number of nonzero weight is 44916, zero is 20620
layer: 63, number of nonzero weight is 46008, zero is 19528
layer: 66, number of nonzero weight is 102375, zero is 45081
layer: 69, number of nonzero weight is 45193, zero is 20343
layer: 72, number of nonzero weight is 92160, zero is 38912
layer: 75, number of nonzero weight is 403398, zero is 186426
layer: 78, number of nonzero weight is 183316, zero is 78828
layer: 81, number of nonzero weight is 524288, zero is 0
layer: 84, number of nonzero weight is 183711, zero is 78433
layer: 87, number of nonzero weight is 413586, zero is 176238
layer: 90, number of nonzero weight is 176763, zero is 85381
layer: 93, number of nonzero weight is 183280, zero is 78864
layer: 96, number of nonzero weight is 414243, zero is 175581
layer: 99, number of nonzero weight is 178002, zero is 84142
layer: 102, number of nonzero weight is 183947, zero is 78197
layer: 105, number of nonzero weight is 414720, zero is 175104
layer: 108, number of nonz



Epoch: [1][3400/5005]	Time 4.528 (2.540)	Data 4.176 (2.162)	Loss 2.0469 (2.1895)	Prec@1 51.953 (50.729)	Prec@5 77.734 (75.121)
Epoch: [1][3600/5005]	Time 6.718 (2.549)	Data 6.264 (2.173)	Loss 1.9813 (2.1886)	Prec@1 51.172 (50.751)	Prec@5 82.031 (75.136)
Epoch: [1][3800/5005]	Time 2.333 (2.549)	Data 1.983 (2.173)	Loss 1.9458 (2.1868)	Prec@1 53.125 (50.783)	Prec@5 80.859 (75.157)
Epoch: [1][4000/5005]	Time 10.893 (2.564)	Data 10.544 (2.189)	Loss 2.1426 (2.1858)	Prec@1 50.000 (50.794)	Prec@5 77.344 (75.177)
Epoch: [1][4200/5005]	Time 17.706 (2.566)	Data 17.357 (2.192)	Loss 2.2829 (2.1851)	Prec@1 48.047 (50.802)	Prec@5 76.172 (75.182)
Epoch: [1][4400/5005]	Time 18.178 (2.569)	Data 17.828 (2.196)	Loss 2.2217 (2.1834)	Prec@1 50.781 (50.834)	Prec@5 73.438 (75.207)
Epoch: [1][4600/5005]	Time 12.863 (2.557)	Data 12.512 (2.184)	Loss 2.2871 (2.1828)	Prec@1 49.609 (50.847)	Prec@5 72.656 (75.224)
Epoch: [1][4800/5005]	Time 6.205 (2.539)	Data 5.851 (2.166)	Loss 2.0418 (2.1819)	Prec@1 51.562 (50.882)

layer: 63, number of nonzero weight is 46008, zero is 19528
layer: 66, number of nonzero weight is 102636, zero is 44820
layer: 69, number of nonzero weight is 45028, zero is 20508
layer: 72, number of nonzero weight is 92160, zero is 38912
layer: 75, number of nonzero weight is 406728, zero is 183096
layer: 78, number of nonzero weight is 183257, zero is 78887
layer: 81, number of nonzero weight is 524288, zero is 0
layer: 84, number of nonzero weight is 183711, zero is 78433
layer: 87, number of nonzero weight is 413586, zero is 176238
layer: 90, number of nonzero weight is 175960, zero is 86184
layer: 93, number of nonzero weight is 183540, zero is 78604
layer: 96, number of nonzero weight is 414243, zero is 175581
layer: 99, number of nonzero weight is 177027, zero is 85117
layer: 102, number of nonzero weight is 183984, zero is 78160
layer: 105, number of nonzero weight is 414720, zero is 175104
layer: 108, number of nonzero weight is 173900, zero is 88244
layer: 111, number of no



Epoch: [2][2200/5005]	Time 17.209 (2.492)	Data 16.756 (2.131)	Loss 2.4277 (2.1336)	Prec@1 46.875 (51.818)	Prec@5 73.047 (75.981)
Epoch: [2][2400/5005]	Time 18.595 (2.496)	Data 18.245 (2.135)	Loss 2.2565 (2.1340)	Prec@1 50.000 (51.818)	Prec@5 73.047 (75.984)
Epoch: [2][2600/5005]	Time 17.748 (2.496)	Data 17.399 (2.135)	Loss 2.2137 (2.1350)	Prec@1 53.516 (51.810)	Prec@5 72.656 (75.953)
Epoch: [2][2800/5005]	Time 12.825 (2.473)	Data 12.476 (2.112)	Loss 1.9552 (2.1347)	Prec@1 55.469 (51.830)	Prec@5 80.469 (75.974)
Epoch: [2][3000/5005]	Time 6.734 (2.448)	Data 6.385 (2.088)	Loss 2.0306 (2.1331)	Prec@1 51.953 (51.870)	Prec@5 78.516 (76.004)
Epoch: [2][3200/5005]	Time 11.925 (2.461)	Data 11.575 (2.100)	Loss 2.1927 (2.1322)	Prec@1 50.781 (51.890)	Prec@5 75.781 (76.013)
Epoch: [2][3400/5005]	Time 14.448 (2.474)	Data 14.097 (2.114)	Loss 2.2357 (2.1341)	Prec@1 46.875 (51.856)	Prec@5 71.875 (75.984)
Epoch: [2][3600/5005]	Time 14.331 (2.492)	Data 13.981 (2.132)	Loss 2.1124 (2.1341)	Prec@1 50.391 (5

layer: 84, number of nonzero weight is 183711, zero is 78433
layer: 87, number of nonzero weight is 413586, zero is 176238
layer: 90, number of nonzero weight is 175960, zero is 86184
layer: 93, number of nonzero weight is 183540, zero is 78604
layer: 96, number of nonzero weight is 414243, zero is 175581
layer: 99, number of nonzero weight is 176877, zero is 85267
layer: 102, number of nonzero weight is 183984, zero is 78160
layer: 105, number of nonzero weight is 414720, zero is 175104
layer: 108, number of nonzero weight is 173824, zero is 88320
layer: 111, number of nonzero weight is 184068, zero is 78076
layer: 114, number of nonzero weight is 414126, zero is 175698
layer: 117, number of nonzero weight is 173652, zero is 88492
layer: 120, number of nonzero weight is 184297, zero is 77847
layer: 123, number of nonzero weight is 414720, zero is 175104
layer: 126, number of nonzero weight is 174128, zero is 88016
layer: 129, number of nonzero weight is 184299, zero is 77845
layer: 13



Epoch: [3][5000/5005]	Time 0.349 (2.288)	Data 0.000 (1.927)	Loss 1.9631 (2.1135)	Prec@1 53.516 (52.276)	Prec@5 78.906 (76.295)
Test: [0/196]	Time 8.934 (8.934)	Loss 1.3313 (1.3313)	Prec@1 66.797 (66.797)	Prec@5 88.672 (88.672)
 * Prec@1 54.558 Prec@5 79.542 Error@1 45.442


>>>>> Accuracy_model:  54.558
layer: 0, number of nonzero weight is 6615, zero is 2793
layer: 3, number of nonzero weight is 2880, zero is 1216
layer: 6, number of nonzero weight is 26325, zero is 10539
layer: 9, number of nonzero weight is 13774, zero is 2610
layer: 12, number of nonzero weight is 16384, zero is 0
layer: 15, number of nonzero weight is 11520, zero is 4864
layer: 18, number of nonzero weight is 26253, zero is 10611
layer: 21, number of nonzero weight is 14204, zero is 2180
layer: 24, number of nonzero weight is 11520, zero is 4864
layer: 27, number of nonzero weight is 27135, zero is 9729
layer: 30, number of nonzero weight is 15168, zero is 1216
layer: 33, number of nonzero weight is 23296, zero is

layer: 75, number of nonzero weight is 406728, zero is 183096
layer: 78, number of nonzero weight is 183139, zero is 79005
layer: 81, number of nonzero weight is 524288, zero is 0
layer: 84, number of nonzero weight is 183711, zero is 78433
layer: 87, number of nonzero weight is 413586, zero is 176238
layer: 90, number of nonzero weight is 175668, zero is 86476
layer: 93, number of nonzero weight is 183488, zero is 78656
layer: 96, number of nonzero weight is 414243, zero is 175581
layer: 99, number of nonzero weight is 176577, zero is 85567
layer: 102, number of nonzero weight is 183984, zero is 78160
layer: 105, number of nonzero weight is 414720, zero is 175104
layer: 108, number of nonzero weight is 173444, zero is 88700
layer: 111, number of nonzero weight is 184068, zero is 78076
layer: 114, number of nonzero weight is 414126, zero is 175698
layer: 117, number of nonzero weight is 173802, zero is 88342
layer: 120, number of nonzero weight is 184297, zero is 77847
layer: 123, numb



Epoch: [4][2600/5005]	Time 13.140 (3.027)	Data 12.789 (2.666)	Loss 1.9747 (2.0976)	Prec@1 52.344 (52.546)	Prec@5 78.516 (76.511)
Epoch: [4][2800/5005]	Time 13.315 (3.041)	Data 12.966 (2.680)	Loss 2.0039 (2.0986)	Prec@1 52.344 (52.543)	Prec@5 77.734 (76.496)
Epoch: [4][3000/5005]	Time 19.484 (3.056)	Data 19.134 (2.695)	Loss 2.0735 (2.0996)	Prec@1 51.562 (52.532)	Prec@5 73.047 (76.475)
Epoch: [4][3200/5005]	Time 15.081 (3.072)	Data 14.729 (2.711)	Loss 2.0794 (2.1018)	Prec@1 48.438 (52.489)	Prec@5 77.734 (76.440)
Epoch: [4][3400/5005]	Time 15.955 (3.088)	Data 15.590 (2.727)	Loss 2.2244 (2.1031)	Prec@1 50.000 (52.468)	Prec@5 73.438 (76.424)
Epoch: [4][3600/5005]	Time 13.496 (3.099)	Data 13.129 (2.738)	Loss 1.9251 (2.1036)	Prec@1 51.953 (52.450)	Prec@5 79.688 (76.425)
Epoch: [4][3800/5005]	Time 9.470 (3.047)	Data 9.100 (2.686)	Loss 2.3634 (2.1045)	Prec@1 46.875 (52.435)	Prec@5 71.875 (76.418)
Epoch: [4][4000/5005]	Time 12.165 (3.017)	Data 11.815 (2.656)	Loss 2.0498 (2.1040)	Prec@1 55.859 (5

layer: 78, number of nonzero weight is 183080, zero is 79064
layer: 81, number of nonzero weight is 524288, zero is 0
layer: 84, number of nonzero weight is 183711, zero is 78433
layer: 87, number of nonzero weight is 413586, zero is 176238
layer: 90, number of nonzero weight is 175668, zero is 86476
layer: 93, number of nonzero weight is 183488, zero is 78656
layer: 96, number of nonzero weight is 414243, zero is 175581
layer: 99, number of nonzero weight is 176502, zero is 85642
layer: 102, number of nonzero weight is 183984, zero is 78160
layer: 105, number of nonzero weight is 414720, zero is 175104
layer: 108, number of nonzero weight is 172912, zero is 89232
layer: 111, number of nonzero weight is 184069, zero is 78075
layer: 114, number of nonzero weight is 414126, zero is 175698
layer: 117, number of nonzero weight is 173127, zero is 89017
layer: 120, number of nonzero weight is 184297, zero is 77847
layer: 123, number of nonzero weight is 414720, zero is 175104
layer: 126, num



Epoch: [5][1400/5005]	Time 0.349 (2.701)	Data 0.000 (2.339)	Loss 2.1927 (2.0886)	Prec@1 51.172 (52.751)	Prec@5 74.609 (76.737)
Epoch: [5][1600/5005]	Time 0.350 (2.725)	Data 0.000 (2.363)	Loss 2.2604 (2.0886)	Prec@1 51.953 (52.773)	Prec@5 73.047 (76.741)
Epoch: [5][1800/5005]	Time 0.350 (2.723)	Data 0.000 (2.361)	Loss 2.1041 (2.0895)	Prec@1 52.734 (52.753)	Prec@5 77.734 (76.724)
Epoch: [5][2000/5005]	Time 0.471 (2.721)	Data 0.000 (2.360)	Loss 2.0952 (2.0911)	Prec@1 53.516 (52.743)	Prec@5 73.438 (76.694)
Epoch: [5][2200/5005]	Time 0.351 (2.689)	Data 0.000 (2.328)	Loss 2.0733 (2.0910)	Prec@1 50.391 (52.732)	Prec@5 76.953 (76.696)
Epoch: [5][2400/5005]	Time 0.350 (2.701)	Data 0.000 (2.340)	Loss 1.9547 (2.0918)	Prec@1 57.812 (52.699)	Prec@5 76.953 (76.684)
Epoch: [5][2600/5005]	Time 0.350 (2.687)	Data 0.000 (2.326)	Loss 2.3733 (2.0939)	Prec@1 47.266 (52.645)	Prec@5 75.391 (76.650)
Epoch: [5][2800/5005]	Time 0.348 (2.689)	Data 0.000 (2.328)	Loss 2.0749 (2.0950)	Prec@1 53.516 (52.605)	Prec@5 

layer: 294, number of nonzero weight is 737280, zero is 311296
layer: 297, number of nonzero weight is 1654272, zero is 705024
layer: 300, number of nonzero weight is 954634, zero is 93942
layer: 303, number of nonzero weight is 735232, zero is 313344
layer: 306, number of nonzero weight is 1657503, zero is 701793
layer: 309, number of nonzero weight is 955248, zero is 93328
[0, 3, 6, 9, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45, 48, 51, 54, 57, 60, 63, 66, 69, 72, 75, 78, 81, 84, 87, 90, 93, 96, 99, 102, 105, 108, 111, 114, 117, 120, 123, 126, 129, 132, 135, 138, 141, 144, 147, 150, 153, 156, 159, 162, 165, 168, 171, 174, 177, 180, 183, 186, 189, 192, 195, 198, 201, 204, 207, 210, 213, 216, 219, 222, 225, 228, 231, 234, 237, 240, 243, 246, 249, 252, 255, 258, 261, 264, 267, 270, 273, 276, 279, 282, 285, 288, 291, 294, 297, 300, 303, 306, 309]
[0, 3, 6, 9, 15, 18, 21, 24, 27, 30, 33, 36, 39, 45, 48, 51, 54, 57, 60, 63, 66, 69, 72, 75, 78, 81, 84, 87, 90, 93, 96, 99, 102, 105, 108, 111

layer: 309, number of nonzero weight is 730864, zero is 317712
Test: [0/196]	Time 7.289 (7.289)	Loss 0.7840 (0.7840)	Prec@1 76.953 (76.953)	Prec@5 98.047 (98.047)
 * Prec@1 55.210 Prec@5 80.408 Error@1 44.790


>>>>> Accuracy_pruned:  55.21
 [resnet101] ::   6/ 10 ----- [[2020-10-04 04:00:43]] [Need: 14:19:48]
Epoch: [6][0/5005]	Time 30.880 (30.880)	Data 30.463 (30.463)	Loss 2.0868 (2.0868)	Prec@1 51.562 (51.562)	Prec@5 76.953 (76.953)
Epoch: [6][200/5005]	Time 13.136 (2.728)	Data 12.768 (2.365)	Loss 1.8541 (2.0472)	Prec@1 55.078 (53.446)	Prec@5 82.812 (77.284)
Epoch: [6][400/5005]	Time 9.765 (2.361)	Data 9.404 (1.997)	Loss 2.1772 (2.0617)	Prec@1 49.609 (53.201)	Prec@5 72.656 (76.975)
Epoch: [6][600/5005]	Time 7.738 (2.179)	Data 7.252 (1.816)	Loss 2.0082 (2.0658)	Prec@1 52.344 (53.133)	Prec@5 78.125 (76.900)
Epoch: [6][800/5005]	Time 15.347 (2.158)	Data 14.997 (1.796)	Loss 2.1818 (2.0695)	Prec@1 50.000 (53.008)	Prec@5 72.266 (76.863)




Epoch: [6][1000/5005]	Time 15.207 (2.154)	Data 14.852 (1.792)	Loss 2.3289 (2.0713)	Prec@1 47.656 (53.016)	Prec@5 71.875 (76.840)
Epoch: [6][1200/5005]	Time 19.171 (2.211)	Data 18.810 (1.849)	Loss 2.0974 (2.0759)	Prec@1 58.203 (52.948)	Prec@5 75.781 (76.770)
Epoch: [6][1400/5005]	Time 25.076 (2.359)	Data 24.711 (1.997)	Loss 2.0310 (2.0798)	Prec@1 53.516 (52.912)	Prec@5 80.078 (76.742)
Epoch: [6][1600/5005]	Time 22.078 (2.446)	Data 21.729 (2.084)	Loss 2.2094 (2.0822)	Prec@1 46.094 (52.855)	Prec@5 74.609 (76.725)
Epoch: [6][1800/5005]	Time 19.571 (2.500)	Data 19.204 (2.138)	Loss 2.0203 (2.0842)	Prec@1 55.859 (52.828)	Prec@5 76.562 (76.695)
Epoch: [6][2000/5005]	Time 22.323 (2.578)	Data 21.960 (2.217)	Loss 1.9728 (2.0859)	Prec@1 54.297 (52.797)	Prec@5 75.781 (76.667)
Epoch: [6][2200/5005]	Time 14.437 (2.661)	Data 14.087 (2.299)	Loss 1.8617 (2.0873)	Prec@1 55.078 (52.777)	Prec@5 78.516 (76.665)
Epoch: [6][2400/5005]	Time 22.946 (2.727)	Data 22.473 (2.365)	Loss 2.2688 (2.0869)	Prec@1 49.609 

In [None]:
if args.evaluate:
    validate(val_loader, model, criterion, log)