## Libraries & Setup

In [36]:
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim
import torch.utils.data
#import models
import torch.distributed as dist
#from data import DataRegime
#from utils.log import setup_logging, ResultsLog
#from utils.optim import OptimRegime
#from utils.cross_entropy import CrossEntropyLoss
from torch.nn import CrossEntropyLoss 
from torch.optim import Adam
from utils.misc import torch_dtypes
#from utils.param_filter import FilterModules, is_bn
#from utils.convert_pytcv_model import convert_pytcv_model
from datetime import datetime
from ast import literal_eval
from utils.trainer import Trainer
from utils.adaquant import optimize_layer_adaquant
import numpy as np
import pandas as pd
import ast
from functools import partial
import random
import os
from utils.quantize import QConv2d, QLinear
from torch.nn import Conv2d, Linear
from utils.load_dataset import load_dataset, create_calibration_dataset
import torchvision.models as models
from models.resnet import ResNet_imagenet

In [25]:
# Initial values for the metric variables and the tensor dtype used by the notebook.
acc, loss = -1, -1
best_prec1 = 0
dtype = torch.float32

### SET SEED
# The same seed is handed to Python's `random`, NumPy and PyTorch, in that order.
seed = 123
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    _seed_fn(seed)

### LOGGING
#setup_logging(os.path.join(save_path, 'log.txt'),
#                  resume=args.resume is not '',
#                  dummy=args.distributed and args.local_rank > 0)
#results_path = os.path.join(save_path, 'results')
#results = ResultsLog(
#        results_path, title='Training Results - %s' % args.save)
#logging.info("saving to %s", save_path)
#logging.debug("run arguments: %s", args)
#logging.info("creating model %s", args.model)

# With CUDA available: list every device index, seed all CUDA generators, make the
# first device current and enable cuDNN benchmark mode. Without CUDA, `device_ids`
# is None.
if torch.cuda.is_available():
    device_ids = list(range(torch.cuda.device_count()))
    torch.cuda.manual_seed_all(seed)
    torch.cuda.set_device(device_ids[0])
    cudnn.benchmark = True
else:
    device_ids = None

## Data

In [26]:
# Dataset root; the train / val / test splits are its sub-directories of the same name.
path_to_dataset='./kaggle_tiny_imagenet/tiny-imagenet-200/dataset_tiny_imagenet'
#which_dataset = '/train' #'/val' #'/test'
train_directory, val_directory, test_directory = (
    f'{path_to_dataset}/{split}' for split in ('train', 'val', 'test')
)

In [27]:
# Build one data object per split with the project's `load_dataset` helper.
# Each split is loaded with its own batch-size variable.

# Training data
train_batch_size = 400
train_data = load_dataset(train_directory, train_batch_size)

# Evaluation data
val_batch_size = 128
val_data = load_dataset(val_directory, val_batch_size)

# Test data.
# Uses `test_batch_size` (previously `val_batch_size` was passed here, which left
# `test_batch_size` unused and would have silently ignored any change to it).
test_batch_size = 128
test_data = load_dataset(test_directory, test_batch_size)

In [28]:
# Build the calibration data from the training directory.
# NOTE(review): the cell that iterates `calib_data` directly below recorded
# "TypeError: 'NoneType' object is not iterable", which suggests this call returned
# None — verify that `create_calibration_dataset` actually returns its dataset.
calib_data = create_calibration_dataset(train_directory) 

In [48]:
# Sanity check: print the target shape of every (inputs, target) pair in the
# calibration data. The batch index and the inputs are not needed for this.
for _inputs, target in calib_data:
    print([target.shape])

TypeError: 'NoneType' object is not iterable

## Model, Optimizer, Trainer

In [37]:
### CREATE MODEL
#model = models.resnet18(pretrained=True)
model = ResNet_imagenet()

# Loss function (criterion).
criterion = CrossEntropyLoss()

# Optimizer configuration: the model's own `regime` attribute wins when it exists;
# otherwise this single-entry fallback is used. The fallback is built before the
# lookup, so the Adam instance is created in both cases.
# NOTE(review): torch.optim.Adam has no `momentum` argument; whether the
# 'lr' / 'momentum' / 'weight_decay' entries take effect depends on how Trainer
# consumes this list — confirm.
default_regime = [dict(
    epoch=0,
    optimizer=Adam(model.parameters()),  # params??
    lr=1e-2,
    momentum=0.5,  # args.momentum
    weight_decay=0,  # args.weight_decay
)]
optimizer = getattr(model, 'regime', default_regime)

prunner = None


In [38]:
# TRAINER?
# Wire model, pruner placeholder, loss and optimizer configuration into the project
# Trainer. The trainer is pinned to the CPU device here.
trainer = Trainer(
    model,
    prunner,
    criterion,
    optimizer,
    device=torch.device('cpu'),
)

## Cache, Hook

In [39]:
# Activation cache filled by `hook`: module -> list of (input, output) pairs.
cached_input_output = {}

# Key suffixes of quantization-related entries (presumably state-dict names; the
# list is not referenced in the visible cells — confirm its consumer).
# One suffix per line so a missing comma cannot silently fuse two neighbouring
# literals into one string, which is what previously happened between
# '.quantize_input1.running_range' and '.quantize_input2.running_zero_point'.
quant_keys = [
    '.weight',
    '.bias',
    '.equ_scale',
    '.quantize_input.running_zero_point',
    '.quantize_input.running_range',
    '.quantize_weight.running_zero_point',
    '.quantize_weight.running_range',
    '.quantize_input1.running_zero_point',
    '.quantize_input1.running_range',
    '.quantize_input2.running_zero_point',
    '.quantize_input2.running_range',
]

def Qhook(name, module, input, output):
    """Forward hook that records the first input seen by ``module``.

    The detached CPU copy of ``input[0]`` is stored as a one-element list in the
    global ``cached_qinput`` under ``module``. Once a module has an entry, later
    calls leave it untouched. ``name`` and ``output`` are accepted but unused.
    """
    if module in cached_qinput:
        return
    # Meanwhile store data in the RAM.
    cached_qinput[module] = [input[0].detach().cpu()]

def hook(name,module, input, output):
    """Forward hook that records every (input, output) pair of ``module``.

    Each call appends a tuple of detached CPU copies of ``input[0]`` and ``output``
    to the list kept for ``module`` in the global ``cached_input_output``; the list
    is created on first use. ``name`` is accepted but unused.
    """
    records = cached_input_output.setdefault(module, [])
    # Meanwhile store data in the RAM.
    captured_in = input[0].detach().cpu()
    captured_out = output.detach().cpu()
    records.append((captured_in, captured_out))

In [41]:
# Print the qualified name of every Conv2d / Linear module in the model.
for layer_name, module in model.named_modules():
    if not isinstance(module, (Conv2d, Linear)):
        continue
    print(layer_name)

conv1
layer1.0.conv1
layer1.0.conv2
layer1.0.conv3
layer1.0.downsample.0
layer1.1.conv1
layer1.1.conv2
layer1.1.conv3
layer1.2.conv1
layer1.2.conv2
layer1.2.conv3
layer2.0.conv1
layer2.0.conv2
layer2.0.conv3
layer2.0.downsample.0
layer2.1.conv1
layer2.1.conv2
layer2.1.conv3
layer2.2.conv1
layer2.2.conv2
layer2.2.conv3
layer2.3.conv1
layer2.3.conv2
layer2.3.conv3
layer3.0.conv1
layer3.0.conv2
layer3.0.conv3
layer3.0.downsample.0
layer3.1.conv1
layer3.1.conv2
layer3.1.conv3
layer3.2.conv1
layer3.2.conv2
layer3.2.conv3
layer3.3.conv1
layer3.3.conv2
layer3.3.conv3
layer3.4.conv1
layer3.4.conv2
layer3.4.conv3
layer3.5.conv1
layer3.5.conv2
layer3.5.conv3
layer3.6.conv1
layer3.6.conv2
layer3.6.conv3
layer3.7.conv1
layer3.7.conv2
layer3.7.conv3
layer3.8.conv1
layer3.8.conv2
layer3.8.conv3
layer3.9.conv1
layer3.9.conv2
layer3.9.conv3
layer3.10.conv1
layer3.10.conv2
layer3.10.conv3
layer3.11.conv1
layer3.11.conv2
layer3.11.conv3
layer3.12.conv1
layer3.12.conv2
layer3.12.conv3
layer3.13.conv1
lay

In [42]:

# Capture the input/output of every QConv2d / QLinear layer with quantization off.
#
# Each matching module gets `quantize = False` and (up to `max_hooked_layers`
# modules) a forward `hook`, then one `trainer.validate` pass fills
# `cached_input_output`. The hooks are removed and `quantize` is switched back to
# True in a `finally` block, so a failing forward pass no longer leaves hooks
# attached or the model stuck with quantization disabled.
# NOTE(review): `hook` appends to the existing cache, so running this cell twice
# without resetting `cached_input_output` stores every batch twice.
max_hooked_layers = 1000
handlers = []
count = 0
try:
    for name, m in model.named_modules():
        if isinstance(m, (QConv2d, QLinear)):
        #if isinstance(m, Conv2d) or isinstance(m, Linear):
        # if isinstance(m, QConv2d):
            m.quantize = False
            if count < max_hooked_layers:
            # if (isinstance(m, QConv2d) and m.groups == 1) or isinstance(m, QLinear):
                handlers.append(m.register_forward_hook(partial(hook,name)))
                count += 1

    # Store input/output for all quantizable layers
    trainer.validate(train_data)
    print("Input/outputs cached")
finally:
    # Always detach the temporary hooks ...
    for handler in handlers:
        handler.remove()

    # ... and re-enable quantization on every quantizable layer.
    for m in model.modules():
        if isinstance(m, (QConv2d, QLinear)):
            m.quantize = True


Output shape: torch.Size([400, 1000]), Target shape: torch.Size([400])
Input/outputs cached


In [47]:
# Dump the activation cache for inspection.
# NOTE(review): the recorded output is `None`. The layer-loop cell further down
# rebinds `cached_input_output` to None when it finishes, so this cell (execution
# count 47) presumably ran after it — confirm by re-running the notebook in order.
# When the cache is populated this prints every cached tensor in full.
print(cached_input_output)

None


## Loop Through Layers

In [43]:
# Result table: one row per cached layer, filled in by the layer loop.
mse_columns = ['name', 'bit', 'shape', 'mse_before', 'mse_after']
mse_rows = np.arange(len(cached_input_output))
mse_df = pd.DataFrame(index=mse_rows, columns=mse_columns)

# `print_freq` is not referenced in the visible cells; `evaluate` is the filename
# prefix used when the results are saved.
print_freq = 100
evaluate = "evaluate"

In [45]:
# Layer-by-layer AdaQuant pass over every module captured in `cached_input_output`.
#
# For each layer after the first, the model is run once more with a `Qhook` on
# that layer, and the input of the layer's first cached pair is replaced by the
# freshly captured one. The layer is then handed to `optimize_layer_adaquant`
# and the returned MSE values are written to row `i` of `mse_df`.
if cached_input_output is None:
    # The end of this cell rebinds the cache to None, so a second run would
    # otherwise stop with an opaque "'NoneType' object is not iterable".
    raise RuntimeError(
        "cached_input_output is None; re-run the caching cell before the layer loop")

for i, layer in enumerate(cached_input_output):
    if i>0: # and seq_adaquant = True
        # `Qhook` writes into the module-level `cached_qinput`; start empty per layer.
        cached_qinput = {}
        qhandlers = []
        for name, m in model.named_modules():
            if layer.name==name and len(qhandlers) < 1000:
                qhandlers.append(m.register_forward_hook(partial(Qhook,name)))
        try:
            # Forward pass that lets Qhook capture this layer's input.
            trainer.validate(train_data)
        finally:
            # Detach the temporary hook even when the pass fails, so it cannot
            # stay on the module and fire during later passes.
            for handler in qhandlers:
                handler.remove()
        print("cashed quant Input%s"%layer.name)
        # Keep the cached output, swap in the newly captured input.
        cached_input_output[layer][0] = (cached_qinput[layer][0],cached_input_output[layer][0][1])
    print("\nOptimize {}:{} for {} bit of shape {}".format(i, layer.name, layer.num_bits, layer.weight.shape))

    mse_before, mse_after = optimize_layer_adaquant(layer, cached_input_output[layer])

    print("\nMSE before optimization: {}".format(mse_before))
    print("MSE after optimization:  {}".format(mse_after))
    mse_df.loc[i, 'name'] = layer.name
    mse_df.loc[i, 'bit'] = layer.num_bits
    mse_df.loc[i, 'shape'] = str(layer.weight.shape)
    mse_df.loc[i, 'mse_before'] = mse_before
    mse_df.loc[i, 'mse_after'] = mse_after


# Write the per-layer MSE table to CSV, then save the model's state dict; both
# file names are derived from the `evaluate` prefix.
mse_csv = f'{evaluate}.mse.csv'
mse_df.to_csv(mse_csv)

filename = f'{evaluate}.adaquant'
torch.save(model.state_dict(), filename)

# Drop the references to the training data and the activation cache.
# After this point both names are None, so the caching cells must be re-run
# before the layer loop can be executed again.
train_data = None
cached_input_output = None

# Final validation pass.
# The training object produced by the same `load_dataset` helper is passed to
# `trainer.validate` as-is in the earlier cells, so `get_loader()` is only called
# when the validation object actually provides it; otherwise the object itself is
# handed over.
val_loader = val_data.get_loader() if hasattr(val_data, 'get_loader') else val_data
val_results = trainer.validate(val_loader)
#logging.info(val_results)

TypeError: 'NoneType' object is not iterable