In [1]:
import os
import json
import numpy as np

### Loading model

In [2]:
from aimet_torch.batch_norm_fold import fold_all_batch_norms
from torchvision.models import resnet18
import torch
torch.manual_seed(0)

model = resnet18(pretrained=True)
use_cuda = False
if torch.cuda.is_available():
    use_cuda = True
    model.to(torch.device('cuda'))
    
_ = fold_all_batch_norms(model, input_shapes=(1, 3, 224, 224))

use_cuda

2024-12-30 11:48:57,737 - root - INFO - AIMET




[2024-12-30 11:49:01,855] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cpu (auto detect)


False

In [3]:
DATASET_DIR = '/home/shayan/Desktop/aimet/Examples/torch/quantization/'
import sys
sys.path.append("/home/shayan/Desktop/aimet/")

import os
import torch
from Examples.common import image_net_config
from Examples.torch.utils.image_net_evaluator import ImageNetEvaluator
from Examples.torch.utils.image_net_trainer import ImageNetTrainer
from Examples.torch.utils.image_net_data_loader import ImageNetDataLoader

sys.path.remove("/home/shayan/Desktop/aimet/")

class ImageNetDataPipeline:

    @staticmethod
    def get_val_dataloader() -> torch.utils.data.DataLoader:
        """
        Instantiates a validation dataloader for ImageNet dataset and returns it
        """
        data_loader = ImageNetDataLoader(DATASET_DIR,
                                         image_size=image_net_config.dataset['image_size'],
                                         batch_size=image_net_config.evaluation['batch_size'],
                                         is_training=False,
                                         num_workers=image_net_config.evaluation['num_workers']).data_loader
        return data_loader

    @staticmethod
    def evaluate(model: torch.nn.Module, use_cuda: bool) -> float:
        """
        Given a torch model, evaluates its Top-1 accuracy on the dataset
        :param model: the model to evaluate
        :param iterations: the number of batches to be used to evaluate the model. A value of 'None' means the model will be
                           evaluated on the entire dataset once.
        :param use_cuda: whether or not the GPU should be used.
        """
        evaluator = ImageNetEvaluator(DATASET_DIR, image_size=image_net_config.dataset['image_size'],
                                      batch_size=image_net_config.evaluation['batch_size'],
                                      num_workers=image_net_config.evaluation['num_workers'])

        return evaluator.evaluate(model, iterations=None, use_cuda=use_cuda)

In [4]:
def pass_calibration_data(sim_model, use_cuda):
    data_loader = ImageNetDataPipeline.get_val_dataloader()
    batch_size = data_loader.batch_size

    if use_cuda:
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    sim_model.eval()
    samples = 1000

    batch_cntr = 0
    idx = 0
    with torch.no_grad():
        for path, input_data, target_data in data_loader:
            # if "cf135f199d8c7a9d0dce9aa35acfb4c70c14e0aa" not in path[0]:
            #     continue
            # if "cf" in path[0]:
            print(path)
            inputs_batch = input_data.to(device)
            sim_model(inputs_batch)
            break


In [5]:
from aimet_common.defs import QuantScheme
from aimet_torch.v1.quantsim import QuantizationSimModel
from copy import deepcopy

dummy_input = torch.rand(1, 3, 224, 224)    # Shape for each ImageNet sample is (3 channels) x (224 height) x (224 width)
if use_cuda:
    dummy_input = dummy_input.cuda()

sim = QuantizationSimModel(model=deepcopy(model),
                           quant_scheme=QuantScheme.post_training_tf,
                           dummy_input=dummy_input,
                           default_output_bw=8,
                           default_param_bw=8,
                           config_file="/home/shayan/Desktop/aimet/my_config.json")

sim.compute_encodings(forward_pass_callback=pass_calibration_data,
                      forward_pass_callback_args=use_cuda)

2024-12-30 11:49:02,621 - Quant - INFO - Unsupported op type Squeeze
2024-12-30 11:49:02,622 - Quant - INFO - Unsupported op type Mean
2024-12-30 11:49:02,623 - Quant - INFO - Selecting DefaultOpInstanceConfigGenerator to compute the specialized config. hw_version:default
('/home/shayan/Desktop/aimet/Examples/torch/quantization/val/9/cf135f199d8c7a9d0dce9aa35acfb4c70c14e0aa.jpeg',)


In [6]:
# sim.export("./temp", filename_prefix="model", dummy_input=dummy_input)

In [7]:
print(str(sim))

-------------------------
Quantized Model Report
-------------------------
----------------------------------------------------------
Layer: conv1
  Input[0]: bw=8, encoding-present=True
    StaticGrid TensorQuantizer:
    quant-scheme:QuantScheme.post_training_tf, round_mode=RoundingMode.ROUND_NEAREST, bitwidth=8, enabled=True
    min:-2.231783390045166, max=2.231783390045166, delta=0.017573097559410757, offset=-127.0
  -------
  Param[weight]: bw=8, encoding-present=True
    StaticGrid TensorQuantizer:
    quant-scheme:QuantScheme.post_training_tf, round_mode=RoundingMode.ROUND_NEAREST, bitwidth=8, enabled=True
    min:-0.39387544989585876, max=0.39387544989585876, delta=0.0031013814952429823, offset=-127.0
  -------
  Param[bias]: bw=8, encoding-present=True
    StaticGrid TensorQuantizer:
    quant-scheme:QuantScheme.post_training_tf, round_mode=RoundingMode.ROUND_NEAREST, bitwidth=8, enabled=True
    min:-0.6932891607284546, max=0.6932891607284546, delta=0.005458969769515391, offs

In [8]:
offset = lambda min, delta: (min) / delta
delta = lambda max, min, bitwidth: (max - min) / (2 ** bitwidth - 1)

In [9]:
encoding = list(sim._get_qc_quantized_layers(sim.model)[0][1].param_quantizers.values())[0]._encoding[0]
encoding_min = encoding.min
encoding_max = encoding.max
encoding_delta = encoding.delta
encoding_offset = encoding.offset
encoding_bitwidth = list(sim._get_qc_quantized_layers(sim.model)[0][1].param_quantizers.values())[0].bitwidth

my_delta = delta(encoding_max, encoding_min, encoding_bitwidth)
my_offset = offset(encoding_min, my_delta)

print(f"{my_delta} == {encoding_delta} and {my_offset} == {encoding_offset}")
print(f"{encoding_min=}, {encoding_max=}, {encoding_delta=}")
print(f"{encoding_offset=}, {encoding_bitwidth=}")

0.0030892192148694805 == 0.0031013814952429823 and -127.5 == -127.0
encoding_min=-0.39387544989585876, encoding_max=0.39387544989585876, encoding_delta=0.0031013814952429823
encoding_offset=-127.0, encoding_bitwidth=8


### Time for Grid Search implementation

In [10]:
class UnlabelledDataset(torch.utils.data.Dataset):
    def __init__(self,):
        self.dataset = ImageNetDataPipeline.get_val_dataloader().dataset

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        return self.dataset[idx][1]

In [11]:
offset = lambda min, delta: min / delta
delta = lambda max, min, bitwidth: (max - min) / (2 ** bitwidth - 1)

def compute_encoding(min, max, bw=8):
    enc_delta = delta(max, min, bw)
    enc_offset = offset(min, enc_delta)
    return {"min": min, "max": max, "offset": enc_offset, "delta": enc_delta}

def set_encoding_for_layer(layer, weight_encoding: dict, bias_encoding: dict=None, input_encoding: dict=None, output_encoding: dict=None):
    if weight_encoding:
        layer.param_quantizers['weight'].encoding.max = weight_encoding['max']
        layer.param_quantizers['weight'].encoding.min = weight_encoding['min']
        layer.param_quantizers['weight'].encoding.delta = weight_encoding['delta']
        layer.param_quantizers['weight'].encoding.offset = weight_encoding['offset']

    if bias_encoding:
        layer.param_quantizers['bias'].encoding.max = bias_encoding['max']
        layer.param_quantizers['bias'].encoding.min = bias_encoding['min']
        layer.param_quantizers['bias'].encoding.delta = bias_encoding['delta']
        layer.param_quantizers['bias'].encoding.offset = bias_encoding['offset']
            
    if input_encoding:
        for input_quantizer in layer.input_quantizers:
            input_quantizer.encoding.max = input_encoding['max']
            input_quantizer.encoding.min = input_encoding['min']
            input_quantizer.encoding.delta = input_encoding['delta']
            input_quantizer.encoding.offset = input_encoding['offset']
    
    if output_encoding:
        for output_quantizer in layer.output_quantizers:
            output_quantizer.encoding.max = output_encoding['max']
            output_quantizer.encoding.min = output_encoding['min']
            output_quantizer.encoding.delta = output_encoding['delta']
            output_quantizer.encoding.offset = output_encoding['offset']
        
    return layer
    
compute_encoding(0.0, 10, 8)

{'min': 0.0, 'max': 10, 'offset': 0.0, 'delta': 0.0392156862745098}

In [12]:
# quant_wrapper.input_quantizers[0].encoding.max

In [13]:
import torch
from aimet_torch import utils
num_batches = 1

def _compute_mse_loss(module: torch.nn.Module, quant_wrapper: torch.nn.Module,
                        fp32_model: torch.nn.Module, sim: QuantizationSimModel, verbose=False) -> float:
    """
    Compute MSE loss between fp32 and quantized output activations for each batch, add for
    all the batches and return averaged mse loss.

    :param module: module from the fp32_model.
    :param quant_wrapper: Corresponding quant wrapper from the QuantSim model.
    :param fp32_model: PyTorch model.
    :param sim: Quantsim model.
    :return: MSE loss between fp32 and quantized output activations.
    """
    # output activations collector.
    orig_module_collector = utils.ModuleData(fp32_model, module)
    quant_module_collector = utils.ModuleData(sim.model, quant_wrapper)
    
    if verbose:
        weight = quant_module_collector._module.param_quantizers['weight']
        bias = quant_module_collector._module.param_quantizers['bias']
        inp = quant_module_collector._module.input_quantizers
        outs = quant_module_collector._module.output_quantizers
        
        print("WEIGHT")    
        print(weight)
        print("BIAS")
        print(bias)
        print("INPUT")
        for i in inp:
            print(i)
        print("OUTPUT")
        for i in outs:
            print(i)
    total = 0
    loss = 0.0
    batch_index = 0
    unlabeled_dataset_iterable = torch.utils.data.DataLoader(UnlabelledDataset(), batch_size=1, shuffle=False)
    for model_inputs in unlabeled_dataset_iterable:
        assert isinstance(model_inputs, (torch.Tensor, tuple, list))
        with torch.no_grad():
            _, quantized_out_acts = quant_module_collector.collect_inp_out_data(model_inputs,
                                                                                collect_input=False,
                                                                                collect_output=True)
            _, fp32_out_acts = orig_module_collector.collect_inp_out_data(model_inputs,
                                                                            collect_input=False,
                                                                            collect_output=True)
        loss += torch.nn.functional.mse_loss(fp32_out_acts, quantized_out_acts).item()
        total += fp32_out_acts.size(0)
        batch_index += 1
        if batch_index == num_batches:
            break

    average_loss = loss/total
    return average_loss

In [14]:
fp32_name, fp32_module = list(dict(model.named_modules()).items())[-1]
quant_name, quant_wrapper = list(dict(sim.model.named_modules()).items())[-2]

print(fp32_module)
print(quant_wrapper)
print("-----------------------------")
print(fp32_name)
print(quant_name)

Linear(in_features=512, out_features=1000, bias=True)
StaticGridQuantWrapper(
  (_module_to_wrap): Linear(in_features=512, out_features=1000, bias=True)
)
-----------------------------
fc
fc


In [15]:
[k for k in quant_wrapper.__dict__.keys() if "quantizers" in k]
print(quant_wrapper.output_quantizers[0])

StaticGrid TensorQuantizer:
    quant-scheme:QuantScheme.post_training_tf, round_mode=RoundingMode.ROUND_NEAREST, bitwidth=8, enabled=True
    min:-11.122593879699707, max=11.122593879699707, delta=0.08757947936771422, offset=-127.0



In [16]:
_compute_mse_loss(fp32_module, quant_wrapper, model, sim)

0.11874568462371826

### Updating Weight, Biases, etc at same time

In [None]:
from tqdm import tqdm
def make_ranges(max, n=30, max_diff=0.1):
    max_diff = max * 0.5
    return np.linspace(max-max_diff if max-max_diff >= 0 else 0, max, n)

def check_encodings_present(quant_wrapper):
    encodings_present = {"weight": False, "bias": False, "input": False, "output": False}
    if quant_wrapper.param_quantizers['weight']._encoding:
        encodings_present['weight'] = True
    if quant_wrapper.param_quantizers['bias']._encoding:
        encodings_present['bias'] = True
    if quant_wrapper.input_quantizers[0]._encoding:
        encodings_present['input'] = True
    if quant_wrapper.output_quantizers[0]._encoding:
        encodings_present['output'] = True
    return encodings_present

def grid_search(sim, model, fp32_module, quant_wrapper, verbose=False):
    encodings_present = check_encodings_present(quant_wrapper)
    encoding_max = {k: None for k in encodings_present.keys()}
    
    # if encodings_present["weight"]:
    #     encoding_max["weight"] = quant_wrapper.param_quantizers['weight']._encoding[0].max
    # if encodings_present["bias"]:
    #     encoding_max["bias"] = quant_wrapper.param_quantizers['bias']._encoding[0].max
    # if encodings_present["input"]:
    #     encoding_max["input"] = quant_wrapper.input_quantizers[0]._encoding[0].max
    # if encodings_present["output"]:
    #     encoding_max["output"] = [out._encoding for out in quant_wrapper.output_quantizers if out._encoding]
    
    weight_maximum = quant_wrapper.param_quantizers['weight']._encoding[0].max 
    bias_maximum = quant_wrapper.param_quantizers['bias']._encoding[0].max
    # input_maximum = quant_wrapper.input_quantizers[0]._encoding[0].max
    output_maximum = [out._encoding for out in quant_wrapper.output_quantizers if out._encoding][0][0].max
    weight_range = make_ranges(weight_maximum)
    bias_range = make_ranges(bias_maximum)
    input_range = make_ranges(output_maximum)
    
    orig_loss = _compute_mse_loss(fp32_module, quant_wrapper, model, sim)
    
    if verbose:
        print(f"""
            weight_maximum = {weight_maximum} \n {weight_range} \n \n
            bias_maximum = {bias_maximum} \n {bias_range} \n \n
            input_maximum = {None} \n {input_range} \n \n
            output_maximum = {output_maximum} \n None \n \n
            """)
        print(f"Original loss: {orig_loss}")

    best_loss = float('inf')
    best_params = {"weight": weight_maximum, "bias": bias_maximum, "output": output_maximum}
    all_logs = []
    for weight in tqdm(weight_range):
        for bias in tqdm(bias_range):
            for inp in input_range:
                weight_encoding = compute_encoding(-weight, weight)
                bias_encoding = compute_encoding(-bias, bias)
                input_encoding = compute_encoding(-inp, inp)
                set_encoding_for_layer(quant_wrapper, weight_encoding, bias_encoding, output_encoding=input_encoding)
                loss = _compute_mse_loss(fp32_module, quant_wrapper, model, sim)
                all_logs.append({"weight": weight, "bias": bias, "output": inp, "loss": loss})
                if loss < best_loss:
                    best_loss = loss
                    best_params = {"weight": weight, "bias": bias, "output": inp}
    
    if verbose:
        print(f"New best loss: {best_loss} with params: {best_params}")
        if weight_maximum != best_params['weight']:
            print(f"Weight maximum changed from {weight_maximum} to {best_params['weight']}")
        if bias_maximum != best_params['bias']:
            print(f"Bias maximum changed from {bias_maximum} to {best_params['bias']}")
        if output_maximum != best_params['output']:
            print(f"output maximum changed from {output_maximum} to {best_params['output']}")
    
    return best_loss, best_params, all_logs
    
best_loss, best_params, all_logs =  grid_search(sim, model, fp32_module, quant_wrapper, verbose=True)


            weight_maximum = 0.7152369618415833 
 [0.35761848 0.36995015 0.38228182 0.3946135  0.40694517 0.41927684
 0.43160851 0.44394018 0.45627185 0.46860353 0.4809352  0.49326687
 0.50559854 0.51793021 0.53026189 0.54259356 0.55492523 0.5672569
 0.57958857 0.59192024 0.60425192 0.61658359 0.62891526 0.64124693
 0.6535786  0.66591027 0.67824195 0.69057362 0.70290529 0.71523696] 
 

            bias_maximum = 0.061649736016988754 
 [0.03082487 0.03188779 0.03295072 0.03401365 0.03507657 0.0361395
 0.03720243 0.03826535 0.03932828 0.04039121 0.04145413 0.04251706
 0.04357999 0.04464291 0.04570584 0.04676877 0.04783169 0.04889462
 0.04995754 0.05102047 0.0520834  0.05314632 0.05420925 0.05527218
 0.0563351  0.05739803 0.05846096 0.05952388 0.06058681 0.06164974] 
 

            input_maximum = None 
 [ 5.56129694  5.7530658   5.94483466  6.13660352  6.32837238  6.52014124
  6.7119101   6.90367896  7.09544782  7.28721668  7.47898554  7.6707544
  7.86252326  8.05429212  8.24606098  8.4

100%|██████████| 30/30 [01:24<00:00,  2.81s/it]
100%|██████████| 30/30 [01:24<00:00,  2.81s/it]
100%|██████████| 30/30 [01:24<00:00,  2.82s/it]
100%|██████████| 30/30 [01:24<00:00,  2.82s/it]
100%|██████████| 30/30 [01:24<00:00,  2.80s/it]
100%|██████████| 30/30 [01:24<00:00,  2.82s/it]
100%|██████████| 30/30 [01:24<00:00,  2.83s/it]
100%|██████████| 30/30 [01:27<00:00,  2.93s/it]
100%|██████████| 30/30 [01:26<00:00,  2.90s/it]
100%|██████████| 30/30 [01:24<00:00,  2.82s/it]
 33%|███▎      | 10/30 [14:10<28:31, 85.59s/it]

In [58]:
fp32_name

'fc'

In [59]:
import pandas as pd
import json
import os

output_dir = "./logs/"
output_dir = os.path.join(output_dir, fp32_name)

os.makedirs(output_dir, exist_ok=True)
pd.DataFrame(all_logs).to_csv(os.path.join(output_dir, "logs.csv"))

with open(os.path.join(output_dir, "best_params.json"), "w") as f:
    best_params.update({"best_loss": best_loss})
    json.dump(best_params, f)


In [None]:
from tqdm import tqdm
def make_ranges(max, n=15, max_diff=0.1):
    return np.linspace(max-max_diff if max-max_diff >= 0 else 0, max, n)

def grid_search(sim, model, fp32_module, quant_wrapper, verbose=False):
    weight_maximum = quant_wrapper.param_quantizers['weight']._encoding[0].max
    bias_maximum = quant_wrapper.param_quantizers['bias']._encoding[0].max
    input_maximum = quant_wrapper.input_quantizers[0]._encoding[0].max
    output_maximum = [out._encoding for out in quant_wrapper.output_quantizers if out._encoding]
    
    weight_range = make_ranges(weight_maximum)
    bias_range = make_ranges(bias_maximum)
    input_range = make_ranges(input_maximum)
    
    orig_loss = _compute_mse_loss(fp32_module, quant_wrapper, model, sim)
    
    if verbose:
        print(f"""
            weight_maximum = {weight_maximum} \n {weight_range} \n \n
            bias_maximum = {bias_maximum} \n {bias_range} \n \n
            input_maximum = {input_maximum} \n {input_range} \n \n
            output_maximum = {output_maximum} \n None \n \n
            """)
        print(f"Original loss: {orig_loss}")
        
    best_loss = float('inf')
    best_params = {"weight": weight_maximum, "bias": bias_maximum, "input": input_maximum}
    for weight in tqdm(weight_range):
        for bias in tqdm(bias_range):
            for inp in input_range:
                weight_encoding = compute_encoding(-weight, weight)
                bias_encoding = compute_encoding(-bias, bias)
                input_encoding = compute_encoding(-inp, inp)
                set_encoding_for_layer(quant_wrapper, weight_encoding, bias_encoding, input_encoding)
                loss = _compute_mse_loss(fp32_module, quant_wrapper, model, sim)
                if loss < best_loss:
                    best_loss = loss
                    best_params = {"weight": weight, "bias": bias, "input": inp}
    
    if verbose:
        print(f"New best loss: {best_loss} with params: {best_params}")
        if weight_maximum != best_params['weight']:
            print(f"Weight maximum changed from {weight_maximum} to {best_params['weight']}")
        if bias_maximum != best_params['bias']:
            print(f"Bias maximum changed from {bias_maximum} to {best_params['bias']}")
        if input_maximum != best_params['input']:
            print(f"Input maximum changed from {input_maximum} to {best_params['input']}")
        
    
grid_search(sim, model, fp32_module, quant_wrapper, verbose=True)


            weight_maximum = 0.39387544989585876 
 [0.29387545 0.30101831 0.30816116 0.31530402 0.32244688 0.32958974
 0.33673259 0.34387545 0.35101831 0.35816116 0.36530402 0.37244688
 0.37958974 0.38673259 0.39387545] 
 

            bias_maximum = 0.6932891607284546 
 [0.59328916 0.60043202 0.60757488 0.61471773 0.62186059 0.62900345
 0.6361463  0.64328916 0.65043202 0.65757488 0.66471773 0.67186059
 0.67900345 0.6861463  0.69328916] 
 

            input_maximum = 2.231783390045166 
 [2.13178339 2.13892625 2.1460691  2.15321196 2.16035482 2.16749768
 2.17464053 2.18178339 2.18892625 2.1960691  2.20321196 2.21035482
 2.21749768 2.22464053 2.23178339] 
 

            output_maximum = [] 
 None 
 

            
Original loss: 0.00016707682516425848


  0%|          | 0/15 [00:00<?, ?it/s]

100%|██████████| 15/15 [00:01<00:00, 11.42it/s]
100%|██████████| 15/15 [00:01<00:00, 11.88it/s]
100%|██████████| 15/15 [00:01<00:00, 11.84it/s]
100%|██████████| 15/15 [00:01<00:00, 11.99it/s]
100%|██████████| 15/15 [00:01<00:00, 11.92it/s]
100%|██████████| 15/15 [00:01<00:00, 12.13it/s]
100%|██████████| 15/15 [00:01<00:00, 12.20it/s]
100%|██████████| 15/15 [00:01<00:00, 11.95it/s]
100%|██████████| 15/15 [00:01<00:00, 12.05it/s]
100%|██████████| 15/15 [00:01<00:00, 11.86it/s]
100%|██████████| 15/15 [00:01<00:00, 12.02it/s]
100%|██████████| 15/15 [00:01<00:00, 11.79it/s]
100%|██████████| 15/15 [00:01<00:00, 11.96it/s]
100%|██████████| 15/15 [00:01<00:00, 11.93it/s]
100%|██████████| 15/15 [00:01<00:00, 12.04it/s]
100%|██████████| 15/15 [00:18<00:00,  1.26s/it]

New best loss: 4.611624899553135e-05 with params: {'weight': 0.39387544989585876, 'bias': 0.6861463035855975, 'input': 2.1746405329023086}
Bias maximum changed from 0.6932891607284546 to 0.6861463035855975
Input maximum changed from 2.231783390045166 to 2.1746405329023086





In [32]:
from aimet_common.defs import QuantScheme
from aimet_torch.v1.quantsim import QuantizationSimModel
from copy import deepcopy

dummy_input = torch.rand(1, 3, 224, 224)    # Shape for each ImageNet sample is (3 channels) x (224 height) x (224 width)
if use_cuda:
    dummy_input = dummy_input.cuda()

tf_enhanced_sim = QuantizationSimModel(model=deepcopy(model),
                           quant_scheme=QuantScheme.post_training_tf_enhanced,
                           dummy_input=dummy_input,
                           default_output_bw=8,
                           default_param_bw=8,
                           config_file="/home/shayan/Desktop/aimet/my_config.json")

tf_enhanced_sim.compute_encodings(forward_pass_callback=pass_calibration_data,
                      forward_pass_callback_args=use_cuda)

2024-12-30 11:00:24,901 - Quant - INFO - Unsupported op type Squeeze
2024-12-30 11:00:24,901 - Quant - INFO - Unsupported op type Mean
2024-12-30 11:00:24,903 - Quant - INFO - Selecting DefaultOpInstanceConfigGenerator to compute the specialized config. hw_version:default
('/home/shayan/Desktop/aimet/Examples/torch/quantization/val/9/cf135f199d8c7a9d0dce9aa35acfb4c70c14e0aa.jpeg',)


In [34]:
quant_name, quant_wrapper = list(dict(tf_enhanced_sim.model.named_modules()).items())[-2
                                                                                    ]

print(quant_wrapper)
print("-----------------------------")
print(quant_name)
print("-----------------------------")
print("WEIGHT")
print(quant_wrapper.param_quantizers['weight'])
print("BIAS")
print(quant_wrapper.param_quantizers['bias'])
print("INPUT")
for i in quant_wrapper.input_quantizers:
    print(i)
print("OUTPUT")
for i in quant_wrapper.output_quantizers:
    print(i)

StaticGridQuantWrapper(
  (_module_to_wrap): Linear(in_features=512, out_features=1000, bias=True)
)
-----------------------------
fc
-----------------------------
WEIGHT
StaticGrid TensorQuantizer:
    quant-scheme:QuantScheme.post_training_tf_enhanced, round_mode=RoundingMode.ROUND_NEAREST, bitwidth=8, enabled=True
    min:-0.7204298377037048, max=0.7204298377037048, delta=0.005672676023095846, offset=-127.0

BIAS
StaticGrid TensorQuantizer:
    quant-scheme:QuantScheme.post_training_tf_enhanced, round_mode=RoundingMode.ROUND_NEAREST, bitwidth=8, enabled=True
    min:-0.06204679608345032, max=0.06204679608345032, delta=0.0004885574453510344, offset=-127.0

INPUT
StaticGrid TensorQuantizer:
    quant-scheme:QuantScheme.post_training_tf_enhanced, round_mode=RoundingMode.ROUND_NEAREST, bitwidth=8, enabled=False
    no encoding

OUTPUT
StaticGrid TensorQuantizer:
    quant-scheme:QuantScheme.post_training_tf_enhanced, round_mode=RoundingMode.ROUND_NEAREST, bitwidth=8, enabled=True
    mi