In [1]:
import torch
import torch.nn as nn
import pytorch_quantization.nn as quant_nn
import torchvision.models as models

In [2]:
# Build a network that contains only the part of ResNet-50 up to and including layer1[0].
class myModel(nn.Module):
    """Truncated ResNet-50: conv1 -> bn1 -> relu -> maxpool -> layer1[0].

    All sub-modules are taken from ``models.resnet50(pretrained=True)`` and
    re-registered on this module under the same attribute names.
    """

    def __init__(self):
        super().__init__()
        backbone = models.resnet50(pretrained=True)
        # Register the stem modules in the same order as the backbone defines them.
        for stem_name in ("conv1", "bn1", "relu", "maxpool"):
            setattr(self, stem_name, getattr(backbone, stem_name))
        # Only the first block of the backbone's layer1 is kept, under the name "layer1".
        self.layer1 = backbone.layer1[0]

    def forward(self, x):
        """Pass ``x`` through the stem modules and then the single layer1 block."""
        for stage in (self.conv1, self.bn1, self.relu, self.maxpool, self.layer1):
            x = stage(x)
        return x

# Instantiate the truncated network; the bare `model` expression makes the
# notebook display its module tree as the cell output.
model = myModel()
model

myModel(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Bottleneck(
    (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (downsample): Sequential(
      (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (1): BatchNorm2d

In [3]:
# 第一次导出onnx，查看
# input1 = torch.zeros((1, 3, 224, 224))
# output = model(input1)
# torch.onnx.export(model, input1, "resnet50.onnx", verbose=True, input_names=["input"], output_names=["output"])

In [4]:
import quantize
from pytorch_quantization import quant_modules


In [5]:
# 1.0 Topic: what quantize.initialize() does to the default quantization descriptors.

def _show(label, *values):
    """Print ``label`` left-justified to 65 columns, followed by ``values``."""
    print(label.ljust(65), *values)


def _first_quant_entry():
    """Return the first entry of ``quant_modules._DEFAULT_QUANT_MAP``, or None if the map is empty."""
    return next(iter(quant_modules._DEFAULT_QUANT_MAP), None)


def _show_default_descriptors(stage, input_note):
    """Print the default input/weight descriptors of the first entry's replacement module.

    Args:
        stage: Text inserted right after "initialize" in every label
            ("前" = before, "后" = after).
        input_note: Annotation printed after the input descriptor's calib_method.

    Prints nothing when the quant map is empty.
    """
    entry = _first_quant_entry()
    if entry is None:
        return
    quant_cls = entry.replace_mod
    # Result of vars(item.replace_mod.default_quant_desc_input).items() recorded before initialize():
    #   dict_items([('_num_bits', 8), ('_name', None), ('_fake_quant', True), ('_axis', None), ('_learn_amax', False),
    #   ('_amax', None), ('_scale_amax', None), ('_calib_method', 'max'), ('_unsigned', False), ('_narrow_range', False)])
    _show(f"==[initialize{stage},default_quant_desc_input]==", quant_cls.default_quant_desc_input)
    _show(f"==[initialize{stage},default_quant_desc_input.calib_method]==", quant_cls.default_quant_desc_input.calib_method, input_note)
    _show(f"==[initialize{stage},default_quant_desc_weight]==", quant_cls.default_quant_desc_weight)
    _show(f"==[initialize{stage},default_quant_desc_weight.calib_method]==", quant_cls.default_quant_desc_weight.calib_method)
    # print(entry.replace_mod.input_quantizer)
    # print(entry.replace_mod.weight_quantizer)


# 1.1 Print one entry to see what the namedtuple looks like.
# `item` stays bound at top level, as it was with the original for/break loops.
item = _first_quant_entry()
if item is not None:
    _show("==[orig_mod]==", item.orig_mod)
    # print(f"==[torch.nn]==\t\t",torch.nn) # same result as the line above
    _show("==[item.mod_name]==", item.mod_name)
    _show("==[item.replace_mod]==", item.replace_mod)
    _show("==[item]==", item) # namedtuple

# 1.4 Descriptor state before initialize(), kept for comparison.
_show_default_descriptors("前", "<---------设置calib_method前是max")

# 1.2 initialize() was copied over from bevfusion.
quantize.initialize()

# 1.5 Descriptor state after initialize().
print("\n！！修改前，已经有default_quant_desc_input 和 default_quant_desc_weight了！！")
print("！！修改后，default_quant_desc_input的校准器设置为histogram！！\n")
_show_default_descriptors("后", "<---------设置calib_method后是histogram")

==[orig_mod]==                                                    <module 'torch.nn' from '/usr/local/lib/python3.8/dist-packages/torch/nn/__init__.py'>
==[item.mod_name]==                                               Conv1d
==[item.replace_mod]==                                            <class 'pytorch_quantization.nn.modules.quant_conv.QuantConv1d'>
==[item]==                                                        quant_entry(orig_mod=<module 'torch.nn' from '/usr/local/lib/python3.8/dist-packages/torch/nn/__init__.py'>, mod_name='Conv1d', replace_mod=<class 'pytorch_quantization.nn.modules.quant_conv.QuantConv1d'>)
==[initialize前,default_quant_desc_input]==                        QuantDescriptor(8bit fake axis= per-tensor)
==[initialize前,default_quant_desc_input.calib_method]==           max <---------设置calib_method前是max
==[initialize前,default_quant_desc_weight]==                       QuantDescriptor(8bit fake axis=0)
==[initialize前,default_quant_desc_weight.calib_method]==     

In [6]:

# 2.0 Topic: explaining replace_to_quantization_module

# 2.1 What is model._modules?
# print(model)
# print(model._modules) # it is an OrderedDict.
# The return value is not used; `model` itself is inspected afterwards.
quantize.replace_to_quantization_module(model)

In [11]:
# Display the module tree again; the recorded output lists QuantConv2d layers
# that carry an _input_quantizer and a _weight_quantizer.
model

myModel(
  (conv1): QuantConv2d(
    3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
    (_input_quantizer): TensorQuantizer(8bit fake per-tensor amax=dynamic calibrator=HistogramCalibrator scale=1.0 quant)
    (_weight_quantizer): TensorQuantizer(8bit fake axis=0 amax=dynamic calibrator=MaxCalibrator scale=1.0 quant)
  )
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Bottleneck(
    (conv1): QuantConv2d(
      64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False
      (_input_quantizer): TensorQuantizer(8bit fake per-tensor amax=dynamic calibrator=HistogramCalibrator scale=1.0 quant)
      (_weight_quantizer): TensorQuantizer(8bit fake axis=0 amax=dynamic calibrator=MaxCalibrator scale=1.0 quant)
    )
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
   

In [7]:
# Export the module-replaced model to ONNX with a dummy 1x3x224x224 input on the GPU.
input1 = torch.zeros((1, 3, 224, 224)).to("cuda")
model.to("cuda")
model.eval()
# use_fb_fake_quant is set on the TensorQuantizer class itself, so it affects every
# quantizer instance. It is enabled only for the duration of the export and is reset
# in `finally`, so a failed export cannot leave the flag switched on for later cells.
quant_nn.TensorQuantizer.use_fb_fake_quant = True
try:
    with torch.no_grad():
        torch.onnx.export(model, input1, "resnet50_replace.onnx",
                          verbose=True,
                          input_names=["input"],
                          output_names=["output"],
                          opset_version=13,
                          do_constant_folding=True)
finally:
    quant_nn.TensorQuantizer.use_fb_fake_quant = False

  inputs, amax.item() / bound, 0,
  quant_dim = list(amax.shape).index(list(amax_sequeeze.shape)[0])


graph(%input : Float(1, 3, 224, 224, strides=[150528, 50176, 224, 1], requires_grad=0, device=cuda:0),
      %conv1.weight : Float(64, 3, 7, 7, strides=[147, 49, 7, 1], requires_grad=1, device=cuda:0),
      %bn1.weight : Float(64, strides=[1], requires_grad=1, device=cuda:0),
      %bn1.bias : Float(64, strides=[1], requires_grad=1, device=cuda:0),
      %bn1.running_mean : Float(64, strides=[1], requires_grad=0, device=cuda:0),
      %bn1.running_var : Float(64, strides=[1], requires_grad=0, device=cuda:0),
      %layer1.conv1.weight : Float(64, 64, 1, 1, strides=[64, 1, 1, 1], requires_grad=1, device=cuda:0),
      %layer1.bn1.weight : Float(64, strides=[1], requires_grad=1, device=cuda:0),
      %layer1.bn1.bias : Float(64, strides=[1], requires_grad=1, device=cuda:0),
      %layer1.bn1.running_mean : Float(64, strides=[1], requires_grad=0, device=cuda:0),
      %layer1.bn1.running_var : Float(64, strides=[1], requires_grad=0, device=cuda:0),
      %layer1.conv2.weight : Float(64, 

In [8]:
# # Minimal model with a single Conv2d layer (disabled experiment)
# class myModel2(nn.Module):
#     def __init__(self):
#         super(myModel2, self).__init__()
#         self.conv1 = nn.Conv2d(3, 5, 3)

#     def forward(self, x):
#         return self.conv1(x)

# model2 = myModel2()
# model2

In [9]:

# quantize.replace_to_quantization_module(model2)
# quantize.set_quantizer_fast(model2)

# torch.manual_seed(0)
# a = torch.randn(1, 3, 224, 224)

# images = [a, a, a, a]
# quantize.calibrate_model(model2, images, "cpu", None, 2)

In [10]:
# input1 = torch.randn((1, 3, 224, 224)).to("cuda").to(torch.float32)
# model2.to("cuda").to(torch.float32)
# # model2.eval()
# quant_nn.TensorQuantizer.use_fb_fake_quant = True
# with torch.no_grad():
#     torch.onnx.export(model2, input1, "aaaa.onnx", 
#                       verbose=True, 
#                       input_names=["input"], 
#                       output_names=["output"], 
#                       opset_version=13, 
#                       do_constant_folding=False)
# quant_nn.TensorQuantizer.use_fb_fake_quant = False