<a href="https://colab.research.google.com/github/mahinlma/pytorch_quantization/blob/main/pytorch_quantization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [22]:
import torch
model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet18', pretrained=True)

model.eval()

Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.10.0


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

All pre-trained models expect input images normalized in the same way,
i.e. mini-batches of 3-channel RGB images of shape `(3 x H x W)`, where `H` and `W` are expected to be at least `224`.
The images have to be loaded in to a range of `[0, 1]` and then normalized using `mean = [0.485, 0.456, 0.406]`
and `std = [0.229, 0.224, 0.225]`.

Here's a sample execution.

In [23]:
# Download an example image from the pytorch website
import urllib
url, filename = ("https://github.com/pytorch/hub/raw/master/images/dog.jpg", "dog.jpg")
try: urllib.URLopener().retrieve(url, filename)
except: urllib.request.urlretrieve(url, filename)

In [24]:
def inference(model):
  with torch.no_grad():
    output = model(input_batch)
  
  probabilities = torch.nn.functional.softmax(output[0], dim=0)
  results(probabilities)

In [25]:
# sample execution (requires torchvision)
from PIL import Image
from torchvision import transforms
input_image = Image.open(filename)
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
input_tensor = preprocess(input_image)
input_batch = input_tensor.unsqueeze(0) # create a mini-batch as expected by the model

# move the input and model to GPU for speed if available
# if torch.cuda.is_available():
#     input_batch = input_batch.to('cuda')
#     model.to('cuda')



In [26]:
# Download ImageNet labels
!wget https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt

--2022-06-21 08:51:47--  https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.108.133, 185.199.109.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 10472 (10K) [text/plain]
Saving to: ‘imagenet_classes.txt.1’


2022-06-21 08:51:48 (52.8 MB/s) - ‘imagenet_classes.txt.1’ saved [10472/10472]



In [27]:
# Read the categories
def results(probabilities):
  with open("imagenet_classes.txt", "r") as f:
      categories = [s.strip() for s in f.readlines()]
  # Show top categories per image
  top5_prob, top5_catid = torch.topk(probabilities, 5)
  for i in range(top5_prob.size(0)):
      print(categories[top5_catid[i]], top5_prob[i].item())

In [28]:
import copy
model_fp32 = copy.deepcopy(model)
torchscript_model = torch.jit.script(model_fp32.eval())
#print(torchscript_model)

In [29]:

inference(torchscript_model)

Samoyed 0.8846225142478943
Arctic fox 0.045805174857378006
white wolf 0.0442761555314064
Pomeranian 0.005621383432298899
Great Pyrenees 0.004652013536542654


In [30]:
torchscript_model_opt = torch.jit.optimize_for_inference(torchscript_model)
print(torchscript_model_opt)

RecursiveScriptModule(original_name=ResNet)


In [31]:
inference(torchscript_model_opt)

Samoyed 0.8846223950386047
Arctic fox 0.04580499231815338
white wolf 0.044276442378759384
Pomeranian 0.005621393211185932
Great Pyrenees 0.004652039613574743


In [32]:
model_fp32 = copy.deepcopy(model)

resnet_dq_int8 = torch.quantization.quantize_dynamic(
    model_fp32,  # the original model
    {torch.nn.Linear},  # a set of layers to dynamically quantize
    dtype=torch.qint8) 

In [33]:
inference(resnet_dq_int8)

Samoyed 0.883510947227478
Arctic fox 0.047078829258680344
white wolf 0.04523681104183197
Pomeranian 0.005195914302021265
Great Pyrenees 0.004503224976360798


In [39]:
torchscript_model_int8 = torch.jit.script(resnet_dq_int8)


In [40]:
inference(torchscript_model_int8)

Samoyed 0.883510947227478
Arctic fox 0.047078829258680344
white wolf 0.04523681104183197
Pomeranian 0.005195914302021265
Great Pyrenees 0.004503224976360798


In [41]:
torchscript_model_int8_1 = torch.jit.optimize_for_inference(torchscript_model_int8)
print(torchscript_model_int8_1)

RecursiveScriptModule(original_name=ResNet)


In [42]:
inference(torchscript_model_int8_1)

Samoyed 0.883510947227478
Arctic fox 0.04707878455519676
white wolf 0.04523677006363869
Pomeranian 0.005195904523134232
Great Pyrenees 0.004503220319747925


In [53]:
torchscript_quant_model = torch.quantization.prepare_jit(torchscript_model, {"":torch.quantization.default_qconfig}, True)

for _ in range(100):
    torchscript_quant_model(input_batch)

torchscript_quant_model = torch.quantization.convert_jit(torchscript_quant_model)
print(torchscript_quant_model)

RecursiveScriptModule(original_name=ResNet)


  return self._grad


In [54]:
torchscript_quant_model.graph


graph(%self : __torch__.torchvision.models.resnet.___torch_mangle_780.ResNet,
      %x.2 : Tensor):
  %15 : int = prim::Constant[value=-1]()
  %14 : int = prim::Constant[value=1]() # /root/.cache/torch/hub/pytorch_vision_v0.10.0/torchvision/models/resnet.py:243:29
  %13 : int[] = prim::Constant[value=[2, 2]]()
  %12 : int[] = prim::Constant[value=[3, 3]]()
  %11 : int[] = prim::Constant[value=[1, 1]]()
  %10 : int = prim::Constant[value=2]() # /usr/local/lib/python3.7/dist-packages/torch/nn/modules/pooling.py:162:53
  %9 : bool = prim::Constant[value=0]() # /usr/local/lib/python3.7/dist-packages/torch/nn/modules/pooling.py:163:57
  %7 : str = prim::Constant[value="AssertionError: "]()
  %6 : NoneType = prim::Constant()
  %self.4_zero_point_0 : int = prim::Constant[value=38]()
  %self.4_scale_0 : float = prim::Constant[value=0.18226641416549683]()
  %self.layer4.1.out.26_scale_0 : float = prim::Constant[value=0.16433815658092499]()
  %self.layer4.1.out.30_zero_point_0 : int = prim::Cons

In [55]:
inference(torchscript_quant_model)

Samoyed 0.9468300938606262
Arctic fox 0.0206048171967268
white wolf 0.017171630635857582
Pomeranian 0.00399533286690712
Great Pyrenees 0.00399533286690712


Samoyed 0.9468300938606262
Arctic fox 0.0206048171967268
white wolf 0.017171630635857582
Pomeranian 0.00399533286690712
Great Pyrenees 0.00399533286690712


TypeError: ignored

In [34]:
#def fuse_layers(model_fp32):
import torchvision
#model_fp32 = copy.deepcopy(model)
#torch.quantization.fuse_modules(model_fp32, [[conv1],[bn1]])
#model_fp32.eval()
def fuse_model(model_fp32):
    torch.quantization.fuse_modules(model_fp32, ['conv1','bn1','relu'],inplace=True)

    for m in model_fp32.modules():
        
        if type(m) == torch.nn.modules.container.Sequential:

            for m1 in m:
                #print(type(m1))

                if (type(m1)) == torchvision.models.resnet.BasicBlock:

                    torch.quantization.fuse_modules(m1, [['conv1','bn1','relu'],['conv2','bn2']],inplace=True)
                    if m1.downsample is not None and type(m1.downsample) == torch.nn.modules.container.Sequential:
                        torch.quantization.fuse_modules(m1.downsample, ['0','1'],inplace=True)

                        #print("#",m1.downsample)
                    #if (type(m1)) ==

    return model_fp32



In [35]:
class QuantizedResNet18(torch.nn.Module):
    def __init__(self,model_fp32):
        super(QuantizedResNet18, self).__init__()
        # QuantStub converts tensors from floating point to quantized.
        # This will only be used for inputs.
        self.quant = torch.quantization.QuantStub()
        # DeQuantStub converts tensors from quantized to floating point.
        # This will only be used for outputs.
        self.dequant = torch.quantization.DeQuantStub()
        # FP32 model
        self.model_fp32 = model_fp32

    def forward(self, x):
        # manually specify where tensors will be converted from floating
        # point to quantized in the quantized model
        x = self.quant(x)
        x = self.model_fp32(x)
        # manually specify where tensors will be converted from quantized
        # to floating point in the quantized model
        x = self.dequant(x)
        return x

In [37]:
model_fp32 = copy.deepcopy(model)
model_fp32.eval()
fused_model = fuse_model(model_fp32)

quantized_model = QuantizedResNet18(model_fp32=fused_model)

                
quantized_model.qconfig = torch.quantization.get_default_qconfig('fbgemm')


model_fp32_prepared = torch.quantization.prepare(quantized_model, inplace=True)
                           
model_fp32_prepared(input_batch)

model_int8 = torch.quantization.convert(model_fp32_prepared, inplace=True)

print(model_int8)
#torch.save(model_int8, "resnet_static_quantized_model.pt")

  reduce_range will be deprecated in a future release of PyTorch."


QuantizedResNet18(
  (quant): Quantize(scale=tensor([0.0373]), zero_point=tensor([56]), dtype=torch.quint8)
  (dequant): DeQuantize()
  (model_fp32): ResNet(
    (conv1): QuantizedConvReLU2d(3, 64, kernel_size=(7, 7), stride=(2, 2), scale=0.017348434776067734, zero_point=0, padding=(3, 3))
    (bn1): Identity()
    (relu): Identity()
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): QuantizedConvReLU2d(64, 64, kernel_size=(3, 3), stride=(1, 1), scale=0.009992317296564579, zero_point=0, padding=(1, 1))
        (bn1): Identity()
        (relu): Identity()
        (conv2): QuantizedConv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), scale=0.046314314007759094, zero_point=73, padding=(1, 1))
        (bn2): Identity()
      )
      (1): BasicBlock(
        (conv1): QuantizedConvReLU2d(64, 64, kernel_size=(3, 3), stride=(1, 1), scale=0.021600106731057167, zero_point=0, padding=(1, 1))
    

In [38]:
inference(model_int8)

NotImplementedError: ignored