In [1]:
import sys
import os
import torch
import torch.utils.bundled_inputs
import torch.utils.mobile_optimizer
import torch.backends._nnapi.prepare
import torchvision.models.quantization.mobilenet
from pathlib import Path
import flash
import flash.image

In [7]:
@flash.image.ImageClassifier.backbones(name="mobilenet_v2_quant")
def fn_mobilenet_v2_quant(pretrained: bool = True):
    """Build the quantizable MobileNetV2 feature stack as a Flash backbone.

    The function is registered with ``flash.image.ImageClassifier.backbones``
    under the name ``"mobilenet_v2_quant"``.

    Args:
        pretrained: Forwarded to torchvision's quantizable ``mobilenet_v2``
            builder. Previously this argument was ignored and ``True`` was
            always used.

    Returns:
        A ``(backbone, num_features)`` tuple: the model's ``features`` module
        and the ``in_features`` of the last classifier layer, which is the
        width the classification head has to accept.
    """
    model = torchvision.models.quantization.mobilenet.mobilenet_v2(pretrained=pretrained)

    # Only the feature extractor is used as the backbone; the classifier is
    # consulted solely for the feature width it expects.
    backbone = model.features
    num_features = model.classifier[-1].in_features

    # Backbones need to return num_features so the head can be built.
    return backbone, num_features

In [31]:
def make_mobilenetv2_nnapi(output_dir_path, quantize_mode, model):
    """Export ``model`` as a CPU lite-interpreter model and an NNAPI model.

    Two files are written into ``output_dir_path``:
    ``mobilenetv2-quant_<mode>-cpu.pt`` and ``mobilenetv2-quant_<mode>-nnapi.pt``.
    The directory is created if it does not exist.

    Args:
        output_dir_path: Destination directory (``str`` or ``pathlib.Path``).
        quantize_mode: One of ``"none"``, ``"core"`` or ``"full"``.
            ``"core"`` traces only the quantized core and wraps the NNAPI
            model in the original quant/dequant modules; ``"full"`` keeps a
            quantized interface.
        model: Module to export. It is switched to eval mode. For the
            quantized modes it must expose ``quant`` and ``dequant``
            attributes; these are swapped out temporarily and always put
            back before the function returns or raises.

    Raises:
        KeyError: If ``quantize_mode`` is not one of the supported values.
        AttributeError: If a quantized mode is requested and ``model`` has no
            ``quant`` / ``dequant`` attribute.
    """
    quantize_core, quantize_iface = {
        "none": (False, False),
        "core": (True, False),
        "full": (True, True),
    }[quantize_mode]

    # Accept plain strings as well as Path objects.
    output_dir_path = Path(output_dir_path)

    model.eval()

    input_float = torch.zeros(1, 3, 224, 224)
    input_tensor = input_float

    # Capture the quantizer and dequantizer up front so they can be restored
    # on the caller's model no matter how the export ends.
    quantizer = dequantizer = None
    if quantize_core:
        quantizer = model.quant
        dequantizer = model.dequant

    try:
        # If we're doing a quantized model, we need to trace only the quantized
        # core: prepare the input with the real quantizer and replace both
        # stubs with identity modules so tracing skips them.
        if quantize_core:
            model.quant = torch.nn.Identity()
            model.dequant = torch.nn.Identity()
            input_tensor = quantizer(input_float)

        # Many NNAPI backends prefer NHWC tensors, so convert our input to
        # channels_last, and set the "nnapi_nhwc" attribute for the converter.
        input_tensor = input_tensor.contiguous(memory_format=torch.channels_last)
        input_tensor.nnapi_nhwc = True

        # Trace the model.  NNAPI conversion only works with TorchScript models,
        # and traced models are more likely to convert successfully than scripted.
        with torch.no_grad():
            traced = torch.jit.trace(model, input_tensor)
        nnapi_model = torch.backends._nnapi.prepare.convert_model_to_nnapi(traced, input_tensor)

        # If we're not using a quantized interface, wrap a quant/dequant around the core.
        if quantize_core and not quantize_iface:
            nnapi_model = torch.nn.Sequential(quantizer, nnapi_model, dequantizer)
            model.quant = quantizer
            model.dequant = dequantizer
            # Switch back to float input for benchmarking.
            input_tensor = input_float.contiguous(memory_format=torch.channels_last)

        # Optimize the CPU model to make CPU-vs-NNAPI benchmarks fair.
        # In "full" mode this is scripted while the identity stubs are still in
        # place, so the CPU model keeps the quantized interface too.
        cpu_model = torch.utils.mobile_optimizer.optimize_for_mobile(torch.jit.script(model))
    finally:
        # Never leave the caller's model with identity stubs: the same model
        # object may be exported again with a different mode.
        if quantize_core:
            model.quant = quantizer
            model.dequant = dequantizer

    # Bundle sample inputs with the models for easier benchmarking.
    # This step is optional.
    class BundleWrapper(torch.nn.Module):
        def __init__(self, mod):
            super().__init__()
            self.mod = mod

        def forward(self, arg):
            return self.mod(arg)

    nnapi_model = torch.jit.script(BundleWrapper(nnapi_model))
    torch.utils.bundled_inputs.augment_model_with_bundled_inputs(
        cpu_model, [(torch.utils.bundled_inputs.bundle_large_tensor(input_tensor),)])
    torch.utils.bundled_inputs.augment_model_with_bundled_inputs(
        nnapi_model, [(torch.utils.bundled_inputs.bundle_large_tensor(input_tensor),)])

    # Save both models; make sure the destination exists first.
    output_dir_path.mkdir(parents=True, exist_ok=True)
    cpu_model._save_for_lite_interpreter(str(output_dir_path / ("mobilenetv2-quant_{}-cpu.pt".format(quantize_mode))))
    nnapi_model._save_for_lite_interpreter(str(output_dir_path / ("mobilenetv2-quant_{}-nnapi.pt".format(quantize_mode))))

In [17]:
# Root directory for exported models; the export loop writes into the
# "mobilenetv2-nnapi" sub-directory of this path.
base_path = Path("/tmp/model")

In [18]:
!ls artifacts/model/model_mobilenet_v2_quant.pt

artifacts/model/model_mobilenet_v2_quant.pt


In [19]:
# Location of the trained checkpoint that is loaded into a Flash ImageClassifier.
artifact_model_path="artifacts/model/model_mobilenet_v2_quant.pt"

In [20]:
# Restore the Flash ImageClassifier from the checkpoint file.
# NOTE(review): the checkpoint name suggests it uses the custom
# "mobilenet_v2_quant" backbone, so the backbone-registration cell presumably
# has to run before this one — confirm.
model = flash.image.ImageClassifier.load_from_checkpoint(artifact_model_path)

In [None]:
# Scratch reproduction of the tracing / NNAPI-conversion steps, run directly
# against the notebook-level `model` so intermediate values can be inspected.

# `quantize_core` was never defined at notebook scope, so this cell failed
# with NameError. The loaded ImageClassifier's repr shows plain float
# Conv2d/BatchNorm modules, so the float path is the default here; set it to
# True only for a model that exposes `quant` / `dequant`.
quantize_core = False

model.eval()

input_float = torch.zeros(1, 3, 224, 224)
input_tensor = input_float

# If we're doing a quantized model, we need to trace only the quantized core.
# So capture the quantizer and dequantizer, use them to prepare the input,
# and replace them with identity modules so we can trace without them.
if quantize_core:
    quantizer = model.quant
    dequantizer = model.dequant
    model.quant = torch.nn.Identity()
    model.dequant = torch.nn.Identity()
    input_tensor = quantizer(input_float)

# Many NNAPI backends prefer NHWC tensors, so convert our input to channels_last,
# and set the "nnapi_nhwc" attribute for the converter.
input_tensor = input_tensor.contiguous(memory_format=torch.channels_last)
input_tensor.nnapi_nhwc = True

# Trace the model.  NNAPI conversion only works with TorchScript models,
# and traced models are more likely to convert successfully than scripted.
with torch.no_grad():
    traced = torch.jit.trace(model, input_tensor)
nnapi_model = torch.backends._nnapi.prepare.convert_model_to_nnapi(traced, input_tensor)

In [32]:
# Export a CPU and an NNAPI variant of the current `model` once per
# quantization mode, all into the same output directory.
for quantize_mode in ("none", "core", "full"):
    make_mobilenetv2_nnapi(
        output_dir_path=base_path / "mobilenetv2-nnapi",
        quantize_mode=quantize_mode,
        model=model,
    )

  if hasattr(mod, name):
  if hasattr(mod, name):
  item = getattr(mod, name)


AssertionError: 

In [22]:
# Display the module tree of whatever `model` is currently bound to.
model

ImageClassifier(
  (train_metrics): ModuleDict(
    (accuracy): Accuracy()
  )
  (val_metrics): ModuleDict(
    (accuracy): Accuracy()
  )
  (test_metrics): ModuleDict(
    (accuracy): Accuracy()
  )
  (adapter): DefaultAdapter(
    (backbone): Sequential(
      (0): ConvNormActivation(
        (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU()
      )
      (1): QuantizableInvertedResidual(
        (conv): Sequential(
          (0): ConvNormActivation(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): ReLU()
          )
          (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running

In [29]:
# Inspect the Flash head next to the torchvision classifier.
# Only the last expression of a cell is rendered, so the value of
# `model.adapter.head` on the first line is discarded.
# NOTE(review): `model_quant` is created in a cell that appears further down,
# so this cell depends on execution order — confirm the intended ordering.
model.adapter.head
model_quant.classifier

Sequential(
  (0): Linear(in_features=1280, out_features=101, bias=True)
)

In [23]:
# Reference torchvision quantizable MobileNetV2, kept under its own name so it
# can be compared against the Flash model.
#model = torchvision.models.mobilenet.mobilenet_v2(pretrained=True)    
model_quant = torchvision.models.quantization.mobilenet.mobilenet_v2(pretrained=True)    


In [30]:
# Show the classifier of the torchvision reference model.
model_quant.classifier

Sequential(
  (0): Dropout(p=0.2, inplace=False)
  (1): Linear(in_features=1280, out_features=1000, bias=True)
)

In [None]:
# NOTE(review): `model` is rebound by several cells (Flash ImageClassifier,
# float MobileNetV2, quantizable MobileNetV2), so which object this inspects
# depends on the order the cells were run — confirm.
model.classifier

In [None]:
??model_quant.fuse_model

In [None]:
# Call fuse_model() on the torchvision reference model.
# NOTE(review): presumably this modifies `model_quant` in place, which would
# make the cell unsafe to re-run — confirm.
model_quant.fuse_model()

In [None]:
# Rebinds `model` to the plain (non-quantization) torchvision MobileNetV2,
# replacing whatever `model` referred to before this cell ran.
model = torchvision.models.mobilenet.mobilenet_v2(pretrained=True)    

In [None]:
!rm -rf /tmp/model/mobilenetv2-nnapi 

In [None]:
!mkdir -p /tmp/model/mobilenetv2-nnapi 

In [None]:
!ls  /tmp/model/mobilenetv2-nnapi

In [None]:
# Rebinds `model` to the torchvision quantizable MobileNetV2 builder's result,
# replacing whatever `model` referred to before this cell ran.
model = torchvision.models.quantization.mobilenet.mobilenet_v2(pretrained=True)

In [None]:
?torchvision.models.quantization.mobilenet.mobilenet_v2

In [None]:
# Display the module tree of whatever `model` is currently bound to.
model