In [1]:
import numpy as np
import onnxruntime as ort
import timm
import torch
from onnxruntime.quantization import CalibrationDataReader, CalibrationMethod, quantize_static

from src.seed import seed_everything

seed_everything(42)


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
print(ort.get_available_providers())


['TensorrtExecutionProvider', 'CUDAExecutionProvider', 'CPUExecutionProvider']


In [4]:
img_height = 224
img_width = 224
x = torch.randn(1, 3, img_height, img_width)

model = timm.create_model("resnet50", pretrained=True, num_classes=10)
model(x)


tensor([[ 0.1005, -0.0164,  0.0238, -0.0473, -0.0282, -0.0354,  0.0018,  0.0091,
          0.0022, -0.0017]], grad_fn=<AddmmBackward0>)

In [5]:
torch.onnx.export(
    model,
    x,
    "model.onnx",
    export_params=True,
    opset_version=12,
    do_constant_folding=True,
    input_names=["input"],
    output_names=["output"],
    dynamic_axes={"input": {0: "batch_size"}, "output": {0: "batch_size"}},
)


In [17]:
# providers = ["CPUExecutionProvider"] # CPU環境
providers = ["CUDAExecutionProvider"] # GPU環境

session_fp32 = ort.InferenceSession("model.onnx", providers=providers)

# 入力はfloat32で渡す
x = np.random.randn(4, 3, img_height, img_width).astype(np.float32)

# 推論
output = session_fp32.run([], {"input": x})[0]
print(f"{output=}")


output=array([[ 0.04625579,  0.02774971,  0.05941668,  0.11544025, -0.04251363,
        -0.04008248, -0.07649988,  0.09241282, -0.01849725,  0.02181285],
       [ 0.04758538,  0.01739735,  0.0605131 ,  0.12453301, -0.05036511,
        -0.04451123, -0.08644277,  0.09327842, -0.0261997 ,  0.01296974],
       [ 0.04499255,  0.03298238,  0.05564933,  0.11027539, -0.05724603,
        -0.04432326, -0.08007257,  0.09728942, -0.02808353,  0.02220638],
       [ 0.047485  ,  0.01826401,  0.05600831,  0.10583325, -0.05987557,
        -0.05351613, -0.06847225,  0.09310697, -0.02715121,  0.01293   ]],
      dtype=float32)


In [20]:
# ONNXモデルの最適化を行う
!python -m onnxruntime.quantization.preprocess --input model.onnx --output model-infer.onnx

print("ONNXモデルの最適化が完了しました。")


ONNXモデルの量子化が完了しました。


In [21]:
class ImgDataReader(CalibrationDataReader):
    def __init__(self, imgs: np.ndarray) -> None:
        self.imgs = imgs  # 形状(Batch, C, H, W)
        self.img_dicts = iter([{"input": img[np.newaxis]} for img in self.imgs])
        self.datasize = len(self.imgs)

    def get_next(self) -> dict[str, np.ndarray] | None:
        # git_next関数は{"input": ndarray形式}で返す。全て返し終わったらNone
        return next(self.img_dicts, None)


In [22]:
# imgsは形状が（Batch, C, H, W）の画像データとする
imgs = np.random.randn(10, 3, img_height, img_width).astype(np.float32)
input_model_path = "model-infer.onnx"
output_model_path = "model-sq.onnx"
data_reader = ImgDataReader(imgs)
method = CalibrationMethod.MinMax
quantize_static(input_model_path, output_model_path, data_reader, calibrate_method=method)
