In [1]:
from cn_clip.clip import tokenize, image_transform
from cn_clip.clip.utils import _MODEL_INFO
import onnxruntime
import os
from pathlib import Path
import numpy as np
from PIL import Image
import torch

## 文字编码

In [2]:
# Location of the exported fp32 ONNX text encoder, relative to the working directory.
text_onnx_model_path = Path('.').joinpath('data_path', 'txt.fp32.onnx')

In [3]:
# Run options with log severity level 2.
# NOTE(review): these options are not passed to any run() call in the visible cells.
text_run_options = onnxruntime.RunOptions()
text_run_options.log_severity_level = 2

# Inference session for the fp32 text encoder, using default session options.
text_sess_options = onnxruntime.SessionOptions()
text_session = onnxruntime.InferenceSession(
    os.fspath(text_onnx_model_path.absolute()),
    sess_options=text_sess_options,
)

In [4]:
# Encode the query text with the fp32 ONNX text encoder; r[0] holds the raw text features.
r = text_session.run(None, { 'text': tokenize('皮卡丘').numpy() })

# NumPy view of the features, L2-normalised per row. Normalising along the last
# axis (instead of dividing everything by the norm of row 0) keeps the result
# correct when more than one text is encoded, and matches the torch path below.
text_vector = r[0]
text_vector = text_vector / np.linalg.norm(text_vector, axis=-1, keepdims=True)

# Torch copy of the same features, normalised the same way.
text_features = torch.tensor(r[0])
text_features = text_features / text_features.norm(dim=-1, keepdim=True)  # normalised Chinese-CLIP text features for downstream tasks

In [5]:
# Dump the raw (not normalised) text encoder output to a text file with 8 decimals.
np.savetxt(fname='text-result.text', X=r[0], fmt='%.8f')

## 图像编码

In [6]:
# Location of the exported fp32 ONNX image encoder, relative to the working directory.
img_onnx_model_path = Path('.').joinpath('data_path', 'img.fp32.onnx')

In [7]:
# Run options with log severity level 2.
# NOTE(review): these options are not passed to any run() call in the visible cells.
img_run_options = onnxruntime.RunOptions()
img_run_options.log_severity_level = 2

# Inference session for the fp32 image encoder, using default session options.
img_sess_options = onnxruntime.SessionOptions()
img_session = onnxruntime.InferenceSession(
    os.fspath(img_onnx_model_path.absolute()),
    sess_options=img_sess_options,
)

In [8]:
# Build the preprocessing pipeline for the chosen architecture's input resolution.
model_arch = "ViT-B-16"
preprocess = image_transform(_MODEL_INFO[model_arch]['input_resolution'])

# Example Pikachu picture; preprocessing yields a Torch tensor of size
# [1, 3, resolution, resolution]. The image is opened in a `with` block so the
# underlying file handle is closed as soon as preprocessing has consumed it.
with Image.open("examples/pokemon.jpeg") as pokemon_image:
    image = preprocess(pokemon_image).unsqueeze(0)

In [9]:
# Run the fp32 image encoder on the preprocessed image; None requests all outputs.
img_res = img_session.run(
    output_names=None,
    input_feed={"image": image.numpy()},
)

In [10]:
# NumPy view of the image features, L2-normalised per row. Normalising along the
# last axis (instead of dividing everything by the norm of row 0) keeps the
# result correct for batches larger than one and matches the torch path below.
image_vector = img_res[0]
image_vector = image_vector / np.linalg.norm(image_vector, axis=-1, keepdims=True)

# Torch copy of the same features, normalised in place.
image_features = torch.tensor(img_res[0])
image_features /= image_features.norm(dim=-1, keepdim=True)  # normalised Chinese-CLIP image features for downstream tasks

In [11]:
# Dump the raw (not normalised) image encoder output to a text file with 8 decimals.
np.savetxt(fname='image-result.text', X=img_res[0], fmt='%.8f')

In [12]:
# Display cn_clip's model registry: each architecture name maps to its
# 'struct' identifier and 'input_resolution'.
_MODEL_INFO

{'ViT-B-16': {'struct': 'ViT-B-16@RoBERTa-wwm-ext-base-chinese',
  'input_resolution': 224},
 'ViT-L-14': {'struct': 'ViT-L-14@RoBERTa-wwm-ext-base-chinese',
  'input_resolution': 224},
 'ViT-L-14-336': {'struct': 'ViT-L-14-336@RoBERTa-wwm-ext-base-chinese',
  'input_resolution': 336},
 'ViT-H-14': {'struct': 'ViT-H-14@RoBERTa-wwm-ext-large-chinese',
  'input_resolution': 224},
 'RN50': {'struct': 'RN50@RBT3-chinese', 'input_resolution': 224}}

In [13]:
# Similarity between the normalised fp32 image and text features (torch).
torch.matmul(image_features, text_features.t())

tensor([[0.4738]])

In [14]:
# Same similarity computed from the NumPy vectors, as a cross-check of the torch result.
image_vector @ text_vector.T

array([[0.4737627]], dtype=float32)

## 量化模型

量化过程可以参考[官方文档](https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html#quantizing-an-onnx-model)和[example](https://github.com/microsoft/onnxruntime-inference-examples/blob/main/quantization/image_classification/cpu/ReadMe.md)

In [15]:
from onnxruntime.quantization import quantize_dynamic, QuantType, quant_pre_process

### 量化文字编码模型

In [16]:
# Output paths for the text encoder, placed next to the fp32 model:
# the quantized model and the (optional) pre-processed model.
text_quant_model_path = text_onnx_model_path.absolute().with_name('txt.quant.onnx')
pre_text_quant_model_path = text_onnx_model_path.absolute().with_name('txt.pre.quant.onnx')

In [17]:
# quant pre process 可以使用后面这个链接中的两个命令替代 https://github.com/microsoft/onnxruntime/issues/10484#issuecomment-1780303401
# quant_pre_process(text_onnx_model_path, pre_text_quant_model_path)

无法直接使用`quant_pre_process`对文字编码模型进行预处理，可以参考[这里](https://github.com/microsoft/onnxruntime/issues/10484#issuecomment-1780303401)的做法：先使用optimizer优化模型，然后再进行预处理。

**但是不进行预处理不影响最终的结果**。

In [18]:
# The pre-processed text model is not produced by this notebook (the
# quant_pre_process call is commented out), so it only exists if it was created
# externally. Fall back to the fp32 export when it is missing, so the notebook
# still runs from a fresh kernel.
text_quant_input_path = (
    pre_text_quant_model_path
    if pre_text_quant_model_path.exists()
    else text_onnx_model_path.absolute()
)

# Dynamically quantize the text encoder and write it to text_quant_model_path.
quantize_dynamic(text_quant_input_path, text_quant_model_path)

In [19]:
# Inference session for the quantized text encoder, using default session options.
text_quant_session = onnxruntime.InferenceSession(
    os.fspath(text_quant_model_path.absolute()),
    sess_options=onnxruntime.SessionOptions(),
)

In [20]:
# Encode the same query text with the quantized text encoder. The outputs get
# their own name (mirroring img_quant_res) so the fp32 outputs held in `r` are
# not overwritten and the earlier cells stay re-runnable.
text_quant_res = text_quant_session.run(None, { 'text': tokenize('皮卡丘').numpy() })

# L2-normalise along the feature axis.
text_quant_features = torch.tensor(text_quant_res[0])
text_quant_features = text_quant_features / text_quant_features.norm(dim=-1, keepdim=True)  # normalised Chinese-CLIP text features for downstream tasks

In [21]:
# Similarity between the quantized and fp32 text features (1.0 would mean identical direction).
torch.matmul(text_quant_features, text_features.t())

tensor([[0.7831]])

### 量化图像编码模型

In [22]:
# Output paths for the image encoder, placed next to the fp32 model:
# the quantized model and the pre-processed model.
img_quant_model_path = img_onnx_model_path.absolute().with_name('img.quant.onnx')
pre_img_quant_model_path = img_onnx_model_path.absolute().with_name('img.pre.quant.onnx')

In [23]:
# Pre-process the fp32 image encoder for quantization; the result is written to
# pre_img_quant_model_path and used as the input of quantize_dynamic below.
quant_pre_process(img_onnx_model_path, pre_img_quant_model_path)

In [24]:
# quantize_dynamic cannot quantize this model fully automatically, see
# https://github.com/microsoft/onnxruntime/issues/15888
# The 'Conv_1' node is therefore excluded and left unquantized.
quantize_dynamic(
    pre_img_quant_model_path,
    img_quant_model_path,
    weight_type=QuantType.QInt8,
    nodes_to_exclude=['Conv_1'],
)

Ignore MatMul due to non constant B: /[MatMul_105]
Ignore MatMul due to non constant B: /[MatMul_107]
Ignore MatMul due to non constant B: /[MatMul_222]
Ignore MatMul due to non constant B: /[MatMul_224]
Ignore MatMul due to non constant B: /[MatMul_339]
Ignore MatMul due to non constant B: /[MatMul_341]
Ignore MatMul due to non constant B: /[MatMul_456]
Ignore MatMul due to non constant B: /[MatMul_458]
Ignore MatMul due to non constant B: /[MatMul_573]
Ignore MatMul due to non constant B: /[MatMul_575]
Ignore MatMul due to non constant B: /[MatMul_690]
Ignore MatMul due to non constant B: /[MatMul_692]
Ignore MatMul due to non constant B: /[MatMul_807]
Ignore MatMul due to non constant B: /[MatMul_809]
Ignore MatMul due to non constant B: /[MatMul_924]
Ignore MatMul due to non constant B: /[MatMul_926]
Ignore MatMul due to non constant B: /[MatMul_1041]
Ignore MatMul due to non constant B: /[MatMul_1043]
Ignore MatMul due to non constant B: /[MatMul_1158]
Ignore MatMul due to non con

> `nodes_to_exclude=['Conv_1']` is necessary to leave the Conv layer as it is, since `ConvInteger(10)` is NOT implemented in onnxruntime for data type `int8`.

这里排除的节点名称和[这个issue里面](https://github.com/microsoft/onnxruntime/issues/15888)使用的名称不同，原因应该是这一层的名称被改了；实际的节点名称可以参考下面列出onnx的nodes的代码。[PicQuery](https://github.com/greyovo/PicQuery/blob/master/README_zh.md#%E6%9E%84%E5%BB%BA%E8%BF%90%E8%A1%8C)里面也是这样的，但它使用的也是标准的clip模型，和前面issue中做法类似。

In [25]:
# Inference session for the quantized image encoder, using default session options.
img_quant_session = onnxruntime.InferenceSession(
    os.fspath(img_quant_model_path.absolute()),
    sess_options=onnxruntime.SessionOptions(),
)

In [26]:
# Reuses the image tensor that was already preprocessed for the fp32 run above.
img_quant_res = img_quant_session.run(
    output_names=None,
    input_feed={"image": image.numpy()},
)

In [27]:
# L2-normalise the quantized image encoder output along the feature axis.
image_quant_features = torch.tensor(img_quant_res[0])
image_quant_features = image_quant_features / image_quant_features.norm(dim=-1, keepdim=True)  # normalised Chinese-CLIP image features for downstream tasks

In [28]:
# Similarity between the fp32 and quantized image features (1.0 would mean identical direction).
torch.matmul(image_features, image_quant_features.t())

tensor([[0.9875]])

In [32]:
# Image-text similarity after quantization vs. before quantization.
quantized_similarity = image_quant_features @ text_quant_features.t()
fp32_similarity = image_features @ text_features.t()

print('量化之后的结果', quantized_similarity)
print('量化之前的结果', fp32_similarity)

量化之后的结果 tensor([[0.4319]])
量化之前的结果 tensor([[0.4738]])


## List onnx nodes

In [29]:
import onnx

# Load the quantized image encoder written by the quantization step.
model = onnx.load(img_quant_model_path)

# Print the name of every graph node, one per line, in graph order.
for graph_node in model.graph.node:
    node_name = graph_node.name
    print(node_name)

Conv_1
Reshape_12
Concat_17
Add_19
ReduceMean_21
Sub_22
Pow_24
ReduceMean_25
Add_27
Sqrt_28
Div_29
Mul_30
Add_31
ReduceMean_35
Transpose_33
Sub_36
Transpose_token_47
Pow_38
ReduceMean_39
Add_41
Sqrt_42
Div_43
Mul_44
Add_45
onnx::Cast_398_QuantizeLinear
MatMul_52_quant_scales_mul
MatMul_52_quant
onnx::Add_405_output_quantized_cast
MatMul_52_quant_output_scale_mul
Add_53
Slice_67
Slice_64
Slice_70
Reshape_90
Reshape_79
Reshape_100
Transpose_104
Transpose_80
Transpose_101
Div_103
MatMul_105
Softmax_106
MatMul_107
Transpose_108
Reshape_115
onnx::Gemm_468_QuantizeLinear
Gemm_116_MatMul_quant_scales_mul
Gemm_116_MatMul_quant
onnx::Reshape_469_MatMul_output_quantized_cast
Gemm_116_MatMul_quant_output_scale_mul
Gemm_116_Add
Reshape_125
Add_126
ReduceMean_128
Sub_129
Pow_131
ReduceMean_132
Add_134
Sqrt_135
Div_136
Mul_137
Add_138
onnx::Cast_491_QuantizeLinear
MatMul_141_quant_scales_mul
MatMul_141_quant
onnx::Add_494_output_quantized_cast
MatMul_141_quant_output_scale_mul
Add_142
Mul_144
Sigmoi

## 版本依赖
第一个是cn_clip建议的依赖，导出onnx的时候使用的是这个依赖。第二个是后续解决量化导出问题的时候更新的依赖；根据最后的解决结果看，使用第一个依赖运行应该也是可以的，但没有实际测试。

### 从pyTorch导出的依赖
- onnx                      1.13.0
- onnxconverter-common      1.14.0
- onnxmltools               1.11.1
- onnxruntime               1.13.1
- torch                     1.12.1
- torchvision               0.13.1

### 运行当前notebook的依赖
- onnx                      1.14.0
- onnxconverter-common      1.13.0
- onnxmltools               1.11.1
- onnxruntime               1.16.1
- torch                     1.12.1
- torchvision               0.13.1

In [30]:
# Record the packages installed in the kernel's environment for reproducibility.
%pip list

Package                   Version
------------------------- -----------
anyio                     4.0.0
argon2-cffi               23.1.0
argon2-cffi-bindings      21.2.0
arrow                     1.3.0
asttokens                 2.4.0
async-lru                 2.0.4
attrs                     23.1.0
Babel                     2.13.0
backcall                  0.2.0
beautifulsoup4            4.12.2
bleach                    6.1.0
certifi                   2023.7.22
cffi                      1.16.0
charset-normalizer        3.3.0
colorama                  0.4.6
coloredlogs               15.0.1
comm                      0.1.4
contourpy                 1.1.1
cycler                    0.12.1
debugpy                   1.8.0
decorator                 5.1.1
defusedxml                0.7.1
exceptiongroup            1.1.3
executing                 2.0.0
fastjsonschema            2.18.1
filelock                  3.13.1
flatbuffers               23.5.26
fonttools                 4.43.1
fqdn           