In [1]:
# Select the torch device used by the rest of the notebook (CPU-only inference).
import torch

DEVICE_TYPE = "cpu"
device = torch.device(DEVICE_TYPE)

In [2]:
from optimum.onnxruntime import ORTModelForSequenceClassification
from transformers import AutoTokenizer
import torch

# Source checkpoint directory and the directory that receives the ONNX artifacts.
model_id = "./sougou_test_trainer_256/checkpoint-96"
onnx_path = "./sougou_test_trainer_256/onnx_256"

# Load the transformers checkpoint and convert it to ONNX while loading.
# `export=True` is the supported spelling; `from_transformers=True` is
# deprecated and scheduled for removal in optimum 2.0.
model = ORTModelForSequenceClassification.from_pretrained(model_id, export=True)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Save the ONNX checkpoint and the tokenizer side by side so that later cells
# can reload both from `onnx_path`.
model.save_pretrained(onnx_path)
tokenizer.save_pretrained(onnx_path)

The argument `from_transformers` is deprecated, and will be removed in optimum 2.0.  Use `export` instead
Framework not specified. Using pt to export to ONNX.
Using framework PyTorch: 1.13.1+cpu
Overriding 1 configuration item(s)
	- use_cache -> False


('./sougou_test_trainer_256/onnx_256\\tokenizer_config.json',
 './sougou_test_trainer_256/onnx_256\\special_tokens_map.json',
 './sougou_test_trainer_256/onnx_256\\vocab.txt',
 './sougou_test_trainer_256/onnx_256\\added_tokens.json',
 './sougou_test_trainer_256/onnx_256\\tokenizer.json')

In [3]:
from transformers import pipeline

# Wrap the exported ONNX model and its tokenizer in a text-classification
# pipeline, then classify a sample sports-news paragraph as a smoke test.
vanilla_clf = pipeline(
    task="text-classification",
    model=model,
    tokenizer=tokenizer,
)
vanilla_clf(
    "这期节目继续关注中国篮球的话题。众所周知，我们已经结束了男篮世界杯的所有赛程，一胜四负的一个成绩，甚至比上一届的世界杯成绩还要差。因为这一次我们连奥运会落选赛也都没有资格参加，所以，连续两次错过了巴黎奥运会的话，对于中国篮协，还有对于姚明来说，确实成为了他任职的一个最大的败笔。对于球迷非常关注的一个话题，乔尔杰维奇是否下课，可能对于这个悬念来说也都是暂时有答案了。"
)

Xformers is not installed correctly. If you want to use memory_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.


[{'label': 'LABEL_0', 'score': 0.9963239431381226}]

In [4]:
# Second smoke test: classify a sample military-news paragraph with the same pipeline.
vanilla_clf(
    "9月4日上午，参加中泰“蓝色突击-2023”海军联合训练的双方海军陆战分队，在泰军陆战队轻武器射击场展开冲锋枪、手枪实弹射击，标志着本次联训活动全面展开。按照本次双方达成的联训计划，联训双方在混合编组、示范讲解、相互体验对方武器后，随即在陆上、水面等多个场地同时展开装甲战术、防化、土制炸弹搜排、机降等课目训练。在实弹射击现场，记者看到中泰两军队员统一编组，每组各5名队员上场射击，在规定的时间内完成立姿、跪姿，向左向右转向的轻武器射击。"
)

[{'label': 'LABEL_2', 'score': 0.9959322810173035}]

In [5]:
from optimum.onnxruntime import ORTOptimizer
from optimum.onnxruntime.configuration import OptimizationConfig

# Graph-optimization settings: level 99 enables all optimizations.
optimization_config = OptimizationConfig(optimization_level=99)
# Optimizer bound to the exported ONNX model.
optimizer = ORTOptimizer.from_pretrained(model)

# Run the optimization and write the result into the ONNX directory; the
# returned save location is shown as the cell output.
optimizer.optimize(optimization_config=optimization_config, save_dir=onnx_path)

Optimizing model...
Configuration saved in sougou_test_trainer_256\onnx_256\ort_config.json
Optimized model saved at: sougou_test_trainer_256\onnx_256 (external data format: False; saved all tensor to one file: True)


WindowsPath('sougou_test_trainer_256/onnx_256')

In [6]:
from transformers import pipeline

# Reload the graph-optimized ONNX file from the ONNX directory.
optimized_model = ORTModelForSequenceClassification.from_pretrained(
    onnx_path,
    file_name="model_optimized.onnx",
)

# Build a pipeline on the optimized model and classify a sample
# education-news paragraph as a smoke test.
optimized_clf = pipeline(
    task="text-classification",
    model=optimized_model,
    tokenizer=tokenizer,
)
optimized_clf(
    "今年7月，教育部等四部门联合印发了《关于在深化非学科类校外培训治理中加强艺考培训规范管理的通知》（以下简称《通知》）。《通知》针对近年来校外艺术培训的状况而发布，并从源头就校外艺术培训机构的“培训主体、从业人员、招生行为、安全底线”等方面进行严格规范。校外艺术培训之所以火热，主要在于高中阶段艺术教育发展迟滞于学生需求。分析教育部数据，2021年艺术学科在校生占比为9.84%，高于2020年的9.73%；2020至2021年艺术学科在校生的年增长率为5.04%，远高于4.28%的总在校生年增长率。增长的数据，是近年来艺考招生连年火热的缩影，在未来一段时间内，艺考或将在全国范围内继续保持高热度。"
)


[{'label': 'LABEL_3', 'score': 0.9926980137825012}]

In [8]:
from optimum.onnxruntime import ORTQuantizer
from optimum.onnxruntime.configuration import AutoQuantizationConfig

# Dynamic (is_static=False), non-channel-wise (per_channel=False) quantization
# settings from the AVX2 preset.
dqconfig = AutoQuantizationConfig.avx2(per_channel=False, is_static=False)
# Quantizer built on top of the already-optimized ONNX model.
dynamic_quantizer = ORTQuantizer.from_pretrained(optimized_model)

# Quantize and save into the ONNX directory, keeping the returned save location.
model_quantized_path = dynamic_quantizer.quantize(
    quantization_config=dqconfig,
    save_dir=onnx_path,
)


Creating dynamic quantizer: QOperator (mode: IntegerOps, schema: u8/u8, channel-wise: False)
Quantizing model...
Saving quantized model at: sougou_test_trainer_256\onnx_256 (external data format: False)
Configuration saved in sougou_test_trainer_256\onnx_256\ort_config.json


In [9]:
import os

# Compare the on-disk size (in MB) of the optimized and the quantized ONNX files.
BYTES_PER_MB = 1024 * 1024

# The quantized size is deliberately NOT stored in `quantized_model`: that name
# holds the loaded quantized model elsewhere in the notebook, and binding a
# float to it would break the quantized evaluation if this cell is re-run.
optimized_size_mb = os.path.getsize(os.path.join(onnx_path, "model_optimized.onnx")) / BYTES_PER_MB
quantized_size_mb = os.path.getsize(os.path.join(onnx_path, "model_optimized_quantized.onnx")) / BYTES_PER_MB

print(f"Model file size: {optimized_size_mb:.2f} MB")
print(f"Quantized Model file size: {quantized_size_mb:.2f} MB")

Model file size: 390.17 MB
Quantized Model file size: 97.98 MB


In [10]:
# Load the tokenizer and the quantized ONNX model from the ONNX directory
from optimum.onnxruntime import ORTModelForSequenceClassification
from transformers import pipeline, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(onnx_path)

quantized_model = ORTModelForSequenceClassification.from_pretrained(
    onnx_path,
    file_name="model_optimized_quantized.onnx",
)
# Place the model on the device selected at the top of the notebook.
quantized_model = quantized_model.to(device)

The ONNX file model_optimized_quantized.onnx is not a regular name used in optimum.onnxruntime, the ORTModel might not behave as expected.


In [12]:
import pandas as pd

# Test split used by the evaluation loops, which read its "text" and
# "label" columns.
test_df = pd.read_csv(filepath_or_buffer="./data/sougou/test.csv")

In [None]:
# Baseline evaluation: the exported, not-yet-optimized ONNX model (`model`).
# Records the per-sample latency (tokenization + inference + argmax) in
# milliseconds and collects true/predicted labels for the report cell.
import numpy as np
import time

cost_time_list = []  # per-sample latency, milliseconds
true_labels, pred_labels = [], []
for i, row in test_df.iterrows():
    # perf_counter is monotonic and high-resolution, which makes it the right
    # clock for measuring short intervals (time.time() can jump or be coarse).
    row_s_time = time.perf_counter()
    true_labels.append(row["label"])
    encoded_text = tokenizer(row['text'], max_length=256, truncation=True, padding=True, return_tensors='pt')
    outputs = model(**encoded_text)
    # outputs[0] holds the logits for the single-sample batch; take the top class.
    label_id = np.argmax(outputs[0].detach().numpy(), axis=1)[0]
    pred_labels.append(label_id)
    # Measure once, so the printed latency is exactly the recorded one.
    cost_time = (time.perf_counter() - row_s_time) * 1000
    cost_time_list.append(cost_time)
    print(i, cost_time, label_id)

# All summary statistics come from the same per-sample measurements, so the
# average is not inflated by print/iteration overhead and is comparable to
# the percentiles.
print("avg time:", np.mean(cost_time_list))
print("P50 time:", np.percentile(np.array(cost_time_list), 50))
print("P95 time:", np.percentile(np.array(cost_time_list), 95))

0 710.2577686309814 0
1 335.86716651916504 0
2 309.1742992401123 0
3 272.2747325897217 0
4 276.2594223022461 0
5 300.19688606262207 0
6 289.229154586792 0
7 435.8360767364502 0
8 551.5248775482178 0
9 494.6787357330322 0
10 538.5622978210449 0
11 529.5844078063965 0
12 517.615795135498 0
13 517.6143646240234 0
14 530.5883884429932 0
15 513.6265754699707 0
16 681.1802387237549 0
17 597.412109375 0
18 490.68593978881836 0
19 551.5351295471191 0
20 497.6680278778076 0
21 548.5348701477051 0
22 484.7068786621094 0
23 476.72295570373535 0
24 515.6240463256836 0
25 518.6114311218262 0
26 513.6275291442871 0
27 534.571647644043 0
28 463.7608528137207 0
29 358.04271697998047 0
30 420.87626457214355 0
31 520.6091403961182 0
32 500.66423416137695 0
33 498.66771697998047 0
34 533.5755348205566 0
35 545.539379119873 0
36 396.9380855560303 0
37 345.0775146484375 0
38 536.5688800811768 0
39 485.7027530670166 0
40 461.76671981811523 0
41 497.66993522644043 0
42 510.6334686279297 0
43 535.576343536377

345 525.5966186523438 0
346 495.67294120788574 3
347 462.7642631530762 3
348 549.530029296875 3
349 474.73835945129395 3
350 496.6702461242676 3
351 497.67017364501953 3
352 509.63783264160156 3
353 416.8853759765625 3
354 489.69316482543945 3
355 521.611213684082 3
356 464.75768089294434 3
357 529.5829772949219 3
358 504.6534538269043 3
359 458.77623558044434 3
360 502.6583671569824 3
361 522.6025581359863 3


In [33]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 with 4 decimals. `true_labels` and
# `pred_labels` are rebound by every evaluation loop, so this reports on
# whichever loop was executed most recently.
report = classification_report(y_true=true_labels, y_pred=pred_labels, digits=4)
print(report)

              precision    recall  f1-score   support

           0     0.9802    1.0000    0.9900        99
           1     0.9789    0.9394    0.9588        99
           2     0.9900    1.0000    0.9950        99
           3     0.9223    0.9596    0.9406        99
           4     0.9896    0.9596    0.9744        99

    accuracy                         0.9717       495
   macro avg     0.9722    0.9717    0.9717       495
weighted avg     0.9722    0.9717    0.9717       495



In [13]:
# Evaluation of the graph-optimized ONNX model (`optimized_model`).
# Records the per-sample latency (tokenization + inference + argmax) in
# milliseconds and collects true/predicted labels for the report cell.
import numpy as np
import time

cost_time_list = []  # per-sample latency, milliseconds
true_labels, pred_labels = [], []
for i, row in test_df.iterrows():
    # perf_counter is monotonic and high-resolution, which makes it the right
    # clock for measuring short intervals (time.time() can jump or be coarse).
    row_s_time = time.perf_counter()
    true_labels.append(row["label"])
    encoded_text = tokenizer(row['text'], max_length=256, truncation=True, padding=True, return_tensors='pt')
    outputs = optimized_model(**encoded_text)
    # outputs[0] holds the logits for the single-sample batch; take the top class.
    label_id = np.argmax(outputs[0].detach().numpy(), axis=1)[0]
    pred_labels.append(label_id)
    # Measure once, so the printed latency is exactly the recorded one.
    cost_time = (time.perf_counter() - row_s_time) * 1000
    cost_time_list.append(cost_time)
    print(i, cost_time, label_id)

# All summary statistics come from the same per-sample measurements, so the
# average is not inflated by print/iteration overhead and is comparable to
# the percentiles.
print("avg time:", np.mean(cost_time_list))
print("P50 time:", np.percentile(np.array(cost_time_list), 50))
print("P95 time:", np.percentile(np.array(cost_time_list), 95))

0 314.73684310913086 0
1 273.2686996459961 0
2 230.38601875305176 0
3 288.22851181030273 0
4 265.2907371520996 0
5 261.30223274230957 0
6 264.2958164215088 0
7 271.2728977203369 0
8 277.2669792175293 0
9 270.26915550231934 0
10 269.2914009094238 0
11 278.2604694366455 0
12 268.2831287384033 0
13 288.23041915893555 0
14 356.0481071472168 0
15 528.5873413085938 0
16 630.3153038024902 0
17 480.71861267089844 0
18 542.5517559051514 0
19 576.000452041626 0
20 504.6522617340088 0
21 543.546199798584 0
22 509.6397399902344 0
23 522.6023197174072 0
24 569.4782733917236 0
25 695.1415538787842 0
26 642.2834396362305 0
27 666.5890216827393 0
28 487.6999855041504 0
29 301.1953830718994 0
30 397.93896675109863 0
31 508.6407661437988 0
32 481.7161560058594 0
33 531.5783023834229 0
34 619.3451881408691 0
35 535.5668067932129 0
36 378.98778915405273 0
37 325.1330852508545 0
38 475.73184967041016 0
39 558.5041046142578 0
40 485.7015609741211 0
41 556.5149784088135 0
42 810.0721836090088 0
43 653.253078

343 479.7201156616211 3
344 591.4168357849121 3
345 480.71813583374023 0
346 457.7770233154297 3
347 380.9816837310791 3
348 453.78756523132324 3
349 433.851957321167 3
350 538.5606288909912 3
351 555.516242980957 3
352 493.6800003051758 3
353 397.93872833251953 3
354 444.8070526123047 3
355 477.724552154541 3
356 531.5799713134766 3
357 481.7066192626953 3
358 460.7682228088379 3
359 606.3888072967529 3
360 471.73595428466797 3
361 479.7215461730957 3
362 454.7839164733887 3
363 542.5505638122559 3
364 583.120584487915 3
365 507.60793685913086 3
366 447.8025436401367 3
367 500.7047653198242 3
368 495.70488929748535 3
369 485.7003688812256 3
370 467.7901268005371 3
371 401.958703994751 3
372 527.6272296905518 3
373 541.5878295898438 3
374 442.8532123565674 3
375 508.64505767822266 3
376 529.5860767364502 3
377 474.7653007507324 3
378 461.79986000061035 3
379 492.6447868347168 3
380 440.8245086669922 3
381 445.80531120300293 3
382 567.5957202911377 3
383 570.6017017364502 3
384 439.8217

In [14]:
from sklearn.metrics import classification_report

# Summarize per-class precision / recall / F1 (4 decimals) for the labels
# gathered by the most recently executed evaluation loop.
report = classification_report(y_true=true_labels, y_pred=pred_labels, digits=4)
print(report)

              precision    recall  f1-score   support

           0     0.9802    1.0000    0.9900        99
           1     0.9789    0.9394    0.9588        99
           2     0.9900    1.0000    0.9950        99
           3     0.9223    0.9596    0.9406        99
           4     0.9896    0.9596    0.9744        99

    accuracy                         0.9717       495
   macro avg     0.9722    0.9717    0.9717       495
weighted avg     0.9722    0.9717    0.9717       495



In [15]:
# Evaluation of the dynamically quantized ONNX model (`quantized_model`).
# Records the per-sample latency (tokenization + inference + argmax) in
# milliseconds and collects true/predicted labels for the report cell.
import numpy as np
import time

cost_time_list = []  # per-sample latency, milliseconds
true_labels, pred_labels = [], []
for i, row in test_df.iterrows():
    # perf_counter is monotonic and high-resolution, which makes it the right
    # clock for measuring short intervals (time.time() can jump or be coarse).
    row_s_time = time.perf_counter()
    true_labels.append(row["label"])
    encoded_text = tokenizer(row['text'], max_length=256, truncation=True, padding=True, return_tensors='pt').to(device)
    outputs = quantized_model(**encoded_text)
    # outputs[0] holds the logits for the single-sample batch; take the top class.
    label_id = np.argmax(outputs[0].detach().numpy(), axis=1)[0]
    pred_labels.append(label_id)
    # Measure once, so the printed latency is exactly the recorded one.
    cost_time = (time.perf_counter() - row_s_time) * 1000
    cost_time_list.append(cost_time)
    print(i, cost_time, label_id)

# All summary statistics come from the same per-sample measurements, so the
# average is not inflated by print/iteration overhead and is comparable to
# the percentiles.
print("avg time:", np.mean(cost_time_list))
print("P50 time:", np.percentile(np.array(cost_time_list), 50))
print("P95 time:", np.percentile(np.array(cost_time_list), 95))

0 241.35112762451172 0
1 180.51815032958984 0
2 223.4025001525879 0
3 185.50443649291992 0
4 239.3631935119629 0
5 266.28708839416504 0
6 183.50839614868164 0
7 210.43825149536133 0
8 222.40734100341797 0
9 201.45726203918457 0
10 216.42231941223145 0
11 196.47455215454102 0
12 212.4323844909668 0
13 222.40734100341797 0
14 218.4152603149414 0
15 360.03780364990234 0
16 366.02163314819336 0
17 379.98461723327637 0
18 354.05468940734863 0
19 386.96742057800293 0
20 354.0499210357666 0
21 356.0497760772705 0
22 411.8990898132324 0
23 380.9828758239746 0
24 363.02828788757324 0
25 412.89639472961426 0
26 363.02900314331055 0
27 378.98755073547363 0
28 355.05127906799316 0
29 262.3000144958496 0
30 300.1971244812012 0
31 387.96234130859375 0
32 329.1196823120117 0
33 392.95005798339844 0
34 363.03114891052246 0
35 376.99317932128906 0
36 342.0853614807129 0
37 211.43794059753418 0
38 336.09795570373535 0
39 372.0076084136963 0
40 378.9863586425781 0
41 382.9765319824219 0
42 362.0336055755

341 364.0296459197998 3
342 354.0499210357666 3
343 396.9426155090332 3
344 370.011568069458 3
345 381.97922706604004 0
346 397.9370594024658 3
347 242.35177040100098 3
348 355.04984855651855 3
349 362.03646659851074 3
350 358.0431938171387 3
351 381.97803497314453 3
352 371.0052967071533 4
353 259.3107223510742 3
354 376.99246406555176 3
355 390.95449447631836 3
356 344.08116340637207 3
357 389.9574279785156 3
358 415.88878631591797 3
359 360.03684997558594 3
360 417.88673400878906 3
361 347.06854820251465 3
362 361.0384464263916 3
363 378.9854049682617 3
364 468.7466621398926 3
365 338.09423446655273 3
366 369.01354789733887 3
367 393.9483165740967 3
368 392.95101165771484 3
369 377.99072265625 3
370 378.98707389831543 3
371 321.148157119751 3
372 414.8898124694824 3
373 401.92699432373047 3
374 362.0319366455078 3
375 456.77852630615234 3
376 379.98342514038086 3
377 347.0723628997803 3
378 426.85961723327637 3
379 372.0054626464844 3
380 383.9740753173828 3
381 380.9816837310791 3


In [16]:
from sklearn.metrics import classification_report

# Precision / recall / F1 per class (4 decimals), computed from the
# true_labels / pred_labels left behind by the last evaluation loop that ran.
report = classification_report(y_true=true_labels, y_pred=pred_labels, digits=4)
print(report)

              precision    recall  f1-score   support

           0     0.9802    1.0000    0.9900        99
           1     0.9896    0.9596    0.9744        99
           2     0.9900    1.0000    0.9950        99
           3     0.9216    0.9495    0.9353        99
           4     0.9896    0.9596    0.9744        99

    accuracy                         0.9737       495
   macro avg     0.9742    0.9737    0.9738       495
weighted avg     0.9742    0.9737    0.9738       495



In [25]:
# Sanity check of np.percentile: the 50th percentile of 1..5 is the median, 3.0.
a = np.arange(1, 6)
np.percentile(a, 50)

3.0