This repository has been archived by the owner on Jan 13, 2024. It is now read-only.
/
plot_speedup_pca.py
128 lines (98 loc) · 3.27 KB
/
plot_speedup_pca.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
"""
.. _l-speedup-pca:
Speed up scikit-learn inference with ONNX
=========================================
Is it possible to make :epkg:`scikit-learn` faster with ONNX?
That's the question this example tries to answer. The scenario is
the following:
* a model is trained
* it is converted into ONNX for inference
* it selects a runtime to compute the prediction
The following runtimes are tested:
* `python`: python runtime for ONNX
* `onnxruntime1`: :epkg:`onnxruntime`
* `numpy`: the ONNX graph is converted into numpy code
* `numba`: the numpy code is accelerated with :epkg:`numba`.
.. contents::
:local:
PCA
+++
Let's look at a very simple model, a PCA.
"""
import numpy
from pandas import DataFrame
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
from sklearn.decomposition import PCA
from pyquickhelper.pycode.profiling import profile
from mlprodict.sklapi import OnnxSpeedUpTransformer
from mlprodict.tools.speed_measure import measure_time
from tqdm import tqdm
################################
# Data and models to test.
# One plain scikit-learn PCA as the baseline, plus one
# OnnxSpeedUpTransformer-wrapped PCA per ONNX runtime.
data, _ = make_regression(1000, n_features=20)
data = data.astype(numpy.float32)
models = [('sklearn', PCA(n_components=10))] + [
    (rt, OnnxSpeedUpTransformer(PCA(n_components=10), runtime=rt))
    for rt in ('python', 'onnxruntime1', 'numpy', 'numba')]
#################################
# Training.
# Fit every model on the same dataset; the name is not needed here.
for _, model in tqdm(models):
    model.fit(data)
#################################
# Profiling of runtime `onnxruntime1`.
def fct():
    """Run 1000 transforms with the model backed by `onnxruntime1`."""
    # Look the model up by name once, outside the loop.
    ort_model = dict(models)['onnxruntime1']
    for _ in range(1000):
        ort_model.transform(data)


res = profile(fct, pyinst_format="text")
print(res[1])
#################################
# Profiling of runtime `numpy`.
def fct():
    """Run 1000 transforms with the model backed by the `numpy` runtime."""
    # Look the model up by name once, outside the loop.
    np_model = dict(models)['numpy']
    for _ in range(1000):
        np_model.transform(data)


res = profile(fct, pyinst_format="text")
print(res[1])
#################################
# The class *OnnxSpeedUpTransformer* converts the PCA
# into ONNX and then converts it into a python code using
# *numpy*. The code is the following.
# ``models[3]`` is the ``('numpy', ...)`` entry built above;
# ``numpy_code_`` holds the generated source as a string.
print(models[3][1].numpy_code_)
#################################
# Benchmark.
# For every (name, model) pair, measure the average time of a single
# ``transform`` call on inputs of growing size.
bench = []
for name, model in tqdm(models):
    for size in (1, 10, 100, 1000, 10000, 100000, 200000):
        data, _ = make_regression(size, n_features=20)
        data = data.astype(numpy.float32)
        # Warm-up run: numba compiles the function during the first
        # execution, which would otherwise pollute the timing below.
        model.transform(data)
        res = measure_time(
            "model.transform(data)", div_by_number=True,
            context={'data': data, 'model': model})
        res['name'] = name
        res['size'] = size
        bench.append(res)

df = DataFrame(bench)
# Keyword arguments are required: the positional form of DataFrame.pivot
# was deprecated in pandas 1.x and removed in pandas 2.0.
piv = df.pivot(index="size", columns="name", values="average")
piv
######################################
# Graph.
fig, ax = plt.subplots(1, 2, figsize=(10, 4))
# Left panel: raw average times per runtime.
piv.plot(title="Speedup PCA with ONNX (lower better)",
         logx=True, logy=True, ax=ax[0])
# Right panel: every column divided by the scikit-learn column,
# so the baseline sits at 1.
piv2 = piv.div(piv['sklearn'], axis=0)
print(piv2)
piv2.plot(title="baseline=scikit-learn (lower better)",
          logx=True, logy=True, ax=ax[1])
plt.show()