# 8. ONNX 模型格式转换

- ONNX
- PMML

In [1]:
# Optional one-time environment setup: uncomment only the lines you need.
# NOTE(review): prefer `%pip` over `!pip` so packages install into the running kernel's environment.
# !pip install lightgbm onnx onnxruntime skl2onnx
# !pip list | grep tensorflow
# !pip install -U tensorflow
# NOTE(review): the protobuf pin below is presumably a compatibility workaround — TODO confirm it is still needed.
# !pip install protobuf==3.20.2

In [2]:
# Notebook-wide configuration: where the data lives and which files are used.
DIRECTORY: str = './data'            # folder holding both the input CSV and the exported model
HEALTH_FILE: str = 'healthexp.csv'   # input dataset file name
ONNX_FILE: str = 'healthexp.onnx'    # file name for the exported ONNX model

In [3]:
import os

# Default the MKL threading layer to GNU, but respect any value that is
# already present in the environment.
os.environ.setdefault('MKL_THREADING_LAYER', 'GNU')
print(f"MKL_THREADING_LAYER = {os.environ['MKL_THREADING_LAYER']}")

MKL_THREADING_LAYER = GNU


In [4]:
import pandas as pd
import sklearn
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType

import util

## 一、训练一个简单模型

In [5]:
# Build the path to the dataset with the project helper
# (presumably an absolute path, judging by the helper's name — verify in util).
file_path = util.gen_abspath(DIRECTORY, HEALTH_FILE)

# Load the comma-separated file, using row 0 as the header.
df = util.read_csv(
    file_path,
    header=0,
    sep=',',
)
df

Unnamed: 0,Year,Country,Spending_USD,Life_Expectancy
0,1970,Germany,252.311,70.6
1,1970,France,192.143,72.2
2,1970,Great Britain,123.993,71.9
3,1970,Japan,150.437,72.0
4,1970,USA,326.961,70.9
...,...,...,...,...
269,2020,Germany,6938.983,81.1
270,2020,France,5468.418,82.3
271,2020,Great Britain,5018.700,80.4
272,2020,Japan,4665.641,84.7


In [6]:
# Replace categorical columns with one-hot indicator columns.
df = pd.get_dummies(df)

# Target is life expectancy; every remaining column is a feature.
y = df['Life_Expectancy']
X = df.drop(columns=['Life_Expectancy'])

# Hold out 20% of the rows for evaluation; the seed fixes the split.
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=43,
)

# Fit a random forest with default hyper-parameters and a fixed seed.
# `fit` returns the estimator itself, so `rfr` is the fitted model.
rfr = sklearn.ensemble.RandomForestRegressor(random_state=32).fit(X_train, y_train)
rfr

In [7]:
# Show the full hyper-parameter set of the fitted forest (deep=True is the default).
rfr.get_params(deep=True)

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'criterion': 'squared_error',
 'max_depth': None,
 'max_features': 1.0,
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'monotonic_cst': None,
 'n_estimators': 100,
 'n_jobs': None,
 'oob_score': False,
 'random_state': 32,
 'verbose': 0,
 'warm_start': False}

In [8]:
# Predict on the held-out split.
y_pred = rfr.predict(X_test)

# Mean squared error and coefficient of determination on the test set.
mse = sklearn.metrics.mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = sklearn.metrics.r2_score(y_true=y_test, y_pred=y_pred)

# Report both metrics, one per line.
print(f'mse: {mse}', f'r2_score: {r2}', sep='\n')

mse: 0.0743528363636328
r2_score: 0.992869840067207


## 二、将模型文件存成 ONNX

In [9]:
# The ONNX input width must match the number of feature columns in X.
number_of_features = X.shape[1]
print(f'number_of_features: {number_of_features}')

# Declare a single float tensor input named 'input'; the None leading
# dimension leaves the batch size unspecified.
initial_type = [('input', FloatTensorType(shape=[None, number_of_features]))]

number_of_features: 8


In [10]:
# Convert the fitted scikit-learn forest using the declared input signature.
onnx_model = convert_sklearn(
    model=rfr,
    initial_types=initial_type,
)
# Evaluate `onnx_model` on its own line to inspect the converted model.

In [11]:
# Destination of the exported model, built with the project path helper.
onnx_path = util.gen_abspath(DIRECTORY, ONNX_FILE)

# Serialize BEFORE opening the file: opening with 'wb' truncates the target
# immediately, so serializing inside the `with` block would leave an empty
# (or clobbered) model file behind if serialization raised.
onnx_bytes = onnx_model.SerializeToString()

with open(onnx_path, 'wb') as f:
    f.write(onnx_bytes)

可以用 [Netron](https://github.com/lutzroeder/netron) 可视化模型文件。