In [1]:
import onnx
import joblib
import xgboost
from onnxmltools.convert import convert_xgboost

from skl2onnx.common.data_types import FloatTensorType, DoubleTensorType


In [2]:
class DataSet:
    """Describe one dataset whose pickled XGBoost model is exported to ONNX.

    Attributes:
        name: Dataset identifier; the notebook uses it both as the directory
            name and as the file stem of the ``.pkl`` / ``.onnx`` files.
        ncols: Number of columns declared for the ONNX input tensor.
        TARGET_OPSET: Value passed as ``target_opset`` to the converter.
    """

    def __init__(self, name, ncols, TARGET_OPSET):
        # Plain attribute holder: the arguments are stored unchanged.
        self.TARGET_OPSET = TARGET_OPSET
        self.ncols = ncols
        self.name = name

# One entry per dataset: (name, number of input columns).
# Every dataset is registered with the same target opset, 15.
datasets = [
    DataSet(name, ncols, 15)
    for name, ncols in (
        ("Acute_Inflammations", 6),
        ("Breast_Cancer", 9),
        ("Chronic_Kidney_Disease", 24),
        ("Heart_Disease", 13),
        ("Heart_Failure_Clinical_Records", 12),
        ("Lymphography", 18),
        ("Parkinsons", 22),
    )
]


In [3]:
for dataset in datasets:
    # Each dataset lives in a directory named after it; the pickled model
    # shares that name.
    model_path = f"{dataset.name}/{dataset.name}.pkl"

    # NOTE(review): joblib.load unpickles the file, so only model files from a
    # trusted source should be loaded here. The recorded output of this cell
    # also shows an XGBoost warning about a model serialized by an older
    # version, which recommends re-exporting it with `Booster.save_model`.
    xgboost_clf = joblib.load(model_path)

    # A single float tensor input called 'input' with fixed shape
    # (1, ncols), i.e. one row per inference call.
    initial_types = [('input', FloatTensorType([1, dataset.ncols]))]
    onnx_model_converted = convert_xgboost(
        xgboost_clf,
        'tree-based classifier',
        initial_types,
        target_opset=dataset.TARGET_OPSET,
    )

    # Write the ONNX file next to the pickle it was converted from.
    onnx_file = f"{dataset.name}/{dataset.name}.onnx"
    onnx.save_model(onnx_model_converted, onnx_file)

configuration generated by an older version of XGBoost, please export the model by calling
`Booster.save_model` from that version first, then load it back in current version. See:

    https://xgboost.readthedocs.io/en/stable/tutorials/saving_model.html

for more details about differences between saving model and serializing.



# Run a prediction with the exported ONNX model

In [10]:
import onnxruntime as rt
import pandas as pd
import numpy

In [78]:
for dataset in datasets:
    # Only the Acute_Inflammations model is exercised in this cell.
    if dataset.name != "Acute_Inflammations":
        continue

    # Exported ONNX model for this dataset.
    model_path = f"{dataset.name}/{dataset.name}.onnx"

    # The TSV file is read here, but the inference below runs on the
    # hardcoded rows in `x`. A commented-out alternative in the original cell
    # took every column except the last as features and the last as the label.
    new_path = f"{dataset.name}/new_data.tsv"
    print(new_path)
    titanic = pd.read_table(new_path, sep="\t", header=None)
    num_columns = titanic.shape[1]

    # Hand-written sample rows with six feature values each.
    # Reference sample noted by the author:
    #   0.9333333333333336  1.0 1.0 1.0 1.0 0.0 3.0
    x = numpy.array([
        [0.06666666666666666643, 0.0, 0.0, 1.0, 1.0, 1.0],
        [0.9333333333333336, 1.0, 1.0, 1.0, 1.0, 0.0],
        [0.9333333333333336, 1.0, 1.0, 1.0, 1.0, 1.0],
        [0.9833333333333334, 0.0, 1.0, 1.0, 0.0, 1.0],
    ])

    sess = rt.InferenceSession(model_path, providers=["CPUExecutionProvider"])

    # x[1:2] keeps a 2-D array holding only the second row, matching the
    # one-row input shape the model was exported with; cast to float32 for
    # the float tensor input.
    single_row = x[1:2].astype(numpy.float32)
    pred_onx = sess.run(None, {"input": single_row})

    # First output: predicted label; second output: class probabilities.
    print("predict", pred_onx[0])
    print("predict_proba", pred_onx[1][:1])

Acute_Inflammations/new_data.tsv
predict [3]
predict_proba [[0.02602444 0.02666548 0.04841037 0.89889973]]


# ONNX model info

In [5]:
from pprint import pprint
import onnxruntime

def _tensor_summary(tensor):
    """Gather the name, type and shape reported for a session input/output."""
    return {
        "name": tensor.name,
        "type": tensor.type,
        "shape": tensor.shape,
    }


# Model to inspect (alternative path kept for reference).
onnx_path = "Acute_Inflammations/Acute_Inflammations.onnx"
# onnx_path = "../decision_tree/decision_tree_ac.onnx"

provider = "CPUExecutionProvider"
onnx_session = onnxruntime.InferenceSession(onnx_path, providers=[provider])

# Describe every input the session declares.
print("----------------- inputs info -----------------")
input_tensors = onnx_session.get_inputs()
for input_tensor in input_tensors:
    input_info = _tensor_summary(input_tensor)
    pprint(input_info)

# Describe every output the session declares.
print("----------------- outputs info -----------------")
output_tensors = onnx_session.get_outputs()
for output_tensor in output_tensors:
    output_info = _tensor_summary(output_tensor)
    pprint(output_info)

----------------- inputs info -----------------
{'name': 'input', 'shape': [1, 6], 'type': 'tensor(float)'}
----------------- outputs info -----------------
{'name': 'label', 'shape': [1], 'type': 'tensor(int64)'}
{'name': 'probabilities', 'shape': [1, 4], 'type': 'tensor(float)'}
