### Complex Pipeline

# Column positions in the input matrix, grouped by how they are preprocessed.
numeric_features = [0, 1, 2]  # ["vA", "vB", "vC"]
categorical_features = [3, 4]  # ["vcat", "vcat2"]

# Logistic regression in which the True class carries weight 0.8 and the
# False class 0.2.
classifier = LogisticRegression(
    C=0.01,
    class_weight={False: 0.2, True: 0.8},
    n_jobs=1,
    max_iter=10,
    solver='lbfgs',
    tol=1e-3,
)

# Numeric columns: fill missing values with the column median, then standardize.
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])

# Categorical columns: one-hot encode, then reduce to a single SVD component.
# The encoder's `sparse` keyword is deliberately not passed: it was renamed to
# `sparse_output` in scikit-learn 1.2 and removed in 1.4, while sparse output
# is the default under both spellings. Omitting it keeps the same behaviour
# and runs on every version. Categories unseen during fit are encoded as
# all-zero rows (handle_unknown='ignore').
categorical_transformer = Pipeline(steps=[
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
    ('tsvd', TruncatedSVD(n_components=1, algorithm='arpack', tol=1e-4)),
])

# Route each column group through its own transformer.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features),
    ])

# Full model: preprocessing followed by the classifier.
# NOTE: the step name 'precprocessor' (sic) is a runtime key used for
# parameter routing and named_steps lookups, so its spelling is kept unchanged.
model = Pipeline(steps=[
    ('precprocessor', preprocessor),
    ('classifier', classifier),
])

In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

In [2]:
# Train a model.
# A fixed seed makes both the train/test split and the random forest
# deterministic, so the cells below give the same predictions after every
# Restart Kernel -> Run All.
SEED = 0
iris = load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=SEED)
clr = RandomForestClassifier(random_state=SEED)
clr.fit(X_train, y_train)

RandomForestClassifier()

In [3]:
# Convert into ONNX format
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType

# Declare a single float input named 'float_input'. The first dimension is
# left as None (any batch size); the second is the number of feature columns,
# read from the training data instead of being hard-coded.
n_features = X_train.shape[1]
initial_type = [('float_input', FloatTensorType([None, n_features]))]
onx = convert_sklearn(clr, initial_types=initial_type)

# Persist the serialized model so the runtime cells can load it from disk.
with open("rf_iris.onnx", "wb") as f:
    f.write(onx.SerializeToString())

In [4]:
# Compute the prediction with ONNX Runtime
import onnxruntime as rt
import numpy

# Name the execution provider explicitly: onnxruntime 1.9+ refuses to create a
# session without a provider list on builds that bundle more than the CPU
# provider, and CPU execution is what this example relies on.
sess = rt.InferenceSession("rf_iris.onnx", providers=["CPUExecutionProvider"])
input_name = sess.get_inputs()[0].name
label_name = sess.get_outputs()[0].name

# The model was converted with a FloatTensorType input, so the test matrix is
# cast to float32. Only the first output is requested, and [0] unwraps it from
# the list that run() returns.
pred_onx = sess.run([label_name], {input_name: X_test.astype(numpy.float32)})[0]

In [12]:
# Show the labels produced by the ONNX Runtime session for the test set.
print(pred_onx)

[1 2 2 2 0 1 0 2 2 0 1 0 0 1 0 2 2 0 1 0 0 1 2 1 0 2 0 2 2 1 0 2 1 1 2 0 0
 2]


In [11]:
# Show the labels predicted by the original scikit-learn model on the same
# test set, for side-by-side comparison with the ONNX Runtime output.
print(clr.predict(X_test))

[1 2 2 2 0 1 0 2 2 0 1 0 0 1 0 2 2 0 1 0 0 1 2 1 0 2 0 2 2 1 0 2 1 1 2 0 0
 2]


In [21]:
# scikit-learn predictions for the first five test rows only.
print("predict", clr.predict(X_test[:5]))
# Disabled: would print the class probabilities for the first test row.
#print("predict_proba", clr.predict_proba(X_test[:1]))

predict [1 2 2 2 0]


In [22]:
# Reload the saved model and score the first five test rows with ONNX Runtime.
# The provider is named explicitly because onnxruntime 1.9+ requires a provider
# list on builds that bundle more than the CPU provider.
sess = rt.InferenceSession("rf_iris.onnx", providers=["CPUExecutionProvider"])
# Unlike the full-test-set run, the result is kept as the list returned by
# run() (one entry per requested output); the labels are its first element.
pred_onx = sess.run([label_name], {input_name: X_test[:5].astype(numpy.float32)})
print("predict", pred_onx[0])

predict [1 2 2 2 0]


In [20]:
# Number of outputs held in pred_onx. When pred_onx is the raw list returned by
# sess.run([label_name], ...), exactly one output name was requested, so the
# length is 1.
len(pred_onx)

1