In [1]:
import numpy as np
import torch, pickle
import lightgbm as lgb
from hummingbird import convert_sklearn
from onnxconverter_common.data_types import FloatTensorType
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris


In [3]:
# use some dataset from sklearn
data = load_iris()
X, y = data.data, data.target
X_torch = torch.from_numpy(X)

In [5]:
# Create and train a model
model = RandomForestClassifier(n_estimators=10)
model.fit(X, y)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=10,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [6]:
# Use hummingbird to convert your sklearn model to pytorch
pytorch_model = convert_sklearn(
    model, 
    extra_config = {"tree_implementation": "perf_tree_trav"})

In [7]:
%%timeit -r 3

#time for skl
skl = model.predict(X)

1.21 ms ± 1.94 µs per loop (mean ± std. dev. of 3 runs, 1000 loops each)


In [8]:
%%timeit -r 3

# time for hummingbird - CPU
pytorch_model.to('cpu')
hum_cpu = pytorch_model(X_torch)

901 µs ± 1.65 µs per loop (mean ± std. dev. of 3 runs, 1000 loops each)


In [9]:
%%timeit -r 3

# time for hummingbird - GPU. Note that you must have a GPU-enabled machine.
pytorch_model.to('cuda')
hum_gpu = pytorch_model(X_torch.to('cuda'))

1.37 ms ± 37 µs per loop (mean ± std. dev. of 3 runs, 1 loop each)


In [10]:
# make sure Hummingbird output matches Scikit-learn as expected
# (note that we have to recreate skl since `timeit` make it out of scope here)
skl = model.predict_proba(X)
pytorch_model.to('cuda')
hum_gpu = pytorch_model(X_torch.to('cuda'))

np.testing.assert_allclose(skl, hum_gpu[0].data.to('cpu').numpy(), rtol=1e-06, atol=1e-06)

AssertionError: 
Not equal to tolerance rtol=1e-06, atol=1e-06

(shapes (150, 3), (150,) mismatch)
 x: array([[1. , 0. , 0. ],
       [1. , 0. , 0. ],
       [1. , 0. , 0. ],...
 y: array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...