In [7]:
import numpy as np
import torch, pickle
import lightgbm as lgb
from hummingbird import convert_sklearn
from onnxconverter_common.data_types import FloatTensorType

In [12]:
# Load the pre-trained LGBM fraud model from disk.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load a pickle that you produced yourself or otherwise trust.
with open('lgbm-500-6.pkl', "rb") as model_file:
    # The context manager closes the file handle even if unpickling fails.
    model = pickle.load(model_file)

# Benchmark input shape, defined once so the generated data and the input type
# declared to the converter below cannot drift apart.
N_ROWS, N_FEATURES = 200000, 28

# Create some random float32 data. A locally seeded generator keeps the
# benchmark input reproducible across kernel restarts without touching
# NumPy's global random state.
rng = np.random.RandomState(0)
X = rng.rand(N_ROWS, N_FEATURES).astype(np.float32)
X_torch = torch.from_numpy(X)

# Use hummingbird to convert the sklearn-style model to pytorch. The declared
# input is a float tensor with the same shape as X; extra_config requests the
# tree implementation named "perf_tree_trav".
pytorch_model = convert_sklearn(
    model,
    [("input", FloatTensorType([N_ROWS, N_FEATURES]))],
    extra_config={"tree_implementation": "perf_tree_trav"})

In [13]:
%%timeit -r 3

# Baseline timing: run the original model's own predict() on the full NumPy
# batch X. The skl value produced here only serves the measurement; the
# verification cell further down recomputes skl with predict_proba().
skl = model.predict(X)

1.98 s ± 3.06 ms per loop (mean ± std. dev. of 3 runs, 1 loop each)


In [14]:
%%timeit -r 3

# Time the Hummingbird-converted model on the CPU, feeding it the tensor
# built from X. Note that the .to('cpu') call sits inside the timed body, so
# whatever it costs is part of the reported number.
pytorch_model.to('cpu')
hum_cpu = pytorch_model(X_torch)

5.35 s ± 9.89 ms per loop (mean ± std. dev. of 3 runs, 1 loop each)


In [15]:
%%timeit -r 3

# time for hummingbird - GPU. Note that you must have a GPU-enabled machine.
pytorch_model.to('cuda')
hum_gpu = pytorch_model(X_torch.to('cuda'))

165 ms ± 54.7 µs per loop (mean ± std. dev. of 3 runs, 10 loops each)


In [16]:
# Sanity check: the probabilities produced by the Hummingbird model on the GPU
# must agree with the original model's predict_proba() output.
skl = model.predict_proba(X)

pytorch_model.to('cuda')
hum_gpu = pytorch_model(X_torch.to('cuda'))

# Element [1] of the converted model's output is what gets compared against
# predict_proba(); copy it back to host memory as a NumPy array first.
hum_proba = hum_gpu[1].data.cpu().numpy()
np.testing.assert_allclose(skl, hum_proba, rtol=1e-06, atol=1e-06)