In [13]:
import numpy as np
import torch
import hummingbird
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_breast_cancer

In [14]:
# Load the scikit-learn breast cancer dataset as (features, labels) arrays.
X, y = load_breast_cancer(return_X_y=True)

# Keep at most `nrows` samples; slicing past the end of the arrays simply
# keeps every available row.
nrows = 15000
X = X[:nrows].astype(np.float32)  # features as single-precision floats
y = y[:nrows]

In [15]:
# Build a small random forest (10 trees, each capped at depth 10) and fit it
# on the prepared features and labels.
forest_params = {"n_estimators": 10, "max_depth": 10}
model = RandomForestClassifier(**forest_params)
model.fit(X, y)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=10, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=10,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [16]:
# Convert the fitted scikit-learn forest to PyTorch with Hummingbird.
# The extra configuration pins the tree implementation to the GEMM strategy
# rather than leaving the choice to the converter.
gemm_config = {"tree_implementation": "gemm"}
hb_model = model.to_pytorch(extra_config=gemm_config)

In [17]:
%%timeit -r 3

# Baseline: time scikit-learn's own predict() on the full feature matrix.
# `-r 3` repeats the measurement three times; the loop count per run is left
# for %%timeit to choose.
model.predict(X)

2.19 ms ± 20.8 µs per loop (mean ± std. dev. of 3 runs, 100 loops each)


In [18]:
%%timeit -r 3

# Time the Hummingbird-converted model on the same input. No device has been
# selected at this point, so the default (CPU) execution is measured.
hb_model.predict(X)

1.45 ms ± 37.7 µs per loop (mean ± std. dev. of 3 runs, 1000 loops each)


In [19]:
%%timeit -r 3

# Time for Hummingbird - GPU. Note that you must have a GPU-enabled machine.
hb_model.to('cuda')
hb_model.predict(X)

AssertionError: 
Found no NVIDIA driver on your system. Please check that you
have an NVIDIA GPU and installed a driver from
http://www.nvidia.com/Download/index.aspx

In [8]:
# Sanity check: the class probabilities produced by the converted model must
# agree with scikit-learn's within a tight tolerance.
skl = model.predict_proba(X)
hum = hb_model.predict_proba(X)

tolerance = {"rtol": 1e-6, "atol": 1e-6}
np.testing.assert_allclose(actual=skl, desired=hum, **tolerance)