In [2]:
from sklearn.decomposition import PCA, KernelPCA, TruncatedSVD
from sklearn.model_selection import train_test_split
from sklearn import datasets
import sklearn
import hummingbird.ml as hb
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from skl2onnx import convert_sklearn, to_onnx
import onnxruntime as rt

  tys = obj.typeStr or ''
  if getattr(obj, 'isHomogeneous', False):
  return getattr(obj, attribute)


# Load the Breast Cancer Wisconsin dataset

In [3]:
# Single seed shared by the notebook so the train/test split is reproducible.
random_state = 0
X, y = datasets.load_breast_cancer(return_X_y=True)
# Hold out 30% of the samples; the converted models are compared on X_test.
# `train_test_split` is imported explicitly rather than reached through
# `sklearn.model_selection`, which is only available as an attribute when some
# other import happens to have loaded that submodule.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=random_state
)

# Initialize and fit the models to the training set

In [4]:
# Number of components every decomposition keeps.
n_components = 15
# Pass the notebook-wide seed to each estimator so that any solver that relies
# on random initialisation (e.g. TruncatedSVD's default randomized algorithm)
# yields the same fitted components on every Restart & Run All.
sklearn_models = [
    PCA(n_components=n_components, random_state=random_state),
    KernelPCA(n_components=n_components, random_state=random_state),
    TruncatedSVD(n_components=n_components, random_state=random_state),
]

In [5]:
# Fit every decomposition in place, using the training split only.
for estimator in sklearn_models:
    estimator.fit(X_train)

# Convert the models to TorchScript using Hummingbird and test the results

In [6]:
# Convert each fitted estimator with Hummingbird's "TorchScript" backend,
# handing it X_train as the `test_input` sample. Order matches sklearn_models.
torch_models = [
    hb.convert(fitted, "TorchScript", test_input=X_train)
    for fitted in sklearn_models
]

In [27]:
# Check, model by model, that the TorchScript transform of the test split
# agrees with the sklearn transform.
labels = ["PCA", "KernelPCA", "TruncatedSVD"]
for label, reference_model, converted_model in zip(labels, sklearn_models, torch_models):
    expected = reference_model.transform(X_test)
    actual = converted_model.transform(X_test)
    print("Do sklearn and torch "+label+" models generate close results?")
    # Absolute tolerance of 1; the relative tolerance stays at numpy's default.
    print(np.allclose(expected, actual, atol=1))

Do sklearn and torch PCA models generate close results?
True
Do sklearn and torch KernelPCA models generate close results?
True
Do sklearn and torch TruncatedSVD models generate close results?
True


The TorchScript KernelPCA output matches sklearn only with an absolute tolerance (atol) of 1; the same check fails with atol=0.1.

# Convert the models to ONNX using skl2onnx and test the results

In [21]:
# Export each fitted estimator with skl2onnx, supplying X_train as the sample
# input `X`. Order matches sklearn_models.
onnx_models = [to_onnx(fitted, X=X_train) for fitted in sklearn_models]

In [29]:
# Run every exported ONNX graph with onnxruntime on the test split and check
# that its output agrees with the sklearn transform.
for i, model_name in enumerate(["PCA", "KernelPCA", "TruncatedSVD"]):
    sklearn_transformed = sklearn_models[i].transform(X_test)
    # Name the execution provider explicitly: onnxruntime builds that ship more
    # than the CPU provider refuse to create a session without `providers`.
    sess = rt.InferenceSession(
        onnx_models[i].SerializeToString(),
        providers=["CPUExecutionProvider"],
    )
    input_name = sess.get_inputs()[0].name
    # First graph output; requested by name below to get the transformed data.
    output_name = sess.get_outputs()[0].name
    onnx_transformed = sess.run([output_name], {input_name: X_test})[0]

    print("Do sklearn and onnx "+model_name+" models generate close results?")
    # Absolute tolerance of 1e-4; the relative tolerance stays at numpy's default.
    res = np.allclose(sklearn_transformed, onnx_transformed, atol=1e-04)
    print(res)

Do sklearn and onnx PCA models generate close results?
True
Do sklearn and onnx KernelPCA models generate close results?
True
Do sklearn and onnx TruncatedSVD models generate close results?
True


# Conclusion

Hummingbird successfully converted the models to TorchScript, and the results of the converted models match those of the original sklearn models. However, it can't convert these models to ONNX. Also, note that the converted KernelPCA model matches the results of the original model only with an absolute tolerance of 1.

skl2onnx successfully converted the models to ONNX, and the results of all converted models match those of the original sklearn models (with an absolute tolerance of 1e-4).
