From 8022b72fd213ea3e709dbdaafb77120410a579c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Wed, 26 May 2021 01:38:36 +0200 Subject: [PATCH 01/16] complex scenarios --- .gitignore | 1 + doc/tutorial.rst | 1 + doc/tutorial_4_complex.rst | 10 ++ examples/plot_usparse_xgboost.py | 199 +++++++++++++++++++++++++++++++ 4 files changed, 211 insertions(+) create mode 100644 doc/tutorial_4_complex.rst create mode 100644 examples/plot_usparse_xgboost.py diff --git a/.gitignore b/.gitignore index e3ed7ec6..e64bd609 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,4 @@ examples/squeezenet1.1-7.onnx tests/pipeline*.onnx temp_* examples/pipeline_lightgbm.onnx +examples/model.onnx diff --git a/doc/tutorial.rst b/doc/tutorial.rst index 9f30cba5..34867134 100644 --- a/doc/tutorial.rst +++ b/doc/tutorial.rst @@ -16,6 +16,7 @@ involving operator not actually implemented in tutorial_1-5_external tutorial_2_new_converter tutorial_3_new_operator + tutorial_4_complex The tutorial was tested with following version: diff --git a/doc/tutorial_4_complex.rst b/doc/tutorial_4_complex.rst new file mode 100644 index 00000000..4d20ed74 --- /dev/null +++ b/doc/tutorial_4_complex.rst @@ -0,0 +1,10 @@ + +Complex Scenarios +================= + +Discrepencies may happen. Let's see some unexpected cases. + +.. toctree:: + :maxdepth: 1 + + auto_examples/plot_usparse_xgboost diff --git a/examples/plot_usparse_xgboost.py b/examples/plot_usparse_xgboost.py new file mode 100644 index 00000000..783a7d84 --- /dev/null +++ b/examples/plot_usparse_xgboost.py @@ -0,0 +1,199 @@ +""" +.. _example-sparse-tfidf: + +TfIdf and sparse matrices +========================= + +.. index:: XGBoost, lightgbm, RandomForest + + +.. contents:: + :local: + +Train a RandomForestClassifier after sparse ++++++++++++++++++++++++++++++++++++++++++++ +""" +from pyquickhelper.helpgen.graphviz_helper import plot_graphviz +from mlprodict.onnxrt import OnnxInference +import numpy +import pandas +import onnxruntime as rt +from sklearn.compose import ColumnTransformer +from sklearn.datasets import load_iris +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import StandardScaler +from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer +from sklearn.ensemble import RandomForestClassifier +from xgboost import XGBClassifier +from lightgbm import LGBMClassifier +from skl2onnx.common.data_types import FloatTensorType, StringTensorType +from skl2onnx import convert_sklearn, update_registered_converter +from skl2onnx.common.shape_calculator import ( + calculate_linear_classifier_output_shapes) +from onnxmltools.convert.xgboost.operator_converters.XGBoost import ( + convert_xgboost) +from onnxmltools.convert.lightgbm.operator_converters.LightGbm import ( + convert_lightgbm) + + +update_registered_converter( + XGBClassifier, 'XGBoostXGBClassifier', + calculate_linear_classifier_output_shapes, convert_xgboost, + options={'nocl': [True, False], 'zipmap': [True, False, 'columns']}) +update_registered_converter( + LGBMClassifier, 'LightGbmLGBMClassifier', + calculate_linear_classifier_output_shapes, convert_lightgbm, + options={'nocl': [True, False], 'zipmap': [True, False]}) + + +cst = ['class zero', 'class one', 'class two'] + +data = load_iris() +X = data.data[:, :2] +y = data.target + +df = pandas.DataFrame(X) +df["text"] = [cst[i] for i in y] + + +ind = numpy.arange(X.shape[0]) +numpy.random.shuffle(ind) +X = X[ind, :].copy() +y = y[ind].copy() + + +pipe = Pipeline([ + ('union', ColumnTransformer([ + ('scale1', StandardScaler(), [0, 1]), + ('subject', + Pipeline([ + ('count', CountVectorizer()), + ('tfidf', TfidfTransformer()) + ]), "text"), + ], sparse_threshold=1.)), + ('cls', RandomForestClassifier(n_estimators=5, max_depth=3)), +]) + +pipe.fit(df, y) + + +# Convert + +model_onnx = convert_sklearn( + pipe, 'pipeline_xgboost', + [('input', FloatTensorType([None, 2])), + ('text', StringTensorType([None, 1]))], + target_opset=12, + options={RandomForestClassifier: {'zipmap': False}}) + + +# Compare the predictions + +print("predict", pipe.predict(df[:5])) +print("predict_proba", pipe.predict_proba(df[:2])) + +# Predictions with onnxruntime. + +sess = rt.InferenceSession(model_onnx.SerializeToString()) +pred_onx = sess.run(None, { + "input": df[[0, 1]].values.astype(numpy.float32), + "text": df[["text"]].values}) +print("predict", pred_onx[0][:5]) +print("predict_proba", pred_onx[1][:2]) + +print("%s differences:" % pipe.steps[-1][-1].__class__.__name__, + numpy.abs(pred_onx[1].ravel() - pipe.predict_proba(df).ravel()).sum()) + +############################################ +# Train a XGBoost after sparse +# ++++++++++++++++++++++++++++ + +pipe = Pipeline([ + ('union', ColumnTransformer([ + ('scale1', StandardScaler(), [0, 1]), + ('subject', + Pipeline([ + ('count', CountVectorizer(ngram_range=(1, 2))), + ('tfidf', TfidfTransformer()) + ]), "text"), + ], sparse_threshold=1.)), + ('cls', XGBClassifier(n_estimators=5, max_depth=3)), +]) + +pipe.fit(df, y) + +model_onnx = convert_sklearn( + pipe, 'pipeline_xgboost', + [('input', FloatTensorType([None, 2])), + ('text', StringTensorType([None, 1]))], + target_opset=12, + options={XGBClassifier: {'zipmap': False}}) + +print("predict", pipe.predict(df[:5])) +print("predict_proba", pipe.predict_proba(df[:2])) + +with open('model.onnx', 'wb') as f: + f.write(model_onnx.SerializeToString()) + +sess = rt.InferenceSession(model_onnx.SerializeToString()) +pred_onx = sess.run(None, { + "input": df[[0, 1]].values.astype(numpy.float32), + "text": df[["text"]].values}) +print("predict", pred_onx[0][:5]) +print("predict_proba", pred_onx[1][:2]) + +print("%s differences:" % pipe.steps[-1][-1].__class__.__name__, + numpy.abs(pred_onx[1].ravel() - pipe.predict_proba(df).ravel()).sum()) + + +############################################ +# Train a LightGBM after sparse +# +++++++++++++++++++++++++++++ + +pipe = Pipeline([ + ('union', ColumnTransformer([ + ('scale1', StandardScaler(), [0, 1]), + ('subject', + Pipeline([ + ('count', CountVectorizer(ngram_range=(1, 2))), + ('tfidf', TfidfTransformer()) + ]), "text"), + ], sparse_threshold=1.)), + ('cls', LGBMClassifier(n_estimators=5, max_depth=3)), +]) + +pipe.fit(df, y) + +model_onnx = convert_sklearn( + pipe, 'pipeline_lgb', + [('input', FloatTensorType([None, 2])), + ('text', StringTensorType([None, 1]))], + target_opset=12, + options={LGBMClassifier: {'zipmap': False}}) + +print("predict", pipe.predict(df[:5])) +print("predict_proba", pipe.predict_proba(df[:2])) + +with open('model.onnx', 'wb') as f: + f.write(model_onnx.SerializeToString()) + +sess = rt.InferenceSession(model_onnx.SerializeToString()) +pred_onx = sess.run(None, { + "input": df[[0, 1]].values.astype(numpy.float32), + "text": df[["text"]].values}) +print("predict", pred_onx[0][:5]) +print("predict_proba", pred_onx[1][:2]) + +print("%s differences:" % pipe.steps[-1][-1].__class__.__name__, + numpy.abs(pred_onx[1].ravel() - pipe.predict_proba(df).ravel()).sum()) + + +############################# +# Final graph +# +++++++++++ + + +oinf = OnnxInference(model_onnx) +ax = plot_graphviz(oinf.to_dot()) +ax.get_xaxis().set_visible(False) +ax.get_yaxis().set_visible(False) From 843bf487cc2e216ff24777bfd1ccb7ff71bbd739 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Wed, 26 May 2021 20:13:00 +0200 Subject: [PATCH 02/16] complete example --- examples/plot_usparse_xgboost.py | 385 +++++++++++++++++++------------ 1 file changed, 243 insertions(+), 142 deletions(-) diff --git a/examples/plot_usparse_xgboost.py b/examples/plot_usparse_xgboost.py index 783a7d84..8449ef38 100644 --- a/examples/plot_usparse_xgboost.py +++ b/examples/plot_usparse_xgboost.py @@ -4,30 +4,43 @@ TfIdf and sparse matrices ========================= -.. index:: XGBoost, lightgbm, RandomForest +.. index:: xgboost, lightgbm, sparse, ensemble +`TfidfVectorizer `_ +usually creates sparse data. If the data is sparse enough, matrices +usually stays as sparse all along the pipeline until the predictor +is trained. Sparse matrices do not consider null and missing values +as they are not present in the datasets. Because some predictors +do the difference, this ambiguity may introduces discrepencies +when converter into ONNX. This example looks into several configurations. .. contents:: :local: -Train a RandomForestClassifier after sparse -+++++++++++++++++++++++++++++++++++++++++++ +Imports, setups ++++++++++++++++ + +All imports. It also registered onnx converters for :epgk:`xgboost` +and :epkg:`lightgbm`. """ -from pyquickhelper.helpgen.graphviz_helper import plot_graphviz -from mlprodict.onnxrt import OnnxInference +import warnings import numpy import pandas import onnxruntime as rt +from tqdm import tqdm from sklearn.compose import ColumnTransformer from sklearn.datasets import load_iris from sklearn.pipeline import Pipeline from sklearn.preprocessing import StandardScaler from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer -from sklearn.ensemble import RandomForestClassifier +from sklearn.ensemble import ( + RandomForestClassifier, HistGradientBoostingClassifier) from xgboost import XGBClassifier from lightgbm import LGBMClassifier from skl2onnx.common.data_types import FloatTensorType, StringTensorType -from skl2onnx import convert_sklearn, update_registered_converter +from skl2onnx import to_onnx, update_registered_converter +from skl2onnx.sklapi import CastTransformer, ReplaceTransformer from skl2onnx.common.shape_calculator import ( calculate_linear_classifier_output_shapes) from onnxmltools.convert.xgboost.operator_converters.XGBoost import ( @@ -46,6 +59,12 @@ options={'nocl': [True, False], 'zipmap': [True, False]}) +########################################## +# Artificial datasets +# +++++++++++++++++++++++++++ +# +# Iris + a text column. + cst = ['class zero', 'class one', 'class two'] data = load_iris() @@ -62,138 +81,220 @@ y = y[ind].copy() -pipe = Pipeline([ - ('union', ColumnTransformer([ - ('scale1', StandardScaler(), [0, 1]), - ('subject', - Pipeline([ - ('count', CountVectorizer()), - ('tfidf', TfidfTransformer()) - ]), "text"), - ], sparse_threshold=1.)), - ('cls', RandomForestClassifier(n_estimators=5, max_depth=3)), -]) - -pipe.fit(df, y) - - -# Convert - -model_onnx = convert_sklearn( - pipe, 'pipeline_xgboost', - [('input', FloatTensorType([None, 2])), - ('text', StringTensorType([None, 1]))], - target_opset=12, - options={RandomForestClassifier: {'zipmap': False}}) - - -# Compare the predictions - -print("predict", pipe.predict(df[:5])) -print("predict_proba", pipe.predict_proba(df[:2])) - -# Predictions with onnxruntime. - -sess = rt.InferenceSession(model_onnx.SerializeToString()) -pred_onx = sess.run(None, { - "input": df[[0, 1]].values.astype(numpy.float32), - "text": df[["text"]].values}) -print("predict", pred_onx[0][:5]) -print("predict_proba", pred_onx[1][:2]) - -print("%s differences:" % pipe.steps[-1][-1].__class__.__name__, - numpy.abs(pred_onx[1].ravel() - pipe.predict_proba(df).ravel()).sum()) - -############################################ -# Train a XGBoost after sparse -# ++++++++++++++++++++++++++++ - -pipe = Pipeline([ - ('union', ColumnTransformer([ - ('scale1', StandardScaler(), [0, 1]), - ('subject', - Pipeline([ - ('count', CountVectorizer(ngram_range=(1, 2))), - ('tfidf', TfidfTransformer()) - ]), "text"), - ], sparse_threshold=1.)), - ('cls', XGBClassifier(n_estimators=5, max_depth=3)), -]) - -pipe.fit(df, y) - -model_onnx = convert_sklearn( - pipe, 'pipeline_xgboost', - [('input', FloatTensorType([None, 2])), - ('text', StringTensorType([None, 1]))], - target_opset=12, - options={XGBClassifier: {'zipmap': False}}) - -print("predict", pipe.predict(df[:5])) -print("predict_proba", pipe.predict_proba(df[:2])) - -with open('model.onnx', 'wb') as f: - f.write(model_onnx.SerializeToString()) - -sess = rt.InferenceSession(model_onnx.SerializeToString()) -pred_onx = sess.run(None, { - "input": df[[0, 1]].values.astype(numpy.float32), - "text": df[["text"]].values}) -print("predict", pred_onx[0][:5]) -print("predict_proba", pred_onx[1][:2]) - -print("%s differences:" % pipe.steps[-1][-1].__class__.__name__, - numpy.abs(pred_onx[1].ravel() - pipe.predict_proba(df).ravel()).sum()) - - -############################################ -# Train a LightGBM after sparse -# +++++++++++++++++++++++++++++ - -pipe = Pipeline([ - ('union', ColumnTransformer([ - ('scale1', StandardScaler(), [0, 1]), - ('subject', - Pipeline([ - ('count', CountVectorizer(ngram_range=(1, 2))), - ('tfidf', TfidfTransformer()) - ]), "text"), - ], sparse_threshold=1.)), - ('cls', LGBMClassifier(n_estimators=5, max_depth=3)), -]) - -pipe.fit(df, y) - -model_onnx = convert_sklearn( - pipe, 'pipeline_lgb', - [('input', FloatTensorType([None, 2])), - ('text', StringTensorType([None, 1]))], - target_opset=12, - options={LGBMClassifier: {'zipmap': False}}) - -print("predict", pipe.predict(df[:5])) -print("predict_proba", pipe.predict_proba(df[:2])) - -with open('model.onnx', 'wb') as f: - f.write(model_onnx.SerializeToString()) - -sess = rt.InferenceSession(model_onnx.SerializeToString()) -pred_onx = sess.run(None, { - "input": df[[0, 1]].values.astype(numpy.float32), - "text": df[["text"]].values}) -print("predict", pred_onx[0][:5]) -print("predict_proba", pred_onx[1][:2]) - -print("%s differences:" % pipe.steps[-1][-1].__class__.__name__, - numpy.abs(pred_onx[1].ravel() - pipe.predict_proba(df).ravel()).sum()) - - -############################# -# Final graph -# +++++++++++ - - -oinf = OnnxInference(model_onnx) -ax = plot_graphviz(oinf.to_dot()) -ax.get_xaxis().set_visible(False) -ax.get_yaxis().set_visible(False) +########################################## +# Train ensemble after sparse +# +++++++++++++++++++++++++++ +# +# The example use the Iris datasets with artifical text datasets +# preprocessed with a tf-idf. `sparse_threshold=1.` avoids +# sparse matrices to be converted into dense matrices. + + +def make_pipelines(df_train, y_train, models=None, + sparse_threshold=1., replace_nan=False, + insert_replace=False, verbose=False): + + if models is None: + models = [ + RandomForestClassifier, HistGradientBoostingClassifier, + XGBClassifier, LGBMClassifier] + + pipes = [] + for model in tqdm(models): + + if model == HistGradientBoostingClassifier: + kwargs = dict(max_iter=5) + elif model == XGBClassifier: + kwargs = dict(n_estimators=5, use_label_encoder=False) + else: + kwargs = dict(n_estimators=5) + + if insert_replace: + pipe = Pipeline([ + ('union', ColumnTransformer([ + ('scale1', StandardScaler(), [0, 1]), + ('subject', + Pipeline([ + ('count', CountVectorizer()), + ('tfidf', TfidfTransformer()), + ('repl', ReplaceTransformer()), + ]), "text"), + ], sparse_threshold=sparse_threshold)), + ('cast', CastTransformer()), + ('cls', model(max_depth=3, **kwargs)), + ]) + else: + pipe = Pipeline([ + ('union', ColumnTransformer([ + ('scale1', StandardScaler(), [0, 1]), + ('subject', + Pipeline([ + ('count', CountVectorizer()), + ('tfidf', TfidfTransformer()) + ]), "text"), + ], sparse_threshold=sparse_threshold)), + ('cast', CastTransformer()), + ('cls', model(max_depth=3, **kwargs)), + ]) + + try: + pipe.fit(df_train, y_train) + except TypeError as e: + obs = dict(model=model.__name__, pipe=pipe, error=e) + pipes.append(obs) + continue + + options = {model: {'zipmap': False}} + if replace_nan: + options[TfidfTransformer] = {'nan': True} + + # convert + with warnings.catch_warnings(record=False): + warnings.simplefilter("ignore", (FutureWarning, UserWarning)) + model_onnx = to_onnx( + pipe, + initial_types=[('input', FloatTensorType([None, 2])), + ('text', StringTensorType([None, 1]))], + target_opset=12, options=options) + + with open('model.onnx', 'wb') as f: + f.write(model_onnx.SerializeToString()) + + sess = rt.InferenceSession(model_onnx.SerializeToString()) + inputs = {"input": df[[0, 1]].values.astype(numpy.float32), + "text": df[["text"]].values} + pred_onx = sess.run(None, inputs) + + diff = numpy.abs( + pred_onx[1].ravel() - + pipe.predict_proba(df).ravel()).sum() + + if verbose: + from mlprodict.onnxrt import OnnxInference + + def td(a): + if hasattr(a, 'todense'): + b = a.todense() + ind = set(a.indices) + for i in range(b.shape[1]): + if i not in ind: + b[0, i] = numpy.nan + return b + return a + + oinf = OnnxInference(model_onnx) + pred_onx2 = oinf.run(inputs) + diff2 = numpy.abs( + pred_onx2['probabilities'].ravel() - + pipe.predict_proba(df).ravel()).sum() + + if diff > 0.1: + for i, (l1, l2) in enumerate( + zip(pipe.predict_proba(df), pred_onx[1])): + d = numpy.abs(l1 - l2).sum() + if verbose and d > 0.1: + print("\nDISCREPENCY DETAILS") + print(d, i, l1, l2) + pre = pipe.steps[0][-1].transform(df) + print("idf", pre[i].dtype, td(pre[i])) + pre2 = pipe.steps[1][-1].transform(pre) + print("cas", pre2[i].dtype, td(pre2[i])) + inter = oinf.run(inputs, intermediate=True) + onx = inter['tfidftr_norm'] + print("onx", onx.dtype, onx[i]) + onx = inter['variable3'] + + obs = dict(model=model.__name__, + discrepencies=diff, + model_onnx=model_onnx, pipe=pipe) + if verbose: + obs['discrepency2'] = diff2 + pipes.append(obs) + + return pipes + + +data_sparse = make_pipelines(df, y) +stat = pandas.DataFrame(data_sparse).drop(['model_onnx', 'pipe'], axis=1) +if 'error' in stat.columns: + print(stat.drop('error', axis=1)) +stat + +############################ +# Sparse data hurts. +# +# Dense data +# ++++++++++ +# +# Let's replace sparse data with dense by using `sparse_threshold=0.` + + +data_dense = make_pipelines(df, y, sparse_threshold=0.) +stat = pandas.DataFrame(data_dense).drop(['model_onnx', 'pipe'], axis=1) +if 'error' in stat.columns: + print(stat.drop('error', axis=1)) +stat + +#################################### +# This is much better. Let's compare how the preprocessing +# applies on the data. + +print("sparse") +print(data_sparse[-1]['pipe'].steps[0][-1].transform(df)[:2]) +print() +print("dense") +print(data_dense[-1]['pipe'].steps[0][-1].transform(df)[:2]) + +#################################### +# This shows `RandomForestClassifier +# `_, +# `XGBClassifier `_ do not process +# the same way sparse and +# dense matrix as opposed to `LGBMClassifier +# `_. +# And `HistGradientBoostingClassifier +# `_ +# fails. +# +# Dense data with nan +# +++++++++++++++++++ +# +# Let's keep sparse data in the scikit-learn pipeline but +# replace null values by nan in the onnx graph. + +data_dense = make_pipelines(df, y, sparse_threshold=1., replace_nan=True) +stat = pandas.DataFrame(data_dense).drop(['model_onnx', 'pipe'], axis=1) +if 'error' in stat.columns: + print(stat.drop('error', axis=1)) +stat + + +############################## +# Dense, 0 replaced by nan +# ++++++++++++++++++++++++ +# +# Instead of using a specific options to replace null values +# into nan values, a custom transformer called +# ReplaceTransformer is explicitely inserted into the pipeline. +# A new converter is added to the list of supported models. +# It is equivalent to the previous options except it is +# more explicit. + +data_dense = make_pipelines(df, y, sparse_threshold=1., replace_nan=False, + insert_replace=True) +stat = pandas.DataFrame(data_dense).drop(['model_onnx', 'pipe'], axis=1) +if 'error' in stat.columns: + print(stat.drop('error', axis=1)) +stat + +###################################### +# Conclusion +# ++++++++++ +# +# Unless dense arrays are used, because :epkg:`onnxruntime` +# ONNX does not support sparse yet, the conversion needs to be +# tuned depending on the model which follows the TfIdf preprocessing. From 5cfaa7348c34ddf8de764030b34c50efbc7c9337 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Thu, 27 May 2021 16:01:55 +0200 Subject: [PATCH 03/16] update travis link --- README.rst | 4 ++-- doc/index.rst | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.rst b/README.rst index 369d5986..6e9e8409 100644 --- a/README.rst +++ b/README.rst @@ -2,8 +2,8 @@ .. image:: https://circleci.com/gh/sdpython/onnxcustom/tree/master.svg?style=svg :target: https://circleci.com/gh/sdpython/onnxcustom/tree/master -.. image:: https://travis-ci.org/sdpython/onnxcustom.svg?branch=master - :target: https://travis-ci.org/sdpython/onnxcustom +.. image:: https://travis-ci.com/sdpython/onnxcustom.svg?branch=master + :target: https://travis-ci.com/sdpython/onnxcustom :alt: Build status .. image:: https://ci.appveyor.com/api/projects/status/a3sn45a2fayoxb5q?svg=true diff --git a/doc/index.rst b/doc/index.rst index 08adc8a2..09c8d45f 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -5,8 +5,8 @@ onnxcustom: deploy machine learned models .. image:: https://circleci.com/gh/sdpython/onnxcustom/tree/master.svg?style=svg :target: https://circleci.com/gh/sdpython/onnxcustom/tree/master -.. image:: https://travis-ci.org/sdpython/onnxcustom.svg?branch=master - :target: https://travis-ci.org/sdpython/onnxcustom +.. image:: https://travis-ci.com/sdpython/onnxcustom.svg?branch=master + :target: https://travis-ci.com/sdpython/onnxcustom :alt: Build status .. image:: https://ci.appveyor.com/api/projects/status/a3sn45a2fayoxb5q?svg=true From 781b4f321ddb579629fe48b6573de0217decf8bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Sun, 11 Jul 2021 17:51:45 +0200 Subject: [PATCH 04/16] Fixes example for scikit-learn>=0.24 --- examples/plot_cbegin_opset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/plot_cbegin_opset.py b/examples/plot_cbegin_opset.py index 7bc0d82b..0fa3bdcd 100644 --- a/examples/plot_cbegin_opset.py +++ b/examples/plot_cbegin_opset.py @@ -43,7 +43,7 @@ X, y = make_blobs(n_samples=100, n_features=2) -model = IsolationForest(3) +model = IsolationForest(n_estimators=3) model.fit(X) labels = model.predict(X) From 75398af1a24952cf734b91f6541fb30ebcb643d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Sun, 11 Jul 2021 17:54:53 +0200 Subject: [PATCH 05/16] Update config.yml --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 5c9b0d87..0e5eaa33 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -2,7 +2,7 @@ version: 2 jobs: build: docker: - - image: circleci/python:3.8.7 + - image: circleci/python:3.9.5 working_directory: ~/repo From 88fa3aa81e66ce7a231c98c9d9a326446aa984af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Sun, 11 Jul 2021 18:08:32 +0200 Subject: [PATCH 06/16] Update requirements-dev.txt --- requirements-dev.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index de1ab1a6..4078c9e5 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -9,7 +9,7 @@ mlinsights mlprodict>=0.5 nbsphinx onnx>=1.8.0 -git+https://github.com/xadupre/onnxconverter-common.git@jenkins +onnxconverter-common onnxruntime>=1.6.0 pillow py-spy @@ -19,7 +19,7 @@ pyquickhelper>=1.10 pytest pytest-cov scikit-learn>=0.24 -git+https://github.com/xadupre/sklearn-onnx.git@jenkins +skl2onnx>=1.9.0 sphinx sphinxcontrib-blockdiag sphinx-gallery From f35f2f5e004741e902930172435460f4a06d758d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Sun, 11 Jul 2021 18:20:25 +0200 Subject: [PATCH 07/16] Update plot_usparse_xgboost.py --- examples/plot_usparse_xgboost.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/plot_usparse_xgboost.py b/examples/plot_usparse_xgboost.py index 8449ef38..5ea7cfe7 100644 --- a/examples/plot_usparse_xgboost.py +++ b/examples/plot_usparse_xgboost.py @@ -34,6 +34,7 @@ from sklearn.pipeline import Pipeline from sklearn.preprocessing import StandardScaler from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer +from sklearn.experimental import enable_hist_gradient_boosting from sklearn.ensemble import ( RandomForestClassifier, HistGradientBoostingClassifier) from xgboost import XGBClassifier From 31d90c0345553d9712fe65ea2099d36ad045f141 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Sun, 11 Jul 2021 18:38:13 +0200 Subject: [PATCH 08/16] update example --- examples/plot_usparse_xgboost.py | 13 ++++++------- requirements-dev.txt | 4 ++-- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/examples/plot_usparse_xgboost.py b/examples/plot_usparse_xgboost.py index 5ea7cfe7..b556f24c 100644 --- a/examples/plot_usparse_xgboost.py +++ b/examples/plot_usparse_xgboost.py @@ -27,7 +27,6 @@ import warnings import numpy import pandas -import onnxruntime as rt from tqdm import tqdm from sklearn.compose import ColumnTransformer from sklearn.datasets import load_iris @@ -48,6 +47,7 @@ convert_xgboost) from onnxmltools.convert.lightgbm.operator_converters.LightGbm import ( convert_lightgbm) +from mlprodict.onnxrt import OnnxInference update_registered_converter( @@ -161,18 +161,16 @@ def make_pipelines(df_train, y_train, models=None, with open('model.onnx', 'wb') as f: f.write(model_onnx.SerializeToString()) - sess = rt.InferenceSession(model_onnx.SerializeToString()) + oinf = OnnxInference(model_onnx) inputs = {"input": df[[0, 1]].values.astype(numpy.float32), "text": df[["text"]].values} - pred_onx = sess.run(None, inputs) + pred_onx = oinf.run(inputs) diff = numpy.abs( - pred_onx[1].ravel() - + pred_onx['probabilities'].ravel() - pipe.predict_proba(df).ravel()).sum() if verbose: - from mlprodict.onnxrt import OnnxInference - def td(a): if hasattr(a, 'todense'): b = a.todense() @@ -191,7 +189,8 @@ def td(a): if diff > 0.1: for i, (l1, l2) in enumerate( - zip(pipe.predict_proba(df), pred_onx[1])): + zip(pipe.predict_proba(df), + pred_onx['probabilities'])): d = numpy.abs(l1 - l2).sum() if verbose and d > 0.1: print("\nDISCREPENCY DETAILS") diff --git a/requirements-dev.txt b/requirements-dev.txt index 4078c9e5..6d172040 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -6,11 +6,11 @@ lightgbm loky matplotlib mlinsights -mlprodict>=0.5 +mlprodict>=0.6 nbsphinx onnx>=1.8.0 onnxconverter-common -onnxruntime>=1.6.0 +onnxruntime>=1.8.0 pillow py-spy pandas From c750bea40633a58cdda3b43e0596f8321af4e968 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Mon, 12 Jul 2021 16:20:34 +0200 Subject: [PATCH 09/16] Update requirements-dev.txt --- requirements-dev.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 6d172040..f60e2f44 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -8,7 +8,6 @@ matplotlib mlinsights mlprodict>=0.6 nbsphinx -onnx>=1.8.0 onnxconverter-common onnxruntime>=1.8.0 pillow From e9bbecd6c11f53eaf5f019cf8aaf5248134f94b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Mon, 12 Jul 2021 20:38:53 +0200 Subject: [PATCH 10/16] split unit test --- ...les.py => test_documentation_examples1.py} | 6 +- tests/test_documentation_examples2.py | 118 ++++++++++++++++++ 2 files changed, 122 insertions(+), 2 deletions(-) rename tests/{test_documentation_examples.py => test_documentation_examples1.py} (96%) create mode 100644 tests/test_documentation_examples2.py diff --git a/tests/test_documentation_examples.py b/tests/test_documentation_examples1.py similarity index 96% rename from tests/test_documentation_examples.py rename to tests/test_documentation_examples1.py index e6a2c9ce..171afe2e 100644 --- a/tests/test_documentation_examples.py +++ b/tests/test_documentation_examples1.py @@ -25,9 +25,9 @@ def import_source(module_file_path, module_name): return module_spec.loader.exec_module(module) -class TestDocumentationExample(unittest.TestCase): +class TestDocumentationExample1(unittest.TestCase): - def test_documentation_examples(self): + def test_documentation_examples1(self): this = os.path.abspath(os.path.dirname(__file__)) onxc = os.path.normpath(os.path.join(this, '..')) @@ -40,6 +40,8 @@ def test_documentation_examples(self): found = os.listdir(fold) tested = 0 for name in sorted(found): + if name.replace("\\", "/").split("/")[-1] >= "m": + break if '-v' in sys.argv: if name.endswith('plot_bbegin_measure_time.py'): diff --git a/tests/test_documentation_examples2.py b/tests/test_documentation_examples2.py new file mode 100644 index 00000000..ba6ab119 --- /dev/null +++ b/tests/test_documentation_examples2.py @@ -0,0 +1,118 @@ +""" +Tests examples from the documentation. +""" +import unittest +from distutils.version import StrictVersion +import os +import sys +import importlib +import subprocess +from datetime import datetime +import onnxruntime + + +def import_source(module_file_path, module_name): + if not os.path.exists(module_file_path): + raise FileNotFoundError(module_file_path) + module_spec = importlib.util.spec_from_file_location( + module_name, module_file_path) + if module_spec is None: + raise FileNotFoundError( + "Unable to find '{}' in '{}', cwd='{}'.".format( + module_name, module_file_path, + os.path.abspath(__file__))) + module = importlib.util.module_from_spec(module_spec) + return module_spec.loader.exec_module(module) + + +class TestDocumentationExample2(unittest.TestCase): + + def test_documentation_examples2(self): + + this = os.path.abspath(os.path.dirname(__file__)) + onxc = os.path.normpath(os.path.join(this, '..')) + pypath = os.environ.get('PYTHONPATH', None) + sep = ";" if sys.platform == 'win32' else ':' + pypath = "" if pypath in (None, "") else (pypath + sep) + pypath += onxc + os.environ['PYTHONPATH'] = pypath + fold = os.path.normpath(os.path.join(this, '..', 'examples')) + found = os.listdir(fold) + tested = 0 + for name in sorted(found): + if name.replace("\\", "/").split("/")[-1] < "m": + continue + + if '-v' in sys.argv: + if name.endswith('plot_bbegin_measure_time.py'): + if __name__ == "__main__": + print("%s: skip %r" % ( + datetime.now().strftime("%d-%m-%y %H:%M:%S"), + name)) + continue + + with self.subTest(name=name): + if name.startswith("plot_") and name.endswith(".py"): + if (name == "plot_pipeline_lightgbm.py" and + StrictVersion(onnxruntime.__version__) < + StrictVersion('1.0.0')): + continue + if __name__ == "__main__" or "-v" in sys.argv: + print("%s: run %r" % ( + datetime.now().strftime("%d-%m-%y %H:%M:%S"), + name)) + sys.path.insert(0, fold) + try: + mod = import_source(fold, os.path.splitext(name)[0]) + assert mod is not None + except FileNotFoundError: + # try another way + cmds = [sys.executable, "-u", + os.path.join(fold, name)] + p = subprocess.Popen( + cmds, stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + res = p.communicate() + out, err = res + st = err.decode('ascii', errors='ignore') + if len(st) > 0 and 'Traceback' in st: + if "No such file or directory: 'dot': 'dot'" in st: + # dot not installed, this part + # is tested in onnx framework + pass + elif '"dot" not found in path.' in st: + # dot not installed, this part + # is tested in onnx framework + pass + elif "No module named 'xgboost'" in st: + # xgboost not installed on CI + pass + elif ("cannot import name 'LightGbmModelContainer'" + " from 'onnxmltools.convert.common." + "_container'") in st: + # onnxmltools not recent enough + pass + elif ('Please fix either the inputs or ' + 'the model.') in st: + # onnxruntime datasets changed in master + # branch, still the same in released + # version on pypi + pass + elif 'dot: graph is too large' in st: + # graph is too big + pass + else: + raise RuntimeError( + "Example '{}' (cmd: {} - exec_prefix=" + "'{}') failed due to\n{}" + "".format(name, cmds, sys.exec_prefix, st)) + finally: + if sys.path[0] == fold: + del sys.path[0] + tested += 1 + if tested == 0: + raise RuntimeError("No example was tested.") + + +if __name__ == "__main__": + unittest.main() From 55132cc8398489f321f2dacf5ec2bcca8cd22b67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Mon, 12 Jul 2021 21:15:35 +0200 Subject: [PATCH 11/16] ut --- tests/model.onnx | Bin 0 -> 6737 bytes tests/test_documentation_examples1.py | 2 +- tests/test_documentation_examples2.py | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) create mode 100644 tests/model.onnx diff --git a/tests/model.onnx b/tests/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..283925c03b95ed1f938ac2fdd27250407e76e97d GIT binary patch literal 6737 zcmeGhYit}x`SyIzv3)sjoX9=$(g9U+O-$@0CbsU2FSZgj#!1@5rNNPV?49@QmG^cp zyX)ALMx<&hq0$s2C{>b%m*nl?l&6=$q{=>?=}r%HWBRV?Csp13@chE`MufK?dyADC)$Am zjymh07JE`DsaZ!gY}1mX1zmA4td1wbM^rkM>o*uzjdvRA*5~Gssge zBg3?cimpA34T&FhEX|lW#4%Q~4wKq|$No?$?=4dC?4&R*Q$4+&l1#je5?iZnT5`jV7CuyIGBEMsHNQ~!g&FI@ILMm_q?DVsn-nhJ0V`%LDO{^4%2 zd}8A7MpYu@7|+3OmV`l)gk6he;Sp4wuc(GqmK{qm>;hJ-j^mZlr;A1+G-w)m#Yt91 zlA6k_X7kMMMT;=WQEt_9*kVhv6btR|ja%;&lb4pIr@t#xd#M%){RreXT zTEvIPAXzu9py6C=fKP2i%{j8M>CUiWz~!eNtW;?;e27ukMo8# zK1XtiH$LZI;C%P9OUfk~`@Do&nUMxgd}1h0;%6A*___Z41EO1 z&v_X8@EL$dFTF_UvT0bG{kse2&fZ0cF-gbRvt_v4c^lx`o5#}7^YShiF1PD0Ty44F z!s+u@%J9wav;q9)>$!RO`6d-$)$T6=Jn`?LG+em#x(nMs<8$H7=VgFX=e}5mts8#` z@V%x3Y53%=4fBxed)tKzCpNn9(Oatl4*YJ+g=em8apA_FP5|^@cm!bnsoiOK;L)GB z@Wxv|D?{(M-b%x+8=o)3V_h!+%=K+b!v{hS&BOLzz3M`+<1rT=czUP|TjsCK!z+bf zr(xtn8vs6b>8EM<)VkNp@ab1S3-Fh_kGl}rbgB$LQl0~d#vV$;UAZ zd3g3KmJ2`V{c#z3zIHhc$0An&UO90P;BN&z4KLpOt_zP`oGZg$uir?M-M1b;cgZHE zd)>}zhSRgNa#oX7`l=tvB;)ODLDBJpx03t;oHJz4L~u^9y%WJ16QD51aGDUMBUBu7 zXI!3~ccW`5uF!(b%$G(Uc=mD+7Nn9GUG$@PB}S^^Oj~rm%Ph978-y6Drq=HdSS z&${=W_e1hO4+%LFulUhMmaat&zup)V?BykkQ#U_l<6&A|*x6#5aVT^hREO))t}l#b{koDi)`6e$>XYhdk_g z29azb!6Es>pNk}a2rOb4jjvK0t&~fYvwU4kbMg0loTGc38nj|bgC|ND~?8c05233 zubZP{OdCd{Cd8DN+Drxfa@{U*&FL)dsJzA2W3i>6Zl%sVDT<q&7sPc(@P!hyO_fRo`3id>}3* z<34;jy=FF;;r|1btLV`+{DVsiLJ>(0As_Nb{G;vc= "m": + if name >= "plot_u": break if '-v' in sys.argv: diff --git a/tests/test_documentation_examples2.py b/tests/test_documentation_examples2.py index ba6ab119..5eec8e18 100644 --- a/tests/test_documentation_examples2.py +++ b/tests/test_documentation_examples2.py @@ -40,7 +40,7 @@ def test_documentation_examples2(self): found = os.listdir(fold) tested = 0 for name in sorted(found): - if name.replace("\\", "/").split("/")[-1] < "m": + if name < "plot_u": continue if '-v' in sys.argv: From 2e4aa2f1861732b6926e8c25ecaf3e3c7e90cadd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Tue, 13 Jul 2021 00:44:29 +0200 Subject: [PATCH 12/16] skip one test on circleci --- .gitignore | 1 + tests/model.onnx | Bin 6737 -> 0 bytes tests/test_documentation_examples2.py | 2 ++ 3 files changed, 3 insertions(+) delete mode 100644 tests/model.onnx diff --git a/.gitignore b/.gitignore index e64bd609..aa155f86 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,4 @@ tests/pipeline*.onnx temp_* examples/pipeline_lightgbm.onnx examples/model.onnx +tests/model.onnx diff --git a/tests/model.onnx b/tests/model.onnx deleted file mode 100644 index 283925c03b95ed1f938ac2fdd27250407e76e97d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6737 zcmeGhYit}x`SyIzv3)sjoX9=$(g9U+O-$@0CbsU2FSZgj#!1@5rNNPV?49@QmG^cp zyX)ALMx<&hq0$s2C{>b%m*nl?l&6=$q{=>?=}r%HWBRV?Csp13@chE`MufK?dyADC)$Am zjymh07JE`DsaZ!gY}1mX1zmA4td1wbM^rkM>o*uzjdvRA*5~Gssge zBg3?cimpA34T&FhEX|lW#4%Q~4wKq|$No?$?=4dC?4&R*Q$4+&l1#je5?iZnT5`jV7CuyIGBEMsHNQ~!g&FI@ILMm_q?DVsn-nhJ0V`%LDO{^4%2 zd}8A7MpYu@7|+3OmV`l)gk6he;Sp4wuc(GqmK{qm>;hJ-j^mZlr;A1+G-w)m#Yt91 zlA6k_X7kMMMT;=WQEt_9*kVhv6btR|ja%;&lb4pIr@t#xd#M%){RreXT zTEvIPAXzu9py6C=fKP2i%{j8M>CUiWz~!eNtW;?;e27ukMo8# zK1XtiH$LZI;C%P9OUfk~`@Do&nUMxgd}1h0;%6A*___Z41EO1 z&v_X8@EL$dFTF_UvT0bG{kse2&fZ0cF-gbRvt_v4c^lx`o5#}7^YShiF1PD0Ty44F z!s+u@%J9wav;q9)>$!RO`6d-$)$T6=Jn`?LG+em#x(nMs<8$H7=VgFX=e}5mts8#` z@V%x3Y53%=4fBxed)tKzCpNn9(Oatl4*YJ+g=em8apA_FP5|^@cm!bnsoiOK;L)GB z@Wxv|D?{(M-b%x+8=o)3V_h!+%=K+b!v{hS&BOLzz3M`+<1rT=czUP|TjsCK!z+bf zr(xtn8vs6b>8EM<)VkNp@ab1S3-Fh_kGl}rbgB$LQl0~d#vV$;UAZ zd3g3KmJ2`V{c#z3zIHhc$0An&UO90P;BN&z4KLpOt_zP`oGZg$uir?M-M1b;cgZHE zd)>}zhSRgNa#oX7`l=tvB;)ODLDBJpx03t;oHJz4L~u^9y%WJ16QD51aGDUMBUBu7 zXI!3~ccW`5uF!(b%$G(Uc=mD+7Nn9GUG$@PB}S^^Oj~rm%Ph978-y6Drq=HdSS z&${=W_e1hO4+%LFulUhMmaat&zup)V?BykkQ#U_l<6&A|*x6#5aVT^hREO))t}l#b{koDi)`6e$>XYhdk_g z29azb!6Es>pNk}a2rOb4jjvK0t&~fYvwU4kbMg0loTGc38nj|bgC|ND~?8c05233 zubZP{OdCd{Cd8DN+Drxfa@{U*&FL)dsJzA2W3i>6Zl%sVDT<q&7sPc(@P!hyO_fRo`3id>}3* z<34;jy=FF;;r|1btLV`+{DVsiLJ>(0As_Nb{G;vc Date: Tue, 13 Jul 2021 01:21:54 +0200 Subject: [PATCH 13/16] Update plot_usparse_xgboost.py --- examples/plot_usparse_xgboost.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/plot_usparse_xgboost.py b/examples/plot_usparse_xgboost.py index b556f24c..2a1d185b 100644 --- a/examples/plot_usparse_xgboost.py +++ b/examples/plot_usparse_xgboost.py @@ -33,7 +33,7 @@ from sklearn.pipeline import Pipeline from sklearn.preprocessing import StandardScaler from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer -from sklearn.experimental import enable_hist_gradient_boosting +from sklearn.experimental import enable_hist_gradient_boosting # pylint: disable=F401 from sklearn.ensemble import ( RandomForestClassifier, HistGradientBoostingClassifier) from xgboost import XGBClassifier @@ -190,7 +190,7 @@ def td(a): if diff > 0.1: for i, (l1, l2) in enumerate( zip(pipe.predict_proba(df), - pred_onx['probabilities'])): + pred_onx['probabilities'])): d = numpy.abs(l1 - l2).sum() if verbose and d > 0.1: print("\nDISCREPENCY DETAILS") From d9d563bf92834823c6106a6ac450ba3b1afc5831 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Tue, 13 Jul 2021 01:32:09 +0200 Subject: [PATCH 14/16] Update plot_usparse_xgboost.py --- examples/plot_usparse_xgboost.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/plot_usparse_xgboost.py b/examples/plot_usparse_xgboost.py index 2a1d185b..40f71cca 100644 --- a/examples/plot_usparse_xgboost.py +++ b/examples/plot_usparse_xgboost.py @@ -33,7 +33,8 @@ from sklearn.pipeline import Pipeline from sklearn.preprocessing import StandardScaler from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer -from sklearn.experimental import enable_hist_gradient_boosting # pylint: disable=F401 +from sklearn.experimental import ( # pylint: disable=F401 + enable_hist_gradient_boosting) from sklearn.ensemble import ( RandomForestClassifier, HistGradientBoostingClassifier) from xgboost import XGBClassifier From 436cacdfd2980cc704151beca716fd8c580a2df7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Tue, 13 Jul 2021 01:41:56 +0200 Subject: [PATCH 15/16] lint --- .circleci/config.yml | 12 ++++++------ examples/plot_usparse_xgboost.py | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 0e5eaa33..c0af75d8 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -39,12 +39,6 @@ jobs: . venv/bin/activate python setup.py build_ext --inplace - - run: - name: run tests - command: | - . venv/bin/activate - coverage run --omit=tests/test_*.py -m unittest discover tests -v - - run: name: flake8 command: | @@ -53,6 +47,12 @@ jobs: python -m flake8 onnxcustom python -m flake8 examples + - run: + name: run tests + command: | + . venv/bin/activate + coverage run --omit=tests/test_*.py -m unittest discover tests -v + - run: name: coverage command: | diff --git a/examples/plot_usparse_xgboost.py b/examples/plot_usparse_xgboost.py index 40f71cca..cdbad137 100644 --- a/examples/plot_usparse_xgboost.py +++ b/examples/plot_usparse_xgboost.py @@ -34,7 +34,7 @@ from sklearn.preprocessing import StandardScaler from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer from sklearn.experimental import ( # pylint: disable=F401 - enable_hist_gradient_boosting) + enable_hist_gradient_boosting) # pylint: disable=F401 from sklearn.ensemble import ( RandomForestClassifier, HistGradientBoostingClassifier) from xgboost import XGBClassifier From 9d7f4f2b5387cb5dc49f7b21efd5266ace755fb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Tue, 13 Jul 2021 01:43:37 +0200 Subject: [PATCH 16/16] Update plot_usparse_xgboost.py --- examples/plot_usparse_xgboost.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/plot_usparse_xgboost.py b/examples/plot_usparse_xgboost.py index cdbad137..6b1860e4 100644 --- a/examples/plot_usparse_xgboost.py +++ b/examples/plot_usparse_xgboost.py @@ -33,8 +33,8 @@ from sklearn.pipeline import Pipeline from sklearn.preprocessing import StandardScaler from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer -from sklearn.experimental import ( # pylint: disable=F401 - enable_hist_gradient_boosting) # pylint: disable=F401 +from sklearn.experimental import ( # noqa + enable_hist_gradient_boosting) # noqa from sklearn.ensemble import ( RandomForestClassifier, HistGradientBoostingClassifier) from xgboost import XGBClassifier