From 623d2ecc94b7859da852f07a422f4e970b6cbc3c Mon Sep 17 00:00:00 2001 From: flynn Date: Thu, 5 Nov 2020 21:06:47 -0500 Subject: [PATCH 1/5] sklearn instrumentation --- .../CHANGELOG.md | 5 + .../LICENSE | 201 +++++ .../MANIFEST.in | 9 + .../README.rst | 23 + .../setup.cfg | 55 ++ .../setup.py | 31 + .../instrumentation/sklearn/__init__.py | 757 ++++++++++++++++++ .../instrumentation/sklearn/version.py | 1 + .../tests/__init__.py | 0 .../tests/fixtures.py | 40 + .../tests/test_sklearn.py | 161 ++++ 11 files changed, 1283 insertions(+) create mode 100644 instrumentation/opentelemetry-instrumentation-sklearn/CHANGELOG.md create mode 100644 instrumentation/opentelemetry-instrumentation-sklearn/LICENSE create mode 100644 instrumentation/opentelemetry-instrumentation-sklearn/MANIFEST.in create mode 100644 instrumentation/opentelemetry-instrumentation-sklearn/README.rst create mode 100644 instrumentation/opentelemetry-instrumentation-sklearn/setup.cfg create mode 100644 instrumentation/opentelemetry-instrumentation-sklearn/setup.py create mode 100644 instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py create mode 100644 instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/version.py create mode 100644 instrumentation/opentelemetry-instrumentation-sklearn/tests/__init__.py create mode 100644 instrumentation/opentelemetry-instrumentation-sklearn/tests/fixtures.py create mode 100644 instrumentation/opentelemetry-instrumentation-sklearn/tests/test_sklearn.py diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/CHANGELOG.md b/instrumentation/opentelemetry-instrumentation-sklearn/CHANGELOG.md new file mode 100644 index 0000000000..3e04402cea --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-sklearn/CHANGELOG.md @@ -0,0 +1,5 @@ +# Changelog + +## Unreleased + +- Initial release diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/LICENSE b/instrumentation/opentelemetry-instrumentation-sklearn/LICENSE new file mode 100644 index 0000000000..261eeb9e9f --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-sklearn/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/MANIFEST.in b/instrumentation/opentelemetry-instrumentation-sklearn/MANIFEST.in new file mode 100644 index 0000000000..aed3e33273 --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-sklearn/MANIFEST.in @@ -0,0 +1,9 @@ +graft src +graft tests +global-exclude *.pyc +global-exclude *.pyo +global-exclude __pycache__/* +include CHANGELOG.md +include MANIFEST.in +include README.rst +include LICENSE diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/README.rst b/instrumentation/opentelemetry-instrumentation-sklearn/README.rst new file mode 100644 index 0000000000..20679b1011 --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-sklearn/README.rst @@ -0,0 +1,23 @@ +OpenTelemetry Scikit-Learn Instrumentation +========================================== + +|pypi| + +.. |pypi| image:: https://badge.fury.io/py/opentelemetry-instrumentation-sklearn.svg + :target: https://pypi.org/project/opentelemetry-instrumentation-sklearn/ + +This library allows tracing HTTP requests made by the +`scikit-learn `_ library. + +Installation +------------ + +:: + + pip install opentelemetry-instrumentation-sklearn + +References +---------- + +* `OpenTelemetry sklearn Instrumentation `_ +* `OpenTelemetry Project `_ diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/setup.cfg b/instrumentation/opentelemetry-instrumentation-sklearn/setup.cfg new file mode 100644 index 0000000000..dc6b8e0204 --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-sklearn/setup.cfg @@ -0,0 +1,55 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +[metadata] +name = opentelemetry-instrumentation-sklearn +description = OpenTelemetry sklearn instrumentation +long_description = file: README.rst +long_description_content_type = text/x-rst +author = OpenTelemetry Authors +author_email = cncf-opentelemetry-contributors@lists.cncf.io +url = https://github.com/open-telemetry/opentelemetry-python/tree/master/instrumentation/opentelemetry-instrumentation-sklearn +platforms = any +license = Apache-2.0 +classifiers = + Development Status :: 4 - Beta + Intended Audience :: Developers + License :: OSI Approved :: Apache Software License + Programming Language :: Python + Programming Language :: Python :: 3 + Programming Language :: Python :: 3.5 + Programming Language :: Python :: 3.6 + Programming Language :: Python :: 3.7 + Programming Language :: Python :: 3.8 + +[options] +python_requires = >=3.5 +package_dir= + =src +packages=find_namespace: +install_requires = + opentelemetry-api == 0.16.dev0 + opentelemetry-instrumentation == 0.16.dev0 + scikit-learn ~= 0.22.0 + +[options.extras_require] +test = + opentelemetry-test == 0.16.dev0 + +[options.packages.find] +where = src + +[options.entry_points] +opentelemetry_instrumentor = + sklearn = opentelemetry.instrumentation.sklearn:SklearnInstrumentor diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/setup.py b/instrumentation/opentelemetry-instrumentation-sklearn/setup.py new file mode 100644 index 0000000000..92f53925e0 --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-sklearn/setup.py @@ -0,0 +1,31 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os + +import setuptools + +BASE_DIR = os.path.dirname(__file__) +VERSION_FILENAME = os.path.join( + BASE_DIR, + "src", + "opentelemetry", + "instrumentation", + "sklearn", + "version.py", +) +PACKAGE_INFO = {} +with open(VERSION_FILENAME) as f: + exec(f.read(), PACKAGE_INFO) + +setuptools.setup(version=PACKAGE_INFO["__version__"]) diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py b/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py new file mode 100644 index 0000000000..d7ac330d1e --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py @@ -0,0 +1,757 @@ +# Copyright 2020, OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +The integration with sklearn supports the scikit-learn compatible libraries, +it can be enabled by using ``SklearnInstrumentor``. + +.. sklearn: https://github.com/scikit-learn/scikit-learn + +Usage +----- + +Package instrumentation example: + +.. code-block:: python + + from opentelemetry.instrumentation.sklearn import SklearnInstrumentor + + # instrument the sklearn library + SklearnInstrumentor().instrument() + + # instrument sklearn and other libraries + SklearnInstrumentor( + packages=["sklearn", "lightgbm", "xgboost"] + ).instrument() + + +Model intrumentation example: + +.. code-block:: python + + from opentelemetry.instrumentation.sklearn import SklearnInstrumentor + from sklearn.datasets import load_iris + from sklearn.ensemble import RandomForestClassifier + from sklearn.model_selection import train_test_split + from sklearn.pipeline import Pipeline + + X, y = load_iris(return_X_y=True) + X_train, X_test, y_train, y_test = train_test_split(X, y) + + model = Pipeline( + [ + ("class", RandomForestClassifier(n_estimators=10)), + ] + ) + + model.fit(X_train, y_train) + + SklearnInstrumentor().instrument_estimator(model) + +""" +import logging +import os +from functools import wraps +from importlib import import_module +from inspect import isclass +from pkgutil import iter_modules +from typing import Callable, Dict, List, MutableMapping, Sequence, Type, Union + +from sklearn.base import BaseEstimator +from sklearn.pipeline import FeatureUnion, Pipeline +from sklearn.tree import BaseDecisionTree +from sklearn.utils.metaestimators import _IffHasAttrDescriptor + +from opentelemetry.instrumentation.instrumentor import BaseInstrumentor +from opentelemetry.instrumentation.sklearn.version import __version__ +from opentelemetry.trace import get_tracer +from opentelemetry.util.types import Attributes + +logger = logging.getLogger(__name__) + + +def implement_spans( + func: Callable, + estimator: Union[BaseEstimator, Type[BaseEstimator]], + attributes: Attributes = None, +): + """Wrap the method call with a span. + + Args: + func: A callable to be wrapped in a span + estimator: An instance or class of an estimator + attributes: Attributes to apply to the span + + Returns: + The passed function wrapped in a span. + """ + if isclass(estimator): + name = estimator.__name__ + else: + name = estimator.__class__.__name__ + logger.debug("Instrumenting: %s.%s", name, func.__name__) + + attributes = attributes or {} + + @wraps(func) + def wrapper(*args, **kwargs): + with get_tracer(__name__, __version__).start_as_current_span( + name="{cls}.{func}".format(cls=name, func=func.__name__), + ) as span: + if span.is_recording(): + for key, val in attributes.items(): + span.set_attribute(key, val) + return func(*args, **kwargs) + + return wrapper + + +def implement_spans_delegator( + obj: _IffHasAttrDescriptor, attributes: Attributes = None +): + """Wrap the descriptor's fn with a span. + + Args: + obj: An instance of _IffHasAttrDescriptor + attributes: Attributes to apply to the span + """ + # Don't instrument inherited delegators + if hasattr(obj, "_otel_original_fn"): + logger.debug("Already instrumented: %s", obj.fn.__qualname__) + return + + attributes = attributes or {} + + def implement_spans_fn(func: Callable): + @wraps(func) + def wrapper(*args, **kwargs): + with get_tracer(__name__, __version__).start_as_current_span( + name=func.__qualname__ + ) as span: + if span.is_recording(): + for key, val in attributes.items(): + span.set_attribute(key, val) + return func(*args, **kwargs) + + return wrapper + + logger.debug("Instrumenting: %s", obj.fn.__qualname__) + + setattr(obj, "_otel_original_fn", getattr(obj, "fn")) + setattr(obj, "fn", implement_spans_fn(obj.fn)) + + +def get_delegator( + estimator: Type[BaseEstimator], method_name: str +) -> Union[_IffHasAttrDescriptor, None]: + """Get the delegator from a class method or None. + + Args: + estimator: A class derived from ``sklearn``'s ``BaseEstimator``. + method_name (str): The method name of the estimator on which to + check for delegation. + + Returns: + The delegator, if one exists, otherwise None. + """ + class_attr = getattr(estimator, method_name) + if getattr(class_attr, "__closure__", None) is not None: + for cell in class_attr.__closure__: + if isinstance(cell.cell_contents, _IffHasAttrDescriptor): + return cell.cell_contents + return None + + +def get_base_estimators(packages: List[str]) -> Dict[str, Type[BaseEstimator]]: + """Walk package hierarchies to get BaseEstimator-derived classes. + + Args: + packages (list(str)): A list of package names to instrument. + + Returns: + A dictionary of qualnames and classes inheriting from + ``BaseEstimator``. + """ + klasses = dict() + for package_name in packages: + lib = import_module(package_name) + package_dir = os.path.dirname(lib.__file__) + for (_, module_name, _) in iter_modules([package_dir]): + # import the module and iterate through its attributes + try: + module = import_module(package_name + "." + module_name) + except ImportError: + logger.warning( + "Unable to import %s.%s", package_name, module_name + ) + continue + for attribute_name in dir(module): + attrib = getattr(module, attribute_name) + if isclass(attrib) and issubclass(attrib, BaseEstimator): + klasses[ + ".".join([package_name, module_name, attribute_name]) + ] = attrib + return klasses + + +# Methods on which spans should be applied. +DEFAULT_METHODS = [ + "fit", + "transform", + "predict", + "predict_proba", + "_fit", + "_transform", + "_predict", + "_predict_proba", +] + +# Classes and their attributes which contain a list of tupled estimators +# through which we should walk recursively for estimators. +DEFAULT_NAMEDTUPLE_ATTRIBS = { + Pipeline: ["steps"], + FeatureUnion: ["transformer_list"], +} + +# Classes and their attributes which contain an estimator or sequence of +# estimators through which we should walk recursively for estimators. +DEFAULT_ATTRIBS = {} + +# Classes (including children) explicitly excluded from autoinstrumentation +DEFAULT_EXCLUDE_CLASSES = [BaseDecisionTree] + +# Default packages for autoinstrumentation +DEFAULT_PACKAGES = ["sklearn"] + + +class SklearnInstrumentor(BaseInstrumentor): + """Instrument a fitted sklearn model with opentelemetry spans. + + Instrument methods of ``BaseEstimator``-derived components in a sklearn + model. The assumption is that a machine learning model ``Pipeline`` (or + class descendent) is being instrumented with opentelemetry. Within a + ``Pipeline`` is some hierarchy of estimators and transformers. + + The ``instrument_estimator`` method walks this hierarchy of estimators, + implementing each of the defined methods with its own span. + + Certain estimators in the sklearn ecosystem contain other estimators as + instance attributes. Support for walking this embedded sub-hierarchy is + supported with ``recurse_attribs``. This argument is a dictionary + with classes as keys, and a list of attributes representing embedded + estimators as values. By default, ``recurse_attribs`` is empty. + + Similar to Pipelines, there are also estimators which have class attributes + as a list of 2-tuples; for instance, the ``FeatureUnion`` and its attribute + ``transformer_list``. Instrumenting estimators like this is also + supported through the ``recurse_namedtuple_attribs`` argument. This + argument is a dictionary with classes as keys, and a list of attribute + names representing the namedtuple list(s). By default, the + ``recurse_namedtuple_attribs`` dictionary supports + ``Pipeline`` with ``steps``, and ``FeatureUnion`` with + ``transformer_list``. + + Note that spans will not be generated for any child transformer whose + parent transformer has ``n_jobs`` parameter set to anything besides + ``None`` or ``1``. + + Package instrumentation example: + + .. code-block:: python + + from opentelemetry.instrumentation.sklearn import SklearnInstrumentor + + # instrument the sklearn library + SklearnInstrumentor().instrument() + + # instrument several sklearn-compatible libraries + packages = ["sklearn", "lightgbm", "xgboost"] + SklearnInstrumentor(packages=packages).instrument() + + + Model intrumentation example: + + .. code-block:: python + + from opentelemetry.instrumentation.sklearn import SklearnInstrumentor + from sklearn.datasets import load_iris + from sklearn.ensemble import RandomForestClassifier + from sklearn.model_selection import train_test_split + from sklearn.pipeline import Pipeline + + X, y = load_iris(return_X_y=True) + X_train, X_test, y_train, y_test = train_test_split(X, y) + + model = Pipeline( + [ + ("class", RandomForestClassifier(n_estimators=10)), + ] + ) + + model.fit(X_train, y_train) + + SklearnInstrumentor().instrument_estimator(model) + + Args: + methods (list): A list of method names on which to instrument a span. + This list of methods will be checked on all estimators in the model + hierarchy. Used in package and model instrumentation + recurse_attribs (dict): A dictionary of ``BaseEstimator``-derived + sklearn classes as keys, with values being a list of attributes. Each + attribute represents either an estimator or list of estimators on + which to also implement spans. An example is + ``RandomForestClassifier`` and its attribute ``estimators_``. Used + in model instrumentation only. + recurse_namedtuple_attribs (dict): A dictionary of ``BaseEstimator``- + derived sklearn types as keys, with values being a list of + attribute names. Each attribute represents a list of 2-tuples in + which the first element is the estimator name, and the second + element is the estimator. Defaults include sklearn's ``Pipeline`` + and its attribute ``steps``, and the ``FeatureUnion`` and its + attribute ``transformer_list``. Used in model instrumentation only. + packages: A list of sklearn-compatible packages to + instrument. Used with package instrumentation only. + exclude_classes: A list of classes to exclude from instrumentation. + Child classes are also excluded. Default is sklearn's + ``[BaseDecisionTree]``. + """ + + def __new__(cls, *args, **kwargs): + """Override new. + + The base class' new method passes args and kwargs. We override because + we init the class with configuration and Python raises TypeError when + additional arguments are passed to the object.__new__() method. + """ + if cls._instance is None: + cls._instance = object.__new__(cls) + + return cls._instance + + def __init__( + self, + methods: List[str] = None, + recurse_attribs: Dict[Type[BaseEstimator], List[str]] = None, + recurse_namedtuple_attribs: Dict[ + Type[BaseEstimator], List[str] + ] = None, + packages: List[str] = None, + exclude_classes: List[Type] = None, + ): + self.methods = methods or DEFAULT_METHODS + self.recurse_attribs = recurse_attribs or DEFAULT_ATTRIBS + self.recurse_namedtuple_attribs = ( + recurse_namedtuple_attribs or DEFAULT_NAMEDTUPLE_ATTRIBS + ) + self.packages = packages or DEFAULT_PACKAGES + if exclude_classes is None: + self.exclude_classes = tuple(DEFAULT_EXCLUDE_CLASSES) + else: + self.exclude_classes = tuple(exclude_classes) + + def _instrument(self, **kwargs): + """Instrument the library, and any additional specified on init.""" + klasses = get_base_estimators(packages=self.packages) + attributes = kwargs.get("attributes") + for _, klass in klasses.items(): + if issubclass(klass, self.exclude_classes): + logger.debug("Not instrumenting (excluded): %s", str(klass)) + else: + logger.debug("Instrumenting: %s", str(klass)) + for method_name in self.methods: + if hasattr(klass, method_name): + self._instrument_class_method( + estimator=klass, + method_name=method_name, + attributes=attributes, + ) + + def _uninstrument(self, **kwargs): + """Uninstrument the library""" + klasses = get_base_estimators(packages=self.packages) + for _, klass in klasses.items(): + logger.debug("Uninstrumenting: %s", str(klass)) + for method_name in self.methods: + if hasattr(klass, method_name): + self._uninstrument_class_method( + estimator=klass, method_name=method_name + ) + + def instrument_estimator( + self, estimator: BaseEstimator, attributes: Attributes = None + ): + """Instrument a fitted estimator and its hierarchy where configured. + + Args: + estimator (sklearn.base.BaseEstimator): A fitted ``sklearn`` + estimator, typically a ``Pipeline`` instance. + attributes (dict): Attributes to attach to the spans. + """ + if isinstance(estimator, self.exclude_classes): + logger.debug( + "Not instrumenting (excluded): %s", + estimator.__class__.__name__, + ) + return + + if isinstance( + estimator, tuple(self.recurse_namedtuple_attribs.keys()) + ): + self._instrument_estimator_namedtuple( + estimator=estimator, attributes=attributes + ) + + if isinstance(estimator, tuple(self.recurse_attribs.keys())): + self._instrument_estimator_attribute( + estimator=estimator, attributes=attributes + ) + + for method_name in self.methods: + if hasattr(estimator, method_name): + self._instrument_instance_method( + estimator=estimator, + method_name=method_name, + attributes=attributes, + ) + + def uninstrument_estimator(self, estimator: BaseEstimator): + """Uninstrument a fitted estimator and its hierarchy where configured. + + Args: + estimator (sklearn.base.BaseEstimator): A fitted ``sklearn`` + estimator, typically a ``Pipeline`` instance. + """ + if isinstance(estimator, self.exclude_classes): + logger.debug( + "Not uninstrumenting (excluded): %s", + estimator.__class__.__name__, + ) + return + + if isinstance( + estimator, tuple(self.recurse_namedtuple_attribs.keys()) + ): + self._uninstrument_estimator_namedtuple(estimator=estimator) + + if isinstance(estimator, tuple(self.recurse_attribs.keys())): + self._uninstrument_estimator_attribute(estimator=estimator) + + for method_name in self.methods: + if hasattr(estimator, method_name): + self._uninstrument_instance_method( + estimator=estimator, method_name=method_name + ) + + def _check_instrumented( + self, + estimator: Union[BaseEstimator, Type[BaseEstimator]], + method_name: str, + ) -> bool: + """Check an estimator-method is instrumented. + + Args: + estimator (BaseEstimator): A class or instance of an ``sklearn`` + estimator. + method_name (str): The method name of the estimator on which to + check for instrumentation. + """ + orig_method_name = "_otel_original_" + method_name + has_original = hasattr(estimator, orig_method_name) + orig_class, orig_method = getattr( + estimator, orig_method_name, (None, None) + ) + same_class = orig_class == estimator + if has_original and same_class: + class_method = self._unwrap_function( + getattr(estimator, method_name) + ) + # if they match then the subclass doesn't override + # if they don't then the overridden method needs instrumentation + if class_method.__name__ == orig_method.__name__: + return True + return False + + def _uninstrument_class_method( + self, estimator: Type[BaseEstimator], method_name: str + ): + """Uninstrument a class method. + + Replaces the patched method with the original, and deletes the + attribute which stored the original method. + + Args: + estimator (BaseEstimator): A class or instance of an ``sklearn`` + estimator. + method_name (str): The method name of the estimator on which to + apply a span. + """ + orig_method_name = "_otel_original_" + method_name + if isclass(estimator): + qualname = estimator.__qualname__ + else: + qualname = estimator.__class__.__qualname__ + delegator = get_delegator(estimator, method_name) + if self._check_instrumented(estimator, method_name): + logger.debug( + "Uninstrumenting: %s.%s", qualname, method_name, + ) + _, orig_method = getattr(estimator, orig_method_name) + setattr( + estimator, method_name, orig_method, + ) + delattr(estimator, orig_method_name) + elif delegator is not None: + if not hasattr(delegator, "_otel_original_fn"): + logger.debug( + "Already uninstrumented: %s.%s", qualname, method_name, + ) + return + setattr( + delegator, "fn", getattr(delegator, "_otel_original_fn"), + ) + delattr(delegator, "_otel_original_fn") + else: + logger.debug( + "Already uninstrumented: %s.%s", qualname, method_name, + ) + + def _uninstrument_instance_method( + self, estimator: BaseEstimator, method_name: str + ): + """Uninstrument an instance method. + + Replaces the patched method with the original, and deletes the + attribute which stored the original method. + + Args: + estimator (BaseEstimator): A class or instance of an ``sklearn`` + estimator. + method_name (str): The method name of the estimator on which to + apply a span. + """ + orig_method_name = "_otel_original_" + method_name + if isclass(estimator): + qualname = estimator.__qualname__ + else: + qualname = estimator.__class__.__qualname__ + if self._check_instrumented(estimator, method_name): + logger.debug( + "Uninstrumenting: %s.%s", qualname, method_name, + ) + _, orig_method = getattr(estimator, orig_method_name) + setattr( + estimator, method_name, orig_method, + ) + delattr(estimator, orig_method_name) + else: + logger.debug( + "Already uninstrumented: %s.%s", qualname, method_name, + ) + + def _instrument_class_method( + self, + estimator: Type[BaseEstimator], + method_name: str, + attributes: Attributes = None, + ): + """Instrument an estimator method with a span. + + When instrumenting we attach a tuple of (Class, method) to the + attribute ``_otel_original_`` for each method. This allows + us to replace the patched with the original in uninstrumentation, but + also allows proper instrumentation of child classes without + instrumenting inherited methods twice. + + Args: + estimator (BaseEstimator): A ``BaseEstimator``-derived + class + method_name (str): The method name of the estimator on which to + apply a span. + attributes (dict): Attributes to attach to the spans. + """ + if self._check_instrumented(estimator, method_name): + logger.debug( + "Already instrumented: %s.%s", + estimator.__qualname__, + method_name, + ) + return + class_attr = getattr(estimator, method_name) + delegator = get_delegator(estimator, method_name) + if isinstance(class_attr, property): + logger.debug( + "Not instrumenting found property: %s.%s", + estimator.__qualname__, + method_name, + ) + elif delegator is not None: + implement_spans_delegator(delegator) + else: + setattr( + estimator, + "_otel_original_" + method_name, + (estimator, class_attr), + ) + setattr( + estimator, + method_name, + implement_spans(class_attr, estimator, attributes), + ) + + def _unwrap_function(self, function): + """Fetch the function underlying any decorators""" + if hasattr(function, "__wrapped__"): + return self._unwrap_function(function.__wrapped__) + return function + + def _instrument_instance_method( + self, + estimator: BaseEstimator, + method_name: str, + attributes: Attributes = None, + ): + """Instrument an estimator instance method with a span. + + When instrumenting we attach a tuple of (Class, method) to the + attribute ``_otel_original_`` for each method. This allows + us to replace the patched with the original in unstrumentation. + + Args: + estimator (BaseEstimator): A fitted ``sklearn`` estimator. + method_name (str): The method name of the estimator on which to + apply a span. + attributes (dict): Attributes to attach to the spans. + """ + if self._check_instrumented(estimator, method_name): + logger.debug( + "Already instrumented: %s.%s", + estimator.__class__.__qualname__, + method_name, + ) + return + + class_attr = getattr(type(estimator), method_name, None) + if isinstance(class_attr, property): + logger.debug( + "Not instrumenting found property: %s.%s", + estimator.__class__.__qualname__, + method_name, + ) + else: + method = getattr(estimator, method_name) + setattr( + estimator, "_otel_original_" + method_name, (estimator, method) + ) + setattr( + estimator, + method_name, + implement_spans(method, estimator, attributes), + ) + + def _instrument_estimator_attribute( + self, estimator: BaseEstimator, attributes: Attributes = None + ): + """Instrument instance attributes which also contain estimators. + + Handle instance attributes which are also estimators, are a list + (Sequence) of estimators, or are mappings (dictionary) in which + the values are estimators. + + Examples include ``RandomForestClassifier`` and + ``MultiOutputRegressor`` instances which have attributes + ``estimators_`` attributes. + + Args: + estimator (BaseEstimator): A fitted ``sklearn`` estimator, with an + attribute which also contains an estimator or collection of + estimators. + attributes (dict): Attributes to attach to the spans. + """ + attribs = self.recurse_attribs.get(estimator.__class__, []) + for attrib in attribs: + attrib_value = getattr(estimator, attrib) + if isinstance(attrib_value, Sequence): + for value in attrib_value: + self.instrument_estimator( + estimator=value, attributes=attributes + ) + elif isinstance(attrib_value, MutableMapping): + for value in attrib_value.values(): + self.instrument_estimator( + estimator=value, attributes=attributes + ) + else: + self.instrument_estimator( + estimator=attrib_value, attributes=attributes + ) + + def _instrument_estimator_namedtuple( + self, estimator: BaseEstimator, attributes: Attributes = None + ): + """Instrument attributes with (name, estimator) tupled components. + + Examples include Pipeline and FeatureUnion instances which + have attributes steps and transformer_list, respectively. + + Args: + estimator: A fitted sklearn estimator, with an attribute which also + contains an estimator or collection of estimators. + attributes (dict): Attributes to attach to the spans. + """ + attribs = self.recurse_namedtuple_attribs.get(estimator.__class__, []) + for attrib in attribs: + for _, est in getattr(estimator, attrib): + self.instrument_estimator(estimator=est, attributes=attributes) + + def _uninstrument_estimator_attribute(self, estimator: BaseEstimator): + """Uninstrument instance attributes which also contain estimators. + + Handle instance attributes which are also estimators, are a list + (Sequence) of estimators, or are mappings (dictionary) in which + the values are estimators. + + Examples include ``RandomForestClassifier`` and + ``MultiOutputRegressor`` instances which have attributes + ``estimators_`` attributes. + + Args: + estimator (BaseEstimator): A fitted ``sklearn`` estimator, with an + attribute which also contains an estimator or collection of + estimators. + """ + attribs = self.recurse_attribs.get(estimator.__class__, []) + for attrib in attribs: + attrib_value = getattr(estimator, attrib) + if isinstance(attrib_value, Sequence): + for value in attrib_value: + self.uninstrument_estimator(estimator=value) + elif isinstance(attrib_value, MutableMapping): + for value in attrib_value.values(): + self.uninstrument_estimator(estimator=value) + else: + self.uninstrument_estimator(estimator=attrib_value) + + def _uninstrument_estimator_namedtuple(self, estimator: BaseEstimator): + """Uninstrument attributes with (name, estimator) tupled components. + + Examples include Pipeline and FeatureUnion instances which + have attributes steps and transformer_list, respectively. + + Args: + estimator: A fitted sklearn estimator, with an attribute which also + contains an estimator or collection of estimators. + """ + attribs = self.recurse_namedtuple_attribs.get(estimator.__class__, []) + for attrib in attribs: + for _, est in getattr(estimator, attrib): + self.uninstrument_estimator(estimator=est) diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/version.py b/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/version.py new file mode 100644 index 0000000000..4c4951dcda --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/version.py @@ -0,0 +1 @@ +__version__ = "0.16.dev0" diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/tests/__init__.py b/instrumentation/opentelemetry-instrumentation-sklearn/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/tests/fixtures.py b/instrumentation/opentelemetry-instrumentation-sklearn/tests/fixtures.py new file mode 100644 index 0000000000..4f9b8aa47e --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-sklearn/tests/fixtures.py @@ -0,0 +1,40 @@ +import numpy as np +from sklearn.datasets import load_iris +from sklearn.decomposition import PCA, TruncatedSVD +from sklearn.ensemble import RandomForestClassifier +from sklearn.model_selection import train_test_split +from sklearn.pipeline import FeatureUnion, Pipeline +from sklearn.preprocessing import Normalizer, StandardScaler + +X, y = load_iris(return_X_y=True) +X_train, X_test, y_train, y_test = train_test_split(X, y) + + +def pipeline(): + """A dummy model that has a bunch of components that we can test.""" + model = Pipeline( + [ + ("scaler", StandardScaler()), + ("normal", Normalizer()), + ( + "union", + FeatureUnion( + [ + ("pca", PCA(n_components=1)), + ("svd", TruncatedSVD(n_components=2)), + ], + n_jobs=1, # parallelized components won't generate spans + ), + ), + ("class", RandomForestClassifier(n_estimators=10)), + ] + ) + model.fit(X_train, y_train) + return model + + +def random_input(): + """A random record from the feature set.""" + rows = X.shape[0] + random_row = np.random.choice(rows, size=1) + return X[random_row, :] diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/tests/test_sklearn.py b/instrumentation/opentelemetry-instrumentation-sklearn/tests/test_sklearn.py new file mode 100644 index 0000000000..2aa43e700a --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-sklearn/tests/test_sklearn.py @@ -0,0 +1,161 @@ +from sklearn.ensemble import RandomForestClassifier + +from opentelemetry.instrumentation.sklearn import ( + DEFAULT_EXCLUDE_CLASSES, + DEFAULT_METHODS, + SklearnInstrumentor, + get_base_estimators, + get_delegator, +) +from opentelemetry.test.test_base import TestBase +from opentelemetry.trace import SpanKind + +from .fixtures import pipeline, random_input + + +def assert_instrumented(base_estimators): + for _, estimator in base_estimators.items(): + for method_name in DEFAULT_METHODS: + original_method_name = "_otel_original_" + method_name + if issubclass(estimator, tuple(DEFAULT_EXCLUDE_CLASSES)): + assert not hasattr(estimator, original_method_name) + continue + class_attr = getattr(estimator, method_name, None) + if isinstance(class_attr, property): + assert not hasattr(estimator, original_method_name) + continue + delegator = None + if hasattr(estimator, method_name): + delegator = get_delegator(estimator, method_name) + if delegator is not None: + assert hasattr(delegator, "_otel_original_fn") + elif hasattr(estimator, method_name): + assert hasattr(estimator, original_method_name) + + +def assert_uninstrumented(base_estimators): + for _, estimator in base_estimators.items(): + for method_name in DEFAULT_METHODS: + original_method_name = "_otel_original_" + method_name + if issubclass(estimator, tuple(DEFAULT_EXCLUDE_CLASSES)): + assert not hasattr(estimator, original_method_name) + continue + class_attr = getattr(estimator, method_name, None) + if isinstance(class_attr, property): + assert not hasattr(estimator, original_method_name) + continue + delegator = None + if hasattr(estimator, method_name): + delegator = get_delegator(estimator, method_name) + if delegator is not None: + assert not hasattr(delegator, "_otel_original_fn") + elif hasattr(estimator, method_name): + assert not hasattr(estimator, original_method_name) + + +class TestSklearn(TestBase): + def test_package_instrumentation(self): + ski = SklearnInstrumentor() + + base_estimators = get_base_estimators(packages=["sklearn"]) + + model = pipeline() + + ski.instrument() + assert_instrumented(base_estimators) + + x_test = random_input() + + model.predict(x_test) + + spans = self.memory_exporter.get_finished_spans() + self.assertEqual(len(spans), 8) + self.memory_exporter.clear() + + ski.uninstrument() + assert_uninstrumented(base_estimators) + + model = pipeline() + x_test = random_input() + + model.predict(x_test) + + spans = self.memory_exporter.get_finished_spans() + self.assertEqual(len(spans), 0) + + def test_span_properties(self): + """Test that we get all of the spans we expect.""" + model = pipeline() + ski = SklearnInstrumentor() + ski.instrument_estimator(estimator=model) + + x_test = random_input() + + model.predict(x_test) + + spans = self.memory_exporter.get_finished_spans() + self.assertEqual(len(spans), 8) + span = spans[0] + self.assertEqual(span.name, "StandardScaler.transform") + self.assertEqual(span.kind, SpanKind.INTERNAL) + self.assertEqual(span.parent.span_id, spans[-1].context.span_id) + span = spans[1] + self.assertEqual(span.name, "Normalizer.transform") + self.assertEqual(span.kind, SpanKind.INTERNAL) + self.assertEqual(span.parent.span_id, spans[-1].context.span_id) + span = spans[2] + self.assertEqual(span.name, "PCA.transform") + self.assertEqual(span.kind, SpanKind.INTERNAL) + self.assertEqual(span.parent.span_id, spans[4].context.span_id) + span = spans[3] + self.assertEqual(span.name, "TruncatedSVD.transform") + self.assertEqual(span.kind, SpanKind.INTERNAL) + self.assertEqual(span.parent.span_id, spans[4].context.span_id) + span = spans[4] + self.assertEqual(span.name, "FeatureUnion.transform") + self.assertEqual(span.kind, SpanKind.INTERNAL) + self.assertEqual(span.parent.span_id, spans[-1].context.span_id) + span = spans[5] + self.assertEqual(span.name, "RandomForestClassifier.predict_proba") + self.assertEqual(span.kind, SpanKind.INTERNAL) + self.assertEqual(span.parent.span_id, spans[6].context.span_id) + span = spans[6] + self.assertEqual(span.name, "RandomForestClassifier.predict") + self.assertEqual(span.kind, SpanKind.INTERNAL) + self.assertEqual(span.parent.span_id, spans[-1].context.span_id) + span = spans[7] + self.assertEqual(span.name, "Pipeline.predict") + self.assertEqual(span.kind, SpanKind.INTERNAL) + + self.memory_exporter.clear() + + # uninstrument + ski.uninstrument_estimator(estimator=model) + x_test = random_input() + model.predict(x_test) + spans = self.memory_exporter.get_finished_spans() + self.assertEqual(len(spans), 0) + + def test_attrib_config(self): + """Test that the attribute config makes spans on the decision trees.""" + model = pipeline() + attrib_config = {RandomForestClassifier: ["estimators_"]} + ski = SklearnInstrumentor( + recurse_attribs=attrib_config, + exclude_classes=[], # decision trees excluded by default + ) + ski.instrument_estimator(estimator=model) + + x_test = random_input() + model.predict(x_test) + + spans = self.memory_exporter.get_finished_spans() + self.assertEqual(len(spans), 8 + model.steps[-1][-1].n_estimators) + + self.memory_exporter.clear() + + ski.uninstrument_estimator(estimator=model) + x_test = random_input() + model.predict(x_test) + spans = self.memory_exporter.get_finished_spans() + self.assertEqual(len(spans), 0) From 369dba4480cbd2a042acab8478f2b9a50eedaf77 Mon Sep 17 00:00:00 2001 From: flynn Date: Thu, 5 Nov 2020 21:40:13 -0500 Subject: [PATCH 2/5] missing test --- .../tests/test_sklearn.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/tests/test_sklearn.py b/instrumentation/opentelemetry-instrumentation-sklearn/tests/test_sklearn.py index 2aa43e700a..df454b6fa9 100644 --- a/instrumentation/opentelemetry-instrumentation-sklearn/tests/test_sklearn.py +++ b/instrumentation/opentelemetry-instrumentation-sklearn/tests/test_sklearn.py @@ -159,3 +159,17 @@ def test_attrib_config(self): model.predict(x_test) spans = self.memory_exporter.get_finished_spans() self.assertEqual(len(spans), 0) + + def test_span_attributes(self): + model = pipeline() + attributes = {"model_name": "random_forest_model"} + ski = SklearnInstrumentor() + ski.instrument_estimator(estimator=model, attributes=attributes) + + x_test = random_input() + + model.predict(x_test) + + spans = self.memory_exporter.get_finished_spans() + for span in spans: + assert span.attributes["model_name"] == "random_forest_model" From b5f8973a7d140cdcababb4e60436285a63e26ad8 Mon Sep 17 00:00:00 2001 From: flynn Date: Sat, 7 Nov 2020 13:25:41 -0500 Subject: [PATCH 3/5] licenses and refactor span wrapper --- .../instrumentation/sklearn/__init__.py | 52 ++++++++++--------- .../instrumentation/sklearn/version.py | 14 +++++ .../tests/fixtures.py | 14 +++++ .../tests/test_sklearn.py | 14 +++++ 4 files changed, 69 insertions(+), 25 deletions(-) diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py b/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py index d7ac330d1e..90d57f05a6 100644 --- a/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py @@ -80,7 +80,7 @@ logger = logging.getLogger(__name__) -def implement_spans( +def implement_span_estimator( func: Callable, estimator: Union[BaseEstimator, Type[BaseEstimator]], attributes: Attributes = None, @@ -100,13 +100,27 @@ def implement_spans( else: name = estimator.__class__.__name__ logger.debug("Instrumenting: %s.%s", name, func.__name__) - attributes = attributes or {} + name = "{cls}.{func}".format(cls=name, func=func.__name__) + return implement_span_function(func, name, attributes) + + +def implement_span_function(func: Callable, name: str, attributes: Attributes): + """Wrap the function with a span. + + Args: + func: A callable to be wrapped in a span + name: The name of the span + attributes: Attributes to apply to the span + + Returns: + The passed function wrapped in a span. + """ @wraps(func) def wrapper(*args, **kwargs): with get_tracer(__name__, __version__).start_as_current_span( - name="{cls}.{func}".format(cls=name, func=func.__name__), + name=name ) as span: if span.is_recording(): for key, val in attributes.items(): @@ -116,7 +130,7 @@ def wrapper(*args, **kwargs): return wrapper -def implement_spans_delegator( +def implement_span_delegator( obj: _IffHasAttrDescriptor, attributes: Attributes = None ): """Wrap the descriptor's fn with a span. @@ -129,26 +143,14 @@ def implement_spans_delegator( if hasattr(obj, "_otel_original_fn"): logger.debug("Already instrumented: %s", obj.fn.__qualname__) return - - attributes = attributes or {} - - def implement_spans_fn(func: Callable): - @wraps(func) - def wrapper(*args, **kwargs): - with get_tracer(__name__, __version__).start_as_current_span( - name=func.__qualname__ - ) as span: - if span.is_recording(): - for key, val in attributes.items(): - span.set_attribute(key, val) - return func(*args, **kwargs) - - return wrapper - logger.debug("Instrumenting: %s", obj.fn.__qualname__) - + attributes = attributes or {} setattr(obj, "_otel_original_fn", getattr(obj, "fn")) - setattr(obj, "fn", implement_spans_fn(obj.fn)) + setattr( + obj, + "fn", + implement_span_function(obj.fn, obj.fn.__qualname__, attributes), + ) def get_delegator( @@ -595,7 +597,7 @@ def _instrument_class_method( method_name, ) elif delegator is not None: - implement_spans_delegator(delegator) + implement_span_delegator(delegator) else: setattr( estimator, @@ -605,7 +607,7 @@ def _instrument_class_method( setattr( estimator, method_name, - implement_spans(class_attr, estimator, attributes), + implement_span_estimator(class_attr, estimator, attributes), ) def _unwrap_function(self, function): @@ -655,7 +657,7 @@ def _instrument_instance_method( setattr( estimator, method_name, - implement_spans(method, estimator, attributes), + implement_span_estimator(method, estimator, attributes), ) def _instrument_estimator_attribute( diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/version.py b/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/version.py index 4c4951dcda..bb32120c79 100644 --- a/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/version.py +++ b/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/version.py @@ -1 +1,15 @@ +# Copyright 2020, OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + __version__ = "0.16.dev0" diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/tests/fixtures.py b/instrumentation/opentelemetry-instrumentation-sklearn/tests/fixtures.py index 4f9b8aa47e..cf26c0fcf2 100644 --- a/instrumentation/opentelemetry-instrumentation-sklearn/tests/fixtures.py +++ b/instrumentation/opentelemetry-instrumentation-sklearn/tests/fixtures.py @@ -1,3 +1,17 @@ +# Copyright 2020, OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import numpy as np from sklearn.datasets import load_iris from sklearn.decomposition import PCA, TruncatedSVD diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/tests/test_sklearn.py b/instrumentation/opentelemetry-instrumentation-sklearn/tests/test_sklearn.py index df454b6fa9..ad4d032280 100644 --- a/instrumentation/opentelemetry-instrumentation-sklearn/tests/test_sklearn.py +++ b/instrumentation/opentelemetry-instrumentation-sklearn/tests/test_sklearn.py @@ -1,3 +1,17 @@ +# Copyright 2020, OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from sklearn.ensemble import RandomForestClassifier from opentelemetry.instrumentation.sklearn import ( From 3f8eab3659b87536a963ef77bffc7b0d112efff9 Mon Sep 17 00:00:00 2001 From: flynn Date: Sat, 7 Nov 2020 16:46:49 -0500 Subject: [PATCH 4/5] add pr link --- .../opentelemetry-instrumentation-sklearn/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/CHANGELOG.md b/instrumentation/opentelemetry-instrumentation-sklearn/CHANGELOG.md index 3e04402cea..5b9a46c48f 100644 --- a/instrumentation/opentelemetry-instrumentation-sklearn/CHANGELOG.md +++ b/instrumentation/opentelemetry-instrumentation-sklearn/CHANGELOG.md @@ -2,4 +2,4 @@ ## Unreleased -- Initial release +- Initial release ([#151](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/151)) From a91956ef9adac6ccbedd33765ddfff7f866f8340 Mon Sep 17 00:00:00 2001 From: Flynn Date: Mon, 9 Nov 2020 11:28:35 -0500 Subject: [PATCH 5/5] Update instrumentation/opentelemetry-instrumentation-sklearn/setup.cfg Co-authored-by: Leighton Chen --- instrumentation/opentelemetry-instrumentation-sklearn/setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/setup.cfg b/instrumentation/opentelemetry-instrumentation-sklearn/setup.cfg index dc6b8e0204..dd15bda352 100644 --- a/instrumentation/opentelemetry-instrumentation-sklearn/setup.cfg +++ b/instrumentation/opentelemetry-instrumentation-sklearn/setup.cfg @@ -19,7 +19,7 @@ long_description = file: README.rst long_description_content_type = text/x-rst author = OpenTelemetry Authors author_email = cncf-opentelemetry-contributors@lists.cncf.io -url = https://github.com/open-telemetry/opentelemetry-python/tree/master/instrumentation/opentelemetry-instrumentation-sklearn +url = https://github.com/open-telemetry/opentelemetry-python-contrib/tree/master/instrumentation/opentelemetry-instrumentation-sklearn platforms = any license = Apache-2.0 classifiers =