diff --git a/README.md b/README.md index 91e4d21b22..e0ea89bcc3 100644 --- a/README.md +++ b/README.md @@ -139,12 +139,12 @@ deepsparse.benchmark [-h] [-b BATCH_SIZE] [-shapes INPUT_SHAPES] ## 👩‍💻 NLP Inference Example ```python -from deepsparse.transformers import pipeline +from deepsparse import Pipeline # SparseZoo model stub or path to ONNX file model_path = "zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/12layer_pruned80_quant-none-vnni" -qa_pipeline = pipeline( +qa_pipeline = Pipeline.create( task="question-answering", model_path=model_path, ) diff --git a/setup.py b/setup.py index 94d8cfc70c..8237ae3a29 100644 --- a/setup.py +++ b/setup.py @@ -45,6 +45,7 @@ _deps = [ "numpy>=1.16.3", "onnx>=1.5.0,<=1.10.1", + "pydantic>=1.8.2", "requests>=2.0.0", "tqdm>=4.0.0", "protobuf>=3.12.2", @@ -74,13 +75,22 @@ "uvicorn>=0.15.0", "fastapi>=0.70.0", "starlette>=0.16.0", - "pydantic>=1.8.2", "requests>=2.26.0", ] _onnxruntime_deps = [ "onnxruntime>=1.7.0", ] +_ic_integration_deps = [ + "click<8.1", + "opencv-python", +] + +_yolo_integration_deps = [ + "torchvision>=0.3.0,<=0.10.1", + "opencv-python", +] + class OverrideInstall(install): """ @@ -173,12 +183,15 @@ def _setup_extras() -> Dict: "dev": _dev_deps, "server": _server_deps, "onnxruntime": _onnxruntime_deps, + "image_classification": _ic_integration_deps, + "yolo": _yolo_integration_deps, } def _setup_entry_points() -> Dict: data_api_entrypoint = "deepsparse.transformers.pipelines_cli:cli" eval_downstream = "deepsparse.transformers.eval_downstream:main" + return { "console_scripts": [ f"deepsparse.transformers.run_inference={data_api_entrypoint}", @@ -187,6 +200,7 @@ def _setup_entry_points() -> Dict: "deepsparse.check_hardware=deepsparse.cpu:print_hardware_capability", "deepsparse.benchmark=deepsparse.benchmark.benchmark_model:main", "deepsparse.server=deepsparse.server.main:start_server", + "deepsparse.object_detection.annotate=deepsparse.yolo.annotate:main", ] } diff --git a/src/deepsparse/__init__.py b/src/deepsparse/__init__.py index 3d3113b74b..d9c28dc591 100644 --- a/src/deepsparse/__init__.py +++ b/src/deepsparse/__init__.py @@ -31,6 +31,7 @@ cpu_vnni_compatible, ) from .engine import * +from .pipeline import * from .version import __version__, is_release diff --git a/src/deepsparse/image_classification/__init__.py b/src/deepsparse/image_classification/__init__.py new file mode 100644 index 0000000000..0c44f887a4 --- /dev/null +++ b/src/deepsparse/image_classification/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/src/deepsparse/image_classification/constants.py b/src/deepsparse/image_classification/constants.py new file mode 100644 index 0000000000..d035e44513 --- /dev/null +++ b/src/deepsparse/image_classification/constants.py @@ -0,0 +1,16 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +IMAGENET_RGB_MEANS = [0.485, 0.456, 0.406] +IMAGENET_RGB_STDS = [0.229, 0.224, 0.225] diff --git a/src/deepsparse/image_classification/pipelines.py b/src/deepsparse/image_classification/pipelines.py new file mode 100644 index 0000000000..e085937728 --- /dev/null +++ b/src/deepsparse/image_classification/pipelines.py @@ -0,0 +1,197 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Image classification pipeline +""" +import json +from typing import Dict, List, Optional, Tuple, Type, Union + +import numpy +import onnx + +from deepsparse.image_classification.constants import ( + IMAGENET_RGB_MEANS, + IMAGENET_RGB_STDS, +) +from deepsparse.image_classification.schemas import ( + ImageClassificationInput, + ImageClassificationOutput, +) +from deepsparse.pipeline import Pipeline +from deepsparse.utils import model_to_path + + +try: + import cv2 + + cv2_error = None +except ModuleNotFoundError as cv2_import_error: + cv2 = None + cv2_error = cv2_import_error + + +@Pipeline.register( + task="image_classification", + default_model_path=( + "zoo:cv/classification/resnet_v1-50/pytorch/sparseml/" + "imagenet/pruned85_quant-none-vnni" + ), +) +class ImageClassificationPipeline(Pipeline): + """ + Image classification pipeline for DeepSparse + + :param model_path: path on local system or SparseZoo stub to load the model from + :param engine_type: inference engine to use. Currently supported values include + 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' + :param batch_size: static batch size to use for inference. Default is 1 + :param num_cores: number of CPU cores to allocate for inference engine. None + specifies all available cores. Default is None + :param scheduler: (deepsparse only) kind of scheduler to execute with. + Pass None for the default + :param input_shapes: list of shapes to set ONNX the inputs to. Pass None + to use model as-is. Default is None + :param alias: optional name to give this pipeline instance, useful when + inferencing with multiple models. Default is None + :param class_names: Optional dict, or json file of class names to use for + mapping class ids to class labels. 
Default is None + """ + + def __init__( + self, + *, + class_names: Union[None, str, Dict[str, str]] = None, + **kwargs, + ): + super().__init__(**kwargs) + + if isinstance(class_names, str) and class_names.endswith(".json"): + self._class_names = json.load(open(class_names)) + elif isinstance(class_names, dict): + self._class_names = class_names + else: + self._class_names = None + + self._image_size = self._infer_image_size() + + @property + def class_names(self) -> Optional[Dict[str, str]]: + """ + :return: Optional dict, or json file of class names to use for + mapping class ids to class labels + """ + return self._class_names + + @property + def input_schema(self) -> Type[ImageClassificationInput]: + """ + :return: pydantic model class that inputs to this pipeline must comply to + """ + return ImageClassificationInput + + @property + def output_schema(self) -> Type[ImageClassificationOutput]: + """ + :return: pydantic model class that outputs of this pipeline must comply to + """ + return ImageClassificationOutput + + def setup_onnx_file_path(self) -> str: + """ + Performs any setup to unwrap and process the given `model_path` and other + class properties into an inference ready onnx file to be compiled by the + engine of the pipeline + + :return: file path to the ONNX file for the engine to compile + """ + + return model_to_path(self.model_path) + + def process_inputs(self, inputs: ImageClassificationInput) -> List[numpy.ndarray]: + """ + Pre-Process the Inputs for DeepSparse Engine + + :param inputs: input model + :return: list of preprocessed numpy arrays + """ + + if isinstance(inputs.images, numpy.ndarray): + image_batch = inputs.images + else: + + image_batch = [] + + if isinstance(inputs.images, str): + inputs.images = [inputs.images] + + for image in inputs.images: + if cv2 is None: + raise RuntimeError( + "cv2 is required to load image inputs from file " + f"Unable to import: {cv2_error}" + ) + img = cv2.imread(image) if isinstance(image, str) else image + + img = cv2.resize(img, dsize=self._image_size) + img = img[:, :, ::-1].transpose(2, 0, 1) + image_batch.append(img) + + image_batch = numpy.stack(image_batch, axis=0) + + original_dtype = image_batch.dtype + image_batch = numpy.ascontiguousarray(image_batch, dtype=numpy.float32) + + if original_dtype == numpy.uint8: + + image_batch /= 255 + + # normalize entire batch + image_batch -= numpy.asarray(IMAGENET_RGB_MEANS).reshape((-1, 3, 1, 1)) + image_batch /= numpy.asarray(IMAGENET_RGB_STDS).reshape((-1, 3, 1, 1)) + + return [image_batch] + + def process_engine_outputs( + self, + engine_outputs: List[numpy.ndarray], + ) -> ImageClassificationOutput: + """ + :param engine_outputs: list of numpy arrays that are the output of the engine + forward pass + :return: outputs of engine post-processed into an object in the `output_schema` + format of this pipeline + """ + labels = numpy.argmax(engine_outputs[0], axis=1).tolist() + + if self.class_names is not None: + labels = [self.class_names[str(class_id)] for class_id in labels] + + return self.output_schema( + scores=numpy.max(engine_outputs[0], axis=1).tolist(), + labels=labels, + ) + + def _infer_image_size(self) -> Tuple[int, ...]: + """ + Infer and return the expected shape of the input tensor + + :return: The expected shape of the input tensor from onnx graph + """ + onnx_model = onnx.load(self.onnx_file_path) + input_tensor = onnx_model.graph.input[0] + return ( + input_tensor.type.tensor_type.shape.dim[2].dim_value, + input_tensor.type.tensor_type.shape.dim[3].dim_value, + ) 
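
For orientation, the `image_classification` task registered above can be exercised the same way as the README's NLP example. The following is a minimal usage sketch based on the schemas in this diff; the image filename is a hypothetical placeholder, and `class_names` may be omitted to get raw class ids back:

```python
from deepsparse import Pipeline

# uses the default SparseZoo stub registered for the task above;
# any local ONNX file or other stub may be passed via model_path
ic_pipeline = Pipeline.create(
    task="image_classification",
    class_names=None,  # or a dict / path to a JSON file mapping class ids to labels
)

# accepts a single file path, a list of file paths, or a numpy array of images
prediction = ic_pipeline(images=["sample_image.jpg"])  # hypothetical image file
print(prediction.labels, prediction.scores)
```
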
diff --git a/src/deepsparse/image_classification/schemas.py b/src/deepsparse/image_classification/schemas.py new file mode 100644 index 0000000000..5a92b90e3b --- /dev/null +++ b/src/deepsparse/image_classification/schemas.py @@ -0,0 +1,42 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Input/Output Schemas for Image Classification. +""" + +from typing import List, Union + +import numpy +from pydantic import BaseModel + + +class ImageClassificationInput(BaseModel): + """ + Input model for image classification + """ + + images: Union[str, numpy.ndarray, List[str]] + + class Config: + arbitrary_types_allowed = True + + +class ImageClassificationOutput(BaseModel): + """ + Output model for image classification + """ + + labels: List[Union[int, str]] + scores: List[float] diff --git a/src/deepsparse/image_classification/validation_script.py b/src/deepsparse/image_classification/validation_script.py new file mode 100644 index 0000000000..e176b4072c --- /dev/null +++ b/src/deepsparse/image_classification/validation_script.py @@ -0,0 +1,162 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Usage: validation_script.py [OPTIONS] + + Validation Script for Image Classification Models + +Options: + --dataset-path, --dataset_path DIRECTORY + Path to the validation dataset [required] + --model-path, --model_path TEXT + Path/SparseZoo stub for the Image + Classification model to be evaluated. + Defaults to resnet50 trained on + Imagenette [default: zoo:cv/classification/ + resnet_v1-50/pytorch/sparseml/imagenette/ + base-none] + --batch-size, --batch_size INTEGER + Test batch size, must divide the dataset + evenly, else the last batch will be dropped + [default: 1] + --help Show this message and exit. + +######### +EXAMPLES +######### + +########## +Example command for validating pruned resnet50 on imagenette dataset: +python validation_script.py \ + --dataset-path /path/to/imagenette/ + +""" +from tqdm import tqdm + +from deepsparse.pipeline import Pipeline +from torch.utils.data import DataLoader +from torchvision import transforms + + +try: + import torchvision + +except ModuleNotFoundError as torchvision_error: # noqa: F841 + print( + "Torchvision not installed. 
Please install it using the command:"
+        " pip install torchvision>=0.3.0,<=0.10.1"
+    )
+    exit(1)
+
+import click
+
+
+resnet50_imagenette_base = (
+    "zoo:cv/classification/resnet_v1-50/pytorch/sparseml/imagenette/base-none"
+)
+
+
+@click.command()
+@click.option(
+    "--dataset-path",
+    "--dataset_path",
+    required=True,
+    type=click.Path(dir_okay=True, file_okay=False),
+    help="Path to the validation dataset",
+)
+@click.option(
+    "--model-path",
+    "--model_path",
+    type=str,
+    default=resnet50_imagenette_base,
+    help="Path/SparseZoo stub for the Image Classification model to be "
+    "evaluated. Defaults to dense (vanilla) resnet50 trained on Imagenette",
+    show_default=True,
+)
+@click.option(
+    "--batch-size",
+    "--batch_size",
+    type=int,
+    default=1,
+    show_default=True,
+    help="Test batch size, must divide the dataset evenly, else last "
+    "batch will be dropped",
+)
+@click.option(
+    "--image-size",
+    "--image_size",
+    type=int,
+    default=224,
+    show_default=True,
+    help="Size (pixels) to which input images are resized before inference",
+)
+def main(dataset_path: str, model_path: str, batch_size: int, image_size: int):
+    """
+    Validation Script for Image Classification Models
+    """
+
+    dataset = torchvision.datasets.ImageFolder(
+        root=dataset_path,
+        transform=transforms.Compose(
+            [
+                transforms.ToTensor(),
+                transforms.Resize(size=(image_size, image_size)),
+            ]
+        ),
+    )
+
+    data_loader = DataLoader(
+        dataset=dataset,
+        batch_size=batch_size,
+        drop_last=True,
+    )
+
+    pipeline = Pipeline.create(
+        task="image_classification",
+        model_path=model_path,
+        batch_size=batch_size,
+    )
+    correct = total = 0
+    progress_bar = tqdm(data_loader)
+
+    for batch in progress_bar:
+        batch, actual_labels = batch
+        batch = batch.numpy()
+        outs = pipeline(images=batch)
+        predicted_labels = outs.labels
+
+        for actual, predicted in zip(actual_labels, predicted_labels):
+            total += 1
+            if isinstance(predicted, str):
+                predicted = int(predicted)
+            if actual.item() == predicted:
+                correct += 1
+
+        if total > 0:
+            progress_bar.set_postfix(
+                {"Running Accuracy": f"{correct * 100 / total:.2f}%"}
+            )
+
+    # prevent division by zero
+    if total == 0:
+        epsilon = 1e-5
+        total += epsilon
+
+    print(f"Accuracy: {correct * 100 / total:.2f} %")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/deepsparse/pipeline.py b/src/deepsparse/pipeline.py
new file mode 100644
index 0000000000..5ab6b9ec63
--- /dev/null
+++ b/src/deepsparse/pipeline.py
@@ -0,0 +1,546 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +""" +Classes and registry for end to end inference pipelines that wrap an underlying +inference engine and include pre/postprocessing +""" + + +import os +from abc import ABC, abstractmethod +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple, Type, Union + +import numpy +from pydantic import BaseModel, Field + +from deepsparse import Engine, Scheduler +from deepsparse.benchmark import ORTEngine +from deepsparse.tasks import SupportedTasks + + +__all__ = [ + "DEEPSPARSE_ENGINE", + "ORT_ENGINE", + "SUPPORTED_PIPELINE_ENGINES", + "Pipeline", + "PipelineConfig", +] + + +DEEPSPARSE_ENGINE = "deepsparse" +ORT_ENGINE = "onnxruntime" + +SUPPORTED_PIPELINE_ENGINES = [DEEPSPARSE_ENGINE, ORT_ENGINE] + + +_REGISTERED_PIPELINES = {} + + +class Pipeline(ABC): + """ + Generic Pipeline abstract class meant to wrap inference engine objects to include + data pre/post-processing. Inputs and outputs of pipelines should be serialized + as pydantic Models. + + Pipelines should not be instantiated by their constructors, but rather the + `Pipeline.create()` method. The task name given to `create` will be used to + load the appropriate pipeline. When creating a Pipeline, the pipeline should + inherit from `Pipeline` and implement the `setup_onnx_file_path`, `process_inputs`, + `process_engine_outputs`, `input_schema`, and `output_schema` abstract methods. + + Finally, the class definition should be decorated by the `Pipeline.register` + function. This defines the task name and task aliases for the pipeline and + ensures that it will be accessible by `Pipeline.create`. The implemented + `Pipeline` subclass must be imported at runtime to be accessible. + + Pipeline lifecycle: + - On instantiation + * `onnx_file_path` <- `setup_onnx_file_path` + * `engine` <- `_initialize_engine` + + - on __call__: + * `parsed_inputs: input_schema` <- `parse_inputs(*args, **kwargs)` + * `pre_processed_inputs` <- `process_inputs(parsed_inputs)` + * `engine_outputs` <- `engine(pre_processed_inputs)` + * `outputs: output_schema` <- `process_engine_outputs(engine_outputs)` + + Example use of register: + ```python + @Pipeline.register( + task="example_task", + task_aliases=["example_alias_1", "example_alias_2"], + ) + class PipelineImplementation(Pipeline): + # implementation of Pipeline abstract methods here + ``` + + Example use of pipeline: + ```python + example_pipeline = Pipeline.create( + task="example_task", + model_path="model.onnx", + ) + pipeline_outputs = example_pipeline(pipeline_inputs) + ``` + + :param model_path: path on local system or SparseZoo stub to load the model from + :param engine_type: inference engine to use. Currently supported values include + 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' + :param batch_size: static batch size to use for inference. Default is 1 + :param num_cores: number of CPU cores to allocate for inference engine. None + specifies all available cores. Default is None + :param scheduler: (deepsparse only) kind of scheduler to execute with. + Pass None for the default + :param input_shapes: list of shapes to set ONNX the inputs to. Pass None + to use model as-is. Default is None + :param alias: optional name to give this pipeline instance, useful when + inferencing with multiple models. 
Default is None + """ + + def __init__( + self, + model_path: str, + engine_type: str = DEEPSPARSE_ENGINE, + batch_size: int = 1, + num_cores: int = None, + scheduler: Scheduler = None, + input_shapes: List[List[int]] = None, + alias: Optional[str] = None, + ): + self._model_path_orig = model_path + self._model_path = model_path + self._engine_type = engine_type + self._alias = alias + + self._engine_args = dict( + batch_size=batch_size, + num_cores=num_cores, + input_shapes=input_shapes, + ) + if engine_type.lower() == DEEPSPARSE_ENGINE: + self._engine_args["scheduler"] = scheduler + + self.onnx_file_path = self.setup_onnx_file_path() + self.engine = self._initialize_engine() + + def __call__(self, *args, **kwargs) -> BaseModel: + # parse inputs into input_schema schema if necessary + pipeline_inputs = self.parse_inputs(*args, **kwargs) + if not isinstance(pipeline_inputs, self.input_schema): + raise RuntimeError( + f"Unable to parse {self.__class__} inputs into a " + f"{self.input_schema} object. Inputs parsed to {type(pipeline_inputs)}" + ) + + # run pipeline + engine_inputs: List[numpy.ndarray] = self.process_inputs(pipeline_inputs) + + if isinstance(engine_inputs, tuple): + engine_inputs, postprocess_kwargs = engine_inputs + else: + postprocess_kwargs = {} + + engine_outputs: List[numpy.ndarray] = self.engine(engine_inputs) + pipeline_outputs = self.process_engine_outputs( + engine_outputs, **postprocess_kwargs + ) + + # validate outputs format + if not isinstance(pipeline_outputs, self.output_schema): + raise ValueError( + f"Outputs of {self.__class__} must be instances of " + f"{self.output_schema} found output of type {type(pipeline_outputs)}" + ) + + return pipeline_outputs + + @staticmethod + def create( + task: str, + model_path: str = None, + engine_type: str = DEEPSPARSE_ENGINE, + batch_size: int = 1, + num_cores: int = None, + scheduler: Scheduler = None, + input_shapes: List[List[int]] = None, + alias: Optional[str] = None, + **kwargs, + ) -> "Pipeline": + """ + :param task: name of task to create a pipeline for + :param model_path: path on local system or SparseZoo stub to load the model + from. Some tasks may have a default model path + :param engine_type: inference engine to use. Currently supported values + include 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' + :param batch_size: static batch size to use for inference. Default is 1 + :param num_cores: number of CPU cores to allocate for inference engine. None + specifies all available cores. Default is None + :param scheduler: (deepsparse only) kind of scheduler to execute with. + Pass None for the default + :param input_shapes: list of shapes to set ONNX the inputs to. Pass None + to use model as-is. Default is None + :param alias: optional name to give this pipeline instance, useful when + inferencing with multiple models. Default is None + :param kwargs: extra task specific kwargs to be passed to task Pipeline + implementation + :return: pipeline object initialized for the given task + """ + task = task.lower().replace("-", "_") + + # extra step to register pipelines for a given task domain + # for cases where imports should only happen once a user specifies + # that domain is to be used. (ie deepsparse.transformers will auto + # install extra packages so should only import and register once a + # transformers task is specified) + SupportedTasks.check_register_task(task) + + if task not in _REGISTERED_PIPELINES: + raise ValueError( + f"Unknown Pipeline task {task}. 
Pipeline tasks must be "
+                "declared with the Pipeline.register decorator. Currently "
+                f"registered pipelines: {list(_REGISTERED_PIPELINES.keys())}"
+            )
+
+        pipeline_constructor = _REGISTERED_PIPELINES[task]
+
+        if (
+            model_path is None
+            and hasattr(pipeline_constructor, "default_model_path")
+            and pipeline_constructor.default_model_path
+        ):
+            model_path = pipeline_constructor.default_model_path
+
+        if model_path is None:
+            raise ValueError(
+                f"No model_path provided for pipeline {pipeline_constructor}. Must "
+                "provide a model path for pipelines that do not have a default defined"
+            )
+
+        return pipeline_constructor(
+            model_path=model_path,
+            engine_type=engine_type,
+            batch_size=batch_size,
+            num_cores=num_cores,
+            scheduler=scheduler,
+            input_shapes=input_shapes,
+            alias=alias,
+            **kwargs,
+        )
+
+    @classmethod
+    def register(
+        cls,
+        task: str,
+        task_aliases: Optional[List[str]] = None,
+        default_model_path: Optional[str] = None,
+    ):
+        """
+        Pipeline implementer class decorator that registers the pipeline
+        task name and its aliases as valid tasks that can be used to load
+        the pipeline through `Pipeline.create()`.
+
+        Multiple pipelines may not have the same task name. An error will
+        be raised if two different pipelines attempt to register the same task name.
+
+        :param task: main task name of this pipeline
+        :param task_aliases: list of extra task names that may be used to reference
+            this pipeline. Default is None
+        :param default_model_path: path (i.e. zoo stub) to use as default for this
+            task if None is provided
+        """
+        task_names = [task]
+        if task_aliases:
+            task_names.extend(task_aliases)
+
+        def _register_task(task_name, pipeline_class):
+            if task_name in _REGISTERED_PIPELINES and (
+                pipeline_class is not _REGISTERED_PIPELINES[task_name]
+            ):
+                raise RuntimeError(
+                    f"task {task_name} already registered by Pipeline.register. "
+                    f"attempting to register pipeline: {pipeline_class}, but "
+                    f"pipeline: {_REGISTERED_PIPELINES[task_name]}, already registered"
+                )
+            _REGISTERED_PIPELINES[task_name] = pipeline_class
+
+        def _register_pipeline_tasks_decorator(pipeline_class: Pipeline):
+            if not issubclass(pipeline_class, cls):
+                raise RuntimeError(
+                    f"Attempting to register pipeline {pipeline_class}. 
" + f"Registered pipelines must inherit from {cls}" + ) + for task_name in task_names: + _register_task(task_name, pipeline_class) + + # set task and task_aliases as class level property + pipeline_class.task = task + pipeline_class.task_aliases = task_aliases + pipeline_class.default_model_path = default_model_path + + return pipeline_class + + return _register_pipeline_tasks_decorator + + @classmethod + def from_config(cls, config: Union["PipelineConfig", str, Path]) -> "Pipeline": + """ + :param config: PipelineConfig object, filepath to a json serialized + PipelineConfig, or raw string of a json serialized PipelineConfig + :return: loaded Pipeline object from the config + """ + if isinstance(config, Path) or ( + isinstance(config, str) and os.path.exists(config) + ): + if isinstance(config, str): + config = Path(config) + config = PipelineConfig.parse_file(config) + if isinstance(config, str): + config = PipelineConfig.parse_raw(config) + + return cls.create( + task=config.task, + model_path=config.model_path, + engine_type=config.engine_type, + batch_size=config.batch_size, + num_cores=config.num_cores, + scheduler=config.scheduler, + input_shapes=config.input_shapes, + alias=config.alias, + **config.kwargs, + ) + + @abstractmethod + def setup_onnx_file_path(self) -> str: + """ + Performs any setup to unwrap and process the given `model_path` and other + class properties into an inference ready onnx file to be compiled by the + engine of the pipeline + + :return: file path to the ONNX file for the engine to compile + """ + raise NotImplementedError() + + @abstractmethod + def process_inputs( + self, + inputs: BaseModel, + ) -> Union[List[numpy.ndarray], Tuple[List[numpy.ndarray], Dict[str, Any]]]: + """ + :param inputs: inputs to the pipeline. Must be the type of the `input_schema` + of this pipeline + :return: inputs of this model processed into a list of numpy arrays that + can be directly passed into the forward pass of the pipeline engine. 
Can + also include a tuple with engine inputs and special key word arguments + to pass to process_engine_outputs to facilitate information from the raw + inputs to postprocessing that may not be included in the engine inputs + """ + raise NotImplementedError() + + @abstractmethod + def process_engine_outputs( + self, + engine_outputs: List[numpy.ndarray], + **kwargs, + ) -> BaseModel: + """ + :param engine_outputs: list of numpy arrays that are the output of the engine + forward pass + :return: outputs of engine post-processed into an object in the `output_schema` + format of this pipeline + """ + raise NotImplementedError() + + @property + @abstractmethod + def input_schema(self) -> Type[BaseModel]: + """ + :return: pydantic model class that inputs to this pipeline must comply to + """ + raise NotImplementedError() + + @property + @abstractmethod + def output_schema(self) -> Type[BaseModel]: + """ + :return: pydantic model class that outputs of this pipeline must comply to + """ + raise NotImplementedError() + + @property + def alias(self) -> str: + """ + :return: optional name to give this pipeline instance, useful when + inferencing with multiple models + """ + return self._alias + + @property + def model_path_orig(self) -> str: + """ + :return: value originally passed to the `model_path` argument to initialize + this Pipeline + """ + return self._model_path_orig + + @property + def model_path(self) -> str: + """ + :return: path on local system to the onnx file of this model or directory + containing a model.onnx file along with supporting files + """ + return self._model_path + + @property + def engine_args(self) -> Dict[str, Any]: + """ + :return: arguments besides onnx filepath used to instantiate engine + """ + return self._engine_args + + @property + def engine_type(self) -> str: + """ + :return: type of inference engine used for model forward pass + """ + return self._engine_type + + def to_config(self) -> "PipelineConfig": + """ + :return: PipelineConfig that can be used to reload this object + """ + + if not hasattr(self, "task"): + raise RuntimeError( + f"{self.__class__} instance has no attribute task. Pipeline objects " + "must have a task to be serialized to a config. Pipeline objects " + "must be declared with the Pipeline.register object to be assigned a " + "task" + ) + + # parse any additional properties as kwargs + kwargs = {} + for attr_name, attr in self.__class__.__dict__.items(): + if isinstance(attr, property) and attr_name not in dir(PipelineConfig): + kwargs[attr_name] = getattr(self, attr_name) + + return PipelineConfig( + task=self.task, + model_path=self.model_path_orig, + engine_type=self.engine_type, + batch_size=self.batch_size, + num_cores=self.num_cores, + scheduler=self.scheduler, + input_shapes=self.input_shapes, + alias=self.alias, + kwargs=kwargs, + ) + + def parse_inputs(self, *args, **kwargs) -> BaseModel: + """ + :param args: ordered arguments to pipeline, only an input_schema object + is supported as an arg for this function + :param kwargs: keyword arguments to pipeline + :return: pipeline arguments parsed into the given `input_schema` + schema if necessary. If an instance of the `input_schema` is provided + it will be returned + """ + # passed input_schema schema directly + if len(args) == 1 and isinstance(args[0], self.input_schema) and not kwargs: + return args[0] + + if args: + raise ValueError( + f"pipeline {self.__class__} only supports either only a " + f"{self.input_schema} object. or keyword arguments to be construct " + f"one. 
Found {len(args)} args and {len(kwargs)} kwargs" + ) + + return self.input_schema(**kwargs) + + def _initialize_engine(self) -> Union[Engine, ORTEngine]: + engine_type = self.engine_type.lower() + + if engine_type == DEEPSPARSE_ENGINE: + return Engine(self.onnx_file_path, **self._engine_args) + elif engine_type == ORT_ENGINE: + return ORTEngine(self.onnx_file_path, **self._engine_args) + else: + raise ValueError( + f"Unknown engine_type {self.engine_type}. Supported values include: " + f"{SUPPORTED_PIPELINE_ENGINES}" + ) + + +class PipelineConfig(BaseModel): + """ + Configuration for creating a Pipeline object + + Can be used to create a Pipeline from a config object or file with + Pipeline.from_config(), or used as a building block for other configs + such as for deepsparse.server + """ + + task: str = Field( + description="name of task to create a pipeline for", + ) + model_path: str = Field( + description="path on local system or SparseZoo stub to load the model from", + ) + engine_type: str = Field( + default=DEEPSPARSE_ENGINE, + description=( + "inference engine to use. Currently supported values include " + "'deepsparse' and 'onnxruntime'. Default is 'deepsparse'" + ), + ) + batch_size: int = Field( + default=1, + description=("static batch size to use for inference. Default is 1"), + ) + num_cores: int = Field( + default=None, + description=( + "number of CPU cores to allocate for inference engine. None" + "specifies all available cores. Default is None" + ), + ) + scheduler: str = Field( + default="async", + description=( + "(deepsparse only) kind of scheduler to execute with. Defaults to async" + ), + ) + input_shapes: List[List[int]] = Field( + default=None, + description=( + "list of shapes to set ONNX the inputs to. Pass None to use model as-is. " + "Default is None" + ), + ) + alias: str = Field( + default=None, + description=( + "optional name to give this pipeline instance, useful when inferencing " + "with multiple models. Default is None" + ), + ) + kwargs: Dict[str, Any] = Field( + default={}, + description=( + "Additional arguments for inference with the model that will be passed " + "into the pipeline as kwargs" + ), + ) diff --git a/src/deepsparse/server/config.py b/src/deepsparse/server/config.py index 7f9ac9bd59..0d0be42ec0 100644 --- a/src/deepsparse/server/config.py +++ b/src/deepsparse/server/config.py @@ -19,18 +19,18 @@ import json import os from functools import lru_cache -from typing import Any, Dict, List +from typing import List import yaml from pydantic import BaseModel, Field +from deepsparse import PipelineConfig from deepsparse.cpu import cpu_architecture __all__ = [ "ENV_DEEPSPARSE_SERVER_CONFIG", "ENV_SINGLE_PREFIX", - "ServeModelConfig", "ServerConfig", ] @@ -39,75 +39,15 @@ ENV_SINGLE_PREFIX = "DEEPSPARSE_SINGLE_MODEL:" -class ServeModelConfig(BaseModel): - """ - Configuration for serving a model for a given task in the DeepSparse server - """ - - task: str = Field( - description=( - "The task the model_path is serving. For example, one of: " - "question_answering, text_classification, token_classification." - ), - ) - model_path: str = Field( - description=( - "The path to a model.onnx file, " - "a model folder containing the model.onnx and supporting files, " - "or a SparseZoo model stub." - ), - ) - batch_size: int = Field( - default=1, - description=( - "The batch size to instantiate the model with and use for serving" - ), - ) - alias: str = Field( - default=None, - description=( - "Alias name for model pipeline to be served. 
A convenience route of " - "/predict/alias will be added to the server if present. " - ), - ) - kwargs: Dict[str, Any] = Field( - default={}, - description=( - "Additional arguments for inference with the model that will be passed " - "into the pipeline as kwargs" - ), - ) - engine: str = Field( - default="deepsparse", - description=( - "The engine to use for serving the models such as deepsparse or onnxruntime" - ), - ) - num_cores: int = Field( - default=None, - description=( - "The number of physical cores to restrict the DeepSparse Engine to. " - "Defaults to all cores." - ), - ) - scheduler: str = Field( - default="async", - description=( - "The scheduler to use with the DeepSparse Engine such as sync or async. " - "Defaults to async" - ), - ) - - class ServerConfig(BaseModel): """ A configuration for serving models in the DeepSparse inference server """ - models: List[ServeModelConfig] = Field( + models: List[PipelineConfig] = Field( default=[], description=( - "The models to serve in the server defined by the additional arguments" + "The models to serve in the server defined by PipelineConfig objects" ), ) workers: str = Field( @@ -148,7 +88,7 @@ def server_config_from_env(env_key: str = ENV_DEEPSPARSE_SERVER_CONFIG): config_dict = json.loads(config_file.replace(ENV_SINGLE_PREFIX, "")) config = ServerConfig() config.models.append( - ServeModelConfig( + PipelineConfig( task=config_dict["task"], model_path=config_dict["model_path"], batch_size=config_dict["batch_size"], @@ -158,7 +98,7 @@ def server_config_from_env(env_key: str = ENV_DEEPSPARSE_SERVER_CONFIG): with open(config_file) as file: config_dict = yaml.safe_load(file.read()) config_dict["models"] = ( - [ServeModelConfig(**model) for model in config_dict["models"]] + [PipelineConfig(**model) for model in config_dict["models"]] if "models" in config_dict else [] ) diff --git a/src/deepsparse/server/main.py b/src/deepsparse/server/main.py index e8efead286..dc31f6427f 100644 --- a/src/deepsparse/server/main.py +++ b/src/deepsparse/server/main.py @@ -84,6 +84,7 @@ import click +from deepsparse import Pipeline from deepsparse.log import set_logging_level from deepsparse.server.asynchronous import execute_async, initialize_aysnc from deepsparse.server.config import ( @@ -91,7 +92,6 @@ server_config_from_env, server_config_to_env, ) -from deepsparse.server.pipelines import load_pipelines_definitions from deepsparse.server.utils import serializable_response from deepsparse.version import version @@ -130,7 +130,11 @@ def _home(): def _add_pipeline_route( - app, pipeline_def, num_models: int, defined_tasks: set, integration: str + app, + pipeline: Pipeline, + num_models: int, + defined_tasks: set, + integration: str, ): path = "/predict" @@ -142,26 +146,27 @@ def _add_pipeline_route( ) # required path name for Sagemaker path = "/invocations" - elif pipeline_def.config.alias: - path = f"/predict/{pipeline_def.config.alias}" + elif pipeline.alias: + path = f"/predict/{pipeline.alias}" elif num_models > 1: - if pipeline_def.config.task in defined_tasks: + if pipeline.task in defined_tasks: raise ValueError( - f"Multiple tasks defined for {pipeline_def.config.task} and no alias " - f"given for {pipeline_def.config}. " + f"Multiple tasks defined for {pipeline.task} and no alias " + f"given for pipeline with model {pipeline.model_path_orig}. 
" "Either define an alias or supply a single model for the task" ) - path = f"/predict/{pipeline_def.config.task}" - defined_tasks.add(pipeline_def.config.task) + path = f"/predict/{pipeline.task}" + defined_tasks.add(pipeline.task) @app.post( path, - response_model=pipeline_def.response_model, + response_model=pipeline.output_schema, tags=["prediction"], ) - async def _predict_func(request: pipeline_def.request_model): + async def _predict_func(request: pipeline.input_schema): results = await execute_async( - pipeline_def.pipeline, **vars(request), **pipeline_def.kwargs + pipeline, + request, ) return serializable_response(results) @@ -183,15 +188,12 @@ def server_app_factory(): _LOGGER.debug("loaded server config %s", config) _add_general_routes(app, config) - pipeline_defs = load_pipelines_definitions(config) - _LOGGER.debug("loaded pipeline definitions from config %s", pipeline_defs) + pipelines = [Pipeline.from_config(model_config) for model_config in config.models] + _LOGGER.debug("loaded pipeline definitions from config %s", pipelines) num_tasks = len(config.models) defined_tasks = set() - - for pipeline_def in pipeline_defs: - _add_pipeline_route( - app, pipeline_def, num_tasks, defined_tasks, config.integration - ) + for pipeline in pipelines: + _add_pipeline_route(app, pipeline, num_tasks, defined_tasks, config.integration) return app diff --git a/src/deepsparse/server/pipelines.py b/src/deepsparse/server/pipelines.py deleted file mode 100644 index ef07c68ca2..0000000000 --- a/src/deepsparse/server/pipelines.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Pipelines that run preprocessing, postprocessing, and model inference -within the DeepSparse model server. -""" - -from typing import Any, Dict, List - -from pydantic import BaseModel, Field - -from deepsparse.server.config import ServeModelConfig, ServerConfig -from deepsparse.tasks import SupportedTasks - - -__all__ = ["PipelineDefinition", "load_pipelines_definitions"] - - -class PipelineDefinition(BaseModel): - """ - A definition of a pipeline to be served by the model server. - Used to create a prediction route on construction of the server app. - """ - - pipeline: Any = Field(description="the callable pipeline to invoke on each request") - request_model: Any = Field( - description="the pydantic model to validate the request body with" - ) - response_model: Any = Field( - description="the pydantic model to validate the response payload with" - ) - kwargs: Dict[str, Any] = Field( - description="any additional kwargs that should be passed into the pipeline" - ) - config: ServeModelConfig = Field( - description="the config for the model the pipeline is serving" - ) - - -def load_pipelines_definitions(config: ServerConfig) -> List[PipelineDefinition]: - """ - Load the pipeline definitions to use for creating prediction routes from - the given server configuration. 
- - :param config: the configuration to load pipeline definitions for - :return: the loaded pipeline definitions to use for serving inference requests - """ - defs = [] - - for model_config in config.models: - if SupportedTasks.is_nlp(model_config.task): - # dynamically import so we don't install dependencies when unneeded - from deepsparse.transformers.server import create_pipeline_definitions - - ( - pipeline, - request_model, - response_model, - kwargs, - ) = create_pipeline_definitions(model_config) - else: - raise ValueError( - f"unsupported task given of {model_config.task} " - f"for serve model config {model_config}" - ) - - defs.append( - PipelineDefinition( - pipeline=pipeline, - request_model=request_model, - response_model=response_model, - kwargs=kwargs, - config=model_config, - ) - ) - - return defs diff --git a/src/deepsparse/tasks.py b/src/deepsparse/tasks.py index 6ffaad7ec3..690de5276e 100644 --- a/src/deepsparse/tasks.py +++ b/src/deepsparse/tasks.py @@ -78,6 +78,32 @@ class SupportedTasks: token_classification=AliasedTask("token_classification", ["ner"]), ) + image_classification = namedtuple("image_classification", ["image_classification"])( + image_classification=AliasedTask( + "image_classification", + ["image_classification"], + ), + ) + + yolo = namedtuple("yolo", ["yolo"])( + yolo=AliasedTask("yolo", ["yolo"]), + ) + + @classmethod + def check_register_task(cls, task: str): + if cls.is_nlp(task): + # trigger transformers pipelines to register with Pipeline.register + import deepsparse.transformers.pipelines # noqa: F401 + + elif cls.is_image_classification(task): + # trigger image classification pipelines to + # register with Pipeline.register + import deepsparse.image_classification.pipelines # noqa: F401 + + elif cls.is_yolo(task): + # trigger yolo pipelines to register with Pipeline.register + import deepsparse.yolo.pipelines # noqa: F401 + @classmethod def is_nlp(cls, task: str) -> bool: """ @@ -90,3 +116,21 @@ def is_nlp(cls, task: str) -> bool: or cls.nlp.text_classification.matches(task) or cls.nlp.token_classification.matches(task) ) + + @classmethod + def is_image_classification(cls, task: str) -> bool: + """ + :param task: the name of the task to check whether it is an image + classification task + :return: True if it is an image classification task, False otherwise + """ + return cls.image_classification.image_classification.matches(task) + + @classmethod + def is_yolo(cls, task: str) -> bool: + """ + :param task: the name of the task to check whether it is an image + segmentation task using YOLO + :return: True if it is an segmentation task using YOLO, False otherwise + """ + return cls.yolo.yolo.matches(task) diff --git a/src/deepsparse/transformers/__init__.py b/src/deepsparse/transformers/__init__.py index 89c7eb68ef..1264aa316d 100644 --- a/src/deepsparse/transformers/__init__.py +++ b/src/deepsparse/transformers/__init__.py @@ -120,4 +120,3 @@ def _check_transformers_install(): from .helpers import * from .loaders import * from .pipelines import * -from .server import * diff --git a/src/deepsparse/transformers/eval_downstream.py b/src/deepsparse/transformers/eval_downstream.py index b434dec625..8f9e9c5d49 100644 --- a/src/deepsparse/transformers/eval_downstream.py +++ b/src/deepsparse/transformers/eval_downstream.py @@ -58,7 +58,7 @@ from tqdm.auto import tqdm -from deepsparse.transformers import pipeline +from deepsparse import Pipeline from datasets import load_dataset, load_metric # isort: skip @@ -79,14 +79,14 @@ def squad_eval(args): 
squad_metrics = load_metric("squad") # load QA pipeline - question_answer = pipeline( + question_answer = Pipeline.create( task="question-answering", model_path=args.onnx_filepath, engine_type=args.engine, num_cores=args.num_cores, - max_length=args.max_sequence_length, + sequence_length=args.max_sequence_length, ) - print(f"Engine info: {question_answer.model}") + print(f"Engine info: {question_answer.engine}") for idx, sample in enumerate(tqdm(squad)): pred = question_answer( @@ -96,7 +96,7 @@ def squad_eval(args): ) squad_metrics.add_batch( - predictions=[{"prediction_text": pred["answer"], "id": sample["id"]}], + predictions=[{"prediction_text": pred.answer, "id": sample["id"]}], references=[{"answers": sample["answers"], "id": sample["id"]}], ) @@ -114,21 +114,21 @@ def mnli_eval(args): mnli_metrics = load_metric("glue", "mnli") # load pipeline - text_classify = pipeline( + text_classify = Pipeline.create( task="text-classification", model_path=args.onnx_filepath, engine_type=args.engine, num_cores=args.num_cores, - max_length=args.max_sequence_length, + sequence_length=args.max_sequence_length, ) - print(f"Engine info: {text_classify.model}") + print(f"Engine info: {text_classify.engine}") label_map = {"entailment": 0, "neutral": 1, "contradiction": 2} for idx, sample in enumerate(tqdm(mnli_matched)): pred = text_classify([[sample["premise"], sample["hypothesis"]]]) mnli_metrics.add_batch( - predictions=[label_map.get(pred[0]["label"])], + predictions=[label_map.get(pred.labels[0])], references=[sample["label"]], ) @@ -154,14 +154,14 @@ def qqp_eval(args): qqp_metrics = load_metric("glue", "qqp") # load pipeline - text_classify = pipeline( + text_classify = Pipeline.create( task="text-classification", model_path=args.onnx_filepath, engine_type=args.engine, num_cores=args.num_cores, - max_length=args.max_sequence_length, + sequence_length=args.max_sequence_length, ) - print(f"Engine info: {text_classify.model}") + print(f"Engine info: {text_classify.engine}") label_map = {"not_duplicate": 0, "duplicate": 1} @@ -169,7 +169,7 @@ def qqp_eval(args): pred = text_classify([[sample["question1"], sample["question2"]]]) qqp_metrics.add_batch( - predictions=[label_map.get(pred[0]["label"])], + predictions=[label_map.get(pred.labels[0])], references=[sample["label"]], ) @@ -185,14 +185,14 @@ def sst2_eval(args): sst2_metrics = load_metric("glue", "sst2") # load pipeline - text_classify = pipeline( + text_classify = Pipeline.create( task="text-classification", model_path=args.onnx_filepath, engine_type=args.engine, num_cores=args.num_cores, - max_length=args.max_sequence_length, + sequence_length=args.max_sequence_length, ) - print(f"Engine info: {text_classify.model}") + print(f"Engine info: {text_classify.engine}") label_map = {"negative": 0, "positive": 1} @@ -202,7 +202,7 @@ def sst2_eval(args): ) sst2_metrics.add_batch( - predictions=[label_map.get(pred[0]["label"])], + predictions=[label_map.get(pred.labels[0])], references=[sample["label"]], ) diff --git a/src/deepsparse/transformers/pipelines.py b/src/deepsparse/transformers/pipelines.py deleted file mode 100644 index 7725a0e2c2..0000000000 --- a/src/deepsparse/transformers/pipelines.py +++ /dev/null @@ -1,1414 +0,0 @@ -# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Adaptation of transformers.pipelines and onnx_transformers.pipelines - -adapted from: -https://github.com/huggingface/transformers/blob/master/src/transformers/pipelines/base.py -https://github.com/patil-suraj/onnx_transformers/blob/master/onnx_transformers/pipelines.py - -""" - -import json -from abc import ABC, abstractmethod -from dataclasses import dataclass -from itertools import chain -from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Tuple, Union - -import numpy as np -from transformers.configuration_utils import PretrainedConfig -from transformers.data import ( - SquadExample, - SquadFeatures, - squad_convert_examples_to_features, -) -from transformers.file_utils import ExplicitEnum -from transformers.models.auto import AutoConfig, AutoTokenizer -from transformers.tokenization_utils import PreTrainedTokenizer -from transformers.tokenization_utils_base import PaddingStrategy, TruncationStrategy -from transformers.utils import logging - -from deepsparse import Engine, compile_model, cpu -from deepsparse.transformers.helpers import ( - fix_numpy_types, - get_onnx_path_and_configs, - overwrite_transformer_onnx_model_inputs, -) -from deepsparse.transformers.loaders import get_batch_loader - - -try: - import onnxruntime - - ort_import_error = None -except Exception as ort_import_err: - onnxruntime = None - ort_import_error = ort_import_err - -__all__ = [ - "ArgumentHandler", - "Pipeline", - "TextClassificationPipeline", - "TokenClassificationPipeline", - "QuestionAnsweringPipeline", - "pipeline", - "overwrite_transformer_onnx_model_inputs", - "SUPPORTED_ENGINES", - "SUPPORTED_TASKS", -] - -logger = logging.get_logger(__name__) if logging else None - - -class ArgumentHandler(ABC): - """ - Base interface for handling arguments for each Pipeline. - """ - - @abstractmethod - def __call__(self, *args, **kwargs): - raise NotImplementedError() - - -class DefaultArgumentHandler(ArgumentHandler): - """ - Default argument parser handling parameters for each Pipeline`. - """ - - @staticmethod - def handle_kwargs(kwargs: Dict) -> List: - """ - :param kwargs: key word arguments for a pipeline - :return: list of the processed key word arguments - """ - if len(kwargs) == 1: - output = list(kwargs.values()) - else: - output = list(chain(kwargs.values())) - - return DefaultArgumentHandler.handle_args(output) - - @staticmethod - def handle_args(args: Sequence[Any]) -> List[str]: - """ - :param args: sequence of arguments to a pipeline - :return: list of formatted, processed arguments - """ - - # Only one argument, let's do case by case - if len(args) == 1: - if isinstance(args[0], str): - return [args[0]] - elif not isinstance(args[0], list): - return list(args) - else: - return args[0] - - # Multiple arguments (x1, x2, ...) - elif len(args) > 1: - if all([isinstance(arg, str) for arg in args]): - return list(args) - - # If not instance of list, then it should be an instance of iterable - elif isinstance(args, Iterable): - return list(chain.from_iterable(chain(args))) - else: - raise ValueError( - f"Invalid input type {type(args)}. 
Pipeline supports " - "Union[str, Iterable[str]]" - ) - else: - return [] - - def __call__(self, *args, **kwargs): - if len(kwargs) > 0 and len(args) > 0: - raise ValueError("Pipeline cannot handle mixed args and kwargs") - - if len(kwargs) > 0: - return DefaultArgumentHandler.handle_kwargs(kwargs) - else: - return DefaultArgumentHandler.handle_args(args) - - -class _ScikitCompat(ABC): - """ - Interface layer for the Scikit and Keras compatibility. - """ - - @abstractmethod - def transform(self, X): - raise NotImplementedError() - - @abstractmethod - def predict(self, X): - raise NotImplementedError() - - -class Pipeline(_ScikitCompat): - """ - The Pipeline class is the class from which all pipelines inherit. - Refer to this class for methods shared across different pipelines. - This base Pipeline class provides support for multiple inference engine backends. - - Base class implementing pipelined operations. - Pipeline workflow is defined as a sequence of the following operations: - - Input -> Tokenization -> Model Inference -> - Post-Processing (task dependent) -> Output - - Pipeline supports running with the DeepSparse engine or onnxruntime. - - :param model: loaded inference engine to run the model with, can be a - deepsparse Engine or onnxruntime InferenceSession - :param tokenizer: tokenizer to be used for preprocessing - :param config: transformers model config for this model - :param engine_type: name of inference engine that is used. Options are - deepsparse and onnxruntime - :param max_length: maximum sequence length to set for model inputs by default. - default value is 128 - :param input_names: list of input names to the neural network - :param args_parser: Reference to the object in charge of parsing supplied - pipeline parameters. A default is provided if None - :param binary_output: if True, stores outputs as pickled binaries to avoid - storing large amount of textual data. Default is False - """ - - default_input_names = None - - def __init__( - self, - model: Union[Engine, "onnxruntime.InferenceSession"], - tokenizer: PreTrainedTokenizer, - config: PretrainedConfig, - engine_type: str, - max_length: int = 128, - input_names: Optional[List[str]] = None, - args_parser: ArgumentHandler = None, - binary_output: bool = False, - ): - - self.model = model - self.tokenizer = tokenizer - self.config = config - self.engine_type = engine_type - self.max_length = max_length - self.input_names = input_names - self.binary_output = binary_output - self._args_parser = args_parser or DefaultArgumentHandler() - self._framework = ( - "np" if self.engine_type in [DEEPSPARSE_ENGINE, ORT_ENGINE] else "pt" - ) - - def transform(self, X): - """ - Scikit / Keras interface to transformers' pipelines. - This method will forward to __call__(). - """ - return self(X=X) - - def predict(self, X): - """ - Scikit / Keras interface to transformers' pipelines. - This method will forward to __call__(). 
- """ - return self(X=X) - - def _parse_and_tokenize( - self, *args, padding=True, add_special_tokens=True, **kwargs - ): - # Parse arguments - inputs = self._args_parser(*args, **kwargs) - inputs = self.tokenizer( - inputs, - add_special_tokens=add_special_tokens, - return_tensors=self._framework, - padding=PaddingStrategy.MAX_LENGTH.value, - truncation=TruncationStrategy.LONGEST_FIRST.value, - ) - - return inputs - - def __call__(self, *args, **kwargs): - inputs = self._parse_and_tokenize(*args, **kwargs) - return self._forward(inputs) - - def _forward(self, inputs): - if not all(name in inputs for name in self.input_names): - raise ValueError( - f"pipeline expected arrays with names {self.input_names}, received " - f"inputs: {list(inputs.keys())}" - ) - - if self.engine_type == ORT_ENGINE: - inputs = {k: v for k, v in inputs.items() if k in self.input_names} - return self.model.run(None, inputs) - elif self.engine_type == DEEPSPARSE_ENGINE: - return self.model.run([inputs[name] for name in self.input_names]) - # TODO: torch - # with self.device_placement(): - # with torch.no_grad(): - # inputs = self.ensure_tensor_on_device(**inputs) - # predictions = self.model(**inputs)[0].cpu() - # if return_tensors: - # return predictions - # else: - # return predictions.numpy() - - -class TokenClassificationArgumentHandler(ArgumentHandler): - """ - Handles arguments for token classification. - """ - - def __call__(self, inputs: Union[str, List[str]], **kwargs): - - if inputs is not None and isinstance(inputs, (list, tuple)) and len(inputs) > 0: - inputs = list(inputs) - batch_size = len(inputs) - elif isinstance(inputs, str): - inputs = [inputs] - batch_size = 1 - else: - raise ValueError("At least one input is required.") - - offset_mapping = kwargs.get("offset_mapping") - if offset_mapping: - if isinstance(offset_mapping, list) and isinstance( - offset_mapping[0], tuple - ): - offset_mapping = [offset_mapping] - if len(offset_mapping) != batch_size: - raise ValueError( - "offset_mapping should have the same batch size as the input" - ) - return inputs, offset_mapping - - -class QuestionAnsweringArgumentHandler(ArgumentHandler): - """ - QuestionAnsweringPipeline requires the user to provide multiple arguments - (i.e. 
question & context) to be mapped - to internal `transformers.SquadExample` - - QuestionAnsweringArgumentHandler manages all the possible to create a - `transformers.SquadExample` from the command-line supplied arguments - """ - - def __call__(self, *args, **kwargs): - # Position args, handling is sensibly the same as X and data, - # so forwarding to avoid duplicating - if args is not None and len(args) > 0: - if len(args) == 1: - kwargs["X"] = args[0] - else: - kwargs["X"] = list(args) - - # Generic compatibility with sklearn and Keras - # Batched data - if "X" in kwargs or "data" in kwargs: - inputs = kwargs["X"] if "X" in kwargs else kwargs["data"] - - if isinstance(inputs, dict): - inputs = [inputs] - else: - # Copy to avoid overriding arguments - inputs = [i for i in inputs] - - for i, item in enumerate(inputs): - if isinstance(item, dict): - if any(k not in item for k in ["question", "context"]): - raise KeyError( - "You need to provide a dictionary with keys " - "{question:..., context:...}" - ) - - inputs[i] = QuestionAnsweringPipeline.create_sample(**item) - - elif not isinstance(item, SquadExample): - arg_name = "X" if "X" in kwargs else "data" - raise ValueError( - f"{arg_name} argument needs to be of type " - "(list[SquadExample | dict], SquadExample, dict)" - ) - - # Tabular input - elif "question" in kwargs and "context" in kwargs: - if isinstance(kwargs["question"], str): - kwargs["question"] = [kwargs["question"]] - - if isinstance(kwargs["context"], str): - kwargs["context"] = [kwargs["context"]] - - inputs = [ - QuestionAnsweringPipeline.create_sample(q, c) - for q, c in zip(kwargs["question"], kwargs["context"]) - ] - else: - raise ValueError(f"Unknown arguments {kwargs}") - - if not isinstance(inputs, list): - inputs = [inputs] - - return inputs - - -class TextClassificationPipeline(Pipeline): - """ - Text classification pipeline using any `ModelForSequenceClassification`. - - This text classification pipeline can currently be loaded from `pipeline()` - using the following task identifier: `"text-classification"`. - - The models that this pipeline can use are models that have been fine-tuned on - a text classification task. - - :param return_all_scores: set True to return all model scores. Default False - """ - - def __init__(self, return_all_scores: bool = False, **kwargs): - super().__init__(**kwargs) - - self.return_all_scores = return_all_scores - - def __call__(self, *args, **kwargs): - """ - Classify the text(s) given as inputs. - - :param args: One or several texts (or one list of prompts) to classify - :param args: kwargs for inner call function - :return: A list or a list of list of dicts: Each result comes as list of dicts - with the following keys: - - `label` -- The label predicted. - - `score` -- The corresponding probability. 
- If ``self.return_all_scores=True``, one dictionary is returned per label - """ - outputs = super().__call__(*args, **kwargs) - - if isinstance(outputs, list) and outputs: - outputs = outputs[0] - - if self.config.num_labels == 1: - scores = 1.0 / (1.0 + np.exp(-outputs)) - else: - scores = np.exp(outputs) / np.exp(outputs).sum(-1, keepdims=True) - if self.return_all_scores: - return [ - [ - {"label": self.config.id2label[i], "score": score.item()} - for i, score in enumerate(item) - ] - for item in scores - ] - else: - return [ - { - "label": self.config.id2label[item.argmax()], - "score": item.max().item(), - } - for item in scores - ] - - -class AggregationStrategy(ExplicitEnum): - """ - All the valid aggregation strategies for TokenClassificationPipeline - """ - - NONE = "none" - SIMPLE = "simple" - FIRST = "first" - AVERAGE = "average" - MAX = "max" - - -class TokenClassificationPipeline(Pipeline): - """ - Named Entity Recognition pipeline using any `ModelForTokenClassification`. - - This token classification pipeline can currently be loaded from `pipeline()` - using the following task identifier: `"token-classification"`. - - The models that this pipeline can use are models that have been fine-tuned on - a token classification task. - - :param args_parser: argument parser to use default is - TokenClassificationArgumentHandler - :param aggregation_strategy: AggregationStrategy Enum object to determine - the pipeline aggregation strategy. Default is AggregationStrategy.NONE - :param ignore_labels: list of labels to ignore. Default is `["O"]` - """ - - default_input_names = "sequences" - - def __init__( - self, - args_parser: ArgumentHandler = None, - aggregation_strategy: AggregationStrategy = AggregationStrategy.NONE, - ignore_labels: List[str] = False, - **kwargs, - ): - super().__init__( - args_parser=args_parser or TokenClassificationArgumentHandler(), - **kwargs, - ) - - self.ignore_labels = ignore_labels or ["O"] - - if isinstance(aggregation_strategy, str): - aggregation_strategy = AggregationStrategy[aggregation_strategy.upper()] - - if ( - aggregation_strategy - in { - AggregationStrategy.FIRST, - AggregationStrategy.MAX, - AggregationStrategy.AVERAGE, - } - and not self.tokenizer.is_fast - ): - raise ValueError( - "Slow tokenizers cannot handle subwords. Please set the " - '`aggregation_strategy` option to `"simple"` or use a fast tokenizer.' - ) - - self.aggregation_strategy = aggregation_strategy - - def __call__(self, inputs: Union[str, List[str]], **kwargs): - """ - Classify each token of the text(s) given as inputs. - - - :param inputs: One or several texts (or one list of texts) for token - classification - :return: A list or a list of list of :obj:`dict`: Each result comes as a list - of dictionaries (one for each token in the corresponding input, or each - entity if this pipeline was instantiated with an aggregation_strategy) - with the following keys: - - `word` -- The token/word classified. - - `score` -- The corresponding probability for `entity`. - - `entity` -- The entity predicted for that token/word (it is named - `entity_group` when `aggregation_strategy` is not `"none"`. - - `index` -- The index of the corresponding token in the sentence. - - `start` -- index of the start of the corresponding entity in the sentence - Only exists if the offsets are available within the tokenizer - - `end` -- The index of the end of the corresponding entity in the sentence. 
- Only exists if the offsets are available within the tokenizer - """ - - _inputs, offset_mappings = self._args_parser(inputs, **kwargs) - - answers = [] - - tokens = self.tokenizer( - _inputs, - return_tensors=self._framework, - truncation=TruncationStrategy.LONGEST_FIRST.value, - padding=PaddingStrategy.MAX_LENGTH.value, - return_special_tokens_mask=True, - return_offsets_mapping=self.tokenizer.is_fast, - ) - - if self.tokenizer.is_fast: - offset_mapping = tokens.pop("offset_mapping") - elif not offset_mappings: - offset_mapping = [None] * len(_inputs) - - special_tokens_mask = tokens.pop("special_tokens_mask") - - # Forward - _forward_pass = self._forward(tokens) - for entities_index, current_entities in enumerate(_forward_pass[0]): - input_ids = tokens["input_ids"][entities_index] - - scores = np.exp(current_entities) / np.exp(current_entities).sum( - -1, keepdims=True - ) - pre_entities = self.gather_pre_entities( - _inputs[entities_index], - input_ids, - scores, - offset_mapping[entities_index], - special_tokens_mask[entities_index], - ) - grouped_entities = self.aggregate(pre_entities, self.aggregation_strategy) - # Filter anything that is in self.ignore_labels - current_entities = [ - entity - for entity in grouped_entities - if entity.get("entity", None) not in self.ignore_labels - and entity.get("entity_group", None) not in self.ignore_labels - ] - answers.append(current_entities) - - if len(answers) == 1: - return answers[0] - return answers - - def gather_pre_entities( - self, - sentence: str, - input_ids: np.ndarray, - scores: np.ndarray, - offset_mapping: Optional[List[Tuple[int, int]]], - special_tokens_mask: np.ndarray, - ) -> List[dict]: - pre_entities = [] - for idx, token_scores in enumerate(scores): - # Filter special_tokens, they should only occur - # at the sentence boundaries since we're not encoding pairs of - # sentences so we don't have to keep track of those. 
- if special_tokens_mask[idx]: - continue - - word = self.tokenizer.convert_ids_to_tokens(int(input_ids[idx])) - if offset_mapping is not None: - start_ind, end_ind = offset_mapping[idx] - word_ref = sentence[start_ind:end_ind] - is_subword = len(word_ref) != len(word) - - if int(input_ids[idx]) == self.tokenizer.unk_token_id: - word = word_ref - is_subword = False - else: - start_ind = None - end_ind = None - is_subword = False - - pre_entity = { - "word": word, - "scores": token_scores, - "start": start_ind, - "end": end_ind, - "index": idx, - "is_subword": is_subword, - } - pre_entities.append(pre_entity) - return pre_entities - - def aggregate( - self, pre_entities: List[dict], aggregation_strategy: AggregationStrategy - ) -> List[dict]: - if aggregation_strategy in { - AggregationStrategy.NONE, - AggregationStrategy.SIMPLE, - }: - entities = [] - for pre_entity in pre_entities: - entity_idx = pre_entity["scores"].argmax() - score = pre_entity["scores"][entity_idx] - entity = { - "entity": self.config.id2label[entity_idx], - "score": score, - "index": pre_entity["index"], - "word": pre_entity["word"], - "start": pre_entity["start"], - "end": pre_entity["end"], - } - entities.append(entity) - else: - entities = self.aggregate_words(pre_entities, aggregation_strategy) - - if aggregation_strategy == AggregationStrategy.NONE: - return entities - return self.group_entities(entities) - - def aggregate_word( - self, entities: List[dict], aggregation_strategy: AggregationStrategy - ) -> dict: - word = self.tokenizer.convert_tokens_to_string( - [entity["word"] for entity in entities] - ) - if aggregation_strategy == AggregationStrategy.FIRST: - scores = entities[0]["scores"] - idx = scores.argmax() - score = scores[idx] - entity = self.config.id2label[idx] - elif aggregation_strategy == AggregationStrategy.MAX: - max_entity = max(entities, key=lambda entity: entity["scores"].max()) - scores = max_entity["scores"] - idx = scores.argmax() - score = scores[idx] - entity = self.config.id2label[idx] - elif aggregation_strategy == AggregationStrategy.AVERAGE: - scores = np.stack([entity["scores"] for entity in entities]) - average_scores = np.nanmean(scores, axis=0) - entity_idx = average_scores.argmax() - entity = self.config.id2label[entity_idx] - score = average_scores[entity_idx] - else: - raise ValueError("Invalid aggregation_strategy") - new_entity = { - "entity": entity, - "score": score, - "word": word, - "start": entities[0]["start"], - "end": entities[-1]["end"], - } - return new_entity - - def aggregate_words( - self, entities: List[dict], aggregation_strategy: AggregationStrategy - ) -> List[dict]: - assert aggregation_strategy not in { - AggregationStrategy.NONE, - AggregationStrategy.SIMPLE, - }, "NONE and SIMPLE strategies are invalid" - - word_entities = [] - word_group = None - for entity in entities: - if word_group is None: - word_group = [entity] - elif entity["is_subword"]: - word_group.append(entity) - else: - word_entities.append( - self.aggregate_word(word_group, aggregation_strategy) - ) - word_group = [entity] - # Last item - word_entities.append(self.aggregate_word(word_group, aggregation_strategy)) - return word_entities - - def group_sub_entities(self, entities: List[dict]) -> dict: - # Get the first entity in the entity group - entity = entities[0]["entity"].split("-")[-1] - scores = np.nanmean([entity["score"] for entity in entities]) - tokens = [entity["word"] for entity in entities] - - entity_group = { - "entity_group": entity, - "score": np.mean(scores), - "word": 
self.tokenizer.convert_tokens_to_string(tokens), - "start": entities[0]["start"], - "end": entities[-1]["end"], - } - return entity_group - - def get_tag(self, entity_name: str) -> Tuple[str, str]: - if entity_name.startswith("B-"): - bi = "B" - tag = entity_name[2:] - elif entity_name.startswith("I-"): - bi = "I" - tag = entity_name[2:] - else: - # It's not in B-, I- format - bi = "B" - tag = entity_name - return bi, tag - - def group_entities(self, entities: List[dict]) -> List[dict]: - - entity_groups = [] - entity_group_disagg = [] - - for entity in entities: - if not entity_group_disagg: - entity_group_disagg.append(entity) - continue - - # If the current entity is similar and adjacent to the previous entity, - # append it to the disaggregated entity group - # The split is meant to account for the "B" and "I" prefixes - # Shouldn't merge if both entities are B-type - bi, tag = self.get_tag(entity["entity"]) - last_bi, last_tag = self.get_tag(entity_group_disagg[-1]["entity"]) - - if tag == last_tag and bi != "B": - # Modify subword type to be previous_type - entity_group_disagg.append(entity) - else: - # If the current entity is different from the previous entity - # aggregate the disaggregated entity group - entity_groups.append(self.group_sub_entities(entity_group_disagg)) - entity_group_disagg = [entity] - if entity_group_disagg: - # it's the last entity, add it to the entity groups - entity_groups.append(self.group_sub_entities(entity_group_disagg)) - - return entity_groups - - -class QuestionAnsweringPipeline(Pipeline): - """ - Question Answering pipeline using any `ModelForQuestionAnswering` - - This question answering pipeline can currently be loaded from `pipeline()` - using the following task identifier: `"question-answering"`. - - The models that this pipeline can use are models that have been fine-tuned on - a question answering task. - - :param model: loaded inference engine to run the model with, can be a - deepsparse Engine or onnxruntime InferenceSession - :param tokenizer: tokenizer to be used for preprocessing - :param config: transformers model config for this model - :param engine_type: name of inference engine that is used. Options are - deepsparse and onnxruntime - :param input_names: list of input names to the neural network - :param args_parser: Reference to the object in charge of parsing supplied - pipeline parameters. A default is provided if None - :param binary_output: if True, stores outputs as pickled binaries to avoid - storing large amount of textual data. 
Default is False - """ - - default_input_names = "question,context" - - def __init__( - self, - model: Union[Engine, "onnxruntime.InferenceSession"], - tokenizer: PreTrainedTokenizer, - engine_type: str, - input_names: Optional[List[str]] = None, - **kwargs, - ): - super().__init__( - model=model, - tokenizer=tokenizer, - engine_type=engine_type, - args_parser=QuestionAnsweringArgumentHandler(), - input_names=input_names, - **kwargs, - ) - - @staticmethod - def create_sample( - question: Union[str, List[str]], context: Union[str, List[str]] - ) -> Union[SquadExample, List[SquadExample]]: - """ - :param question: single question or list of question strings - :param context: single context or list of context strings - :return: processed SquadExample object(s) for each question/context pair given - """ - if isinstance(question, list): - return [ - SquadExample(None, q, c, None, None, None) - for q, c in zip(question, context) - ] - else: - return SquadExample(None, question, context, None, None, None) - - def __call__(self, *args, **kwargs): - """ - Answer the question(s) given as inputs by using the context(s). - Multiple arguments can be used to pass the context, question data - - :param args: SquadExample or list of them containing the question and context - :param X: SquadExample or list of them containing the question and context - :param data: SquadExample or list of them containing the question and context - :param question: single question or list of question strings - :param context: single context or list of context strings - :param topk: the number of answers to return. Will be chosen by - order of likelihood) - :param doc_stride: if the context is too long to fit with the question for the - model, it will be split in several chunks with some overlap. This argument - controls the size of that overlap - :param max_answer_len: maximum length of predicted answers (e.g., only - answers with a shorter length are considered) - :param max_seq_len: maximum length of the total sentence (context + question) - after tokenization. The context will be split in several chunks - (using the doc_stride) if needed - :param max_question_len: maximum length of the question after tokenization. - It will be truncated if needed - :param handle_impossible_answer: whether or not we accept impossible as an - answer - :param num_spans: maximum number of span to use as input from a long - context. Default is to stride the entire context string - :param preprocessed_inputs: if provided, preprocessing will be skipped in favor - of these inputs. 
Expected format is the output of self.preprocess; a tuple - of (examples, features_list) - :return: dict or list of dictionaries, each containing the following keys: - `"score"` - The probability associated to the answer - `"start"` - The start index of the answer - `"end"` - The end index of the answer - `"answer"` - The answer to the question - """ - # Set defaults values - kwargs.setdefault("topk", 1) - kwargs.setdefault("max_answer_len", 15) - kwargs.setdefault("handle_impossible_answer", False) - kwargs.setdefault("preprocessed_inputs", None) # (examples, features_list) - - if kwargs["topk"] < 1: - raise ValueError(f"topk parameter should be >= 1 (got {kwargs['topk']})") - - if kwargs["max_answer_len"] < 1: - raise ValueError( - "max_answer_len parameter should be >= 1 " - f"(got {kwargs['max_answer_len']})" - ) - - # run pre-processing if not provided - examples, features_list = kwargs["preprocessed_inputs"] or self.preprocess( - *args, **kwargs - ) - - # forward pass and post-processing - all_answers = [] - for features, example in zip(features_list, examples): - model_input_names = self.tokenizer.model_input_names + ["input_ids"] - fw_args = { - k: [feature.__dict__[k] for feature in features] - for k in model_input_names - } - - # Manage tensor allocation on correct device - fw_args = {k: np.array(v) for (k, v) in fw_args.items()} - start, end = self._forward(fw_args)[:2] - - # TODO: torch - # fw_args = {k: torch.tensor(v, device=self.device) - # for (k, v) in fw_args.items()} - # start, end = self.model(**fw_args)[:2] - # start, end = start.cpu().numpy(), end.cpu().numpy() - - min_null_score = 1000000 # large and positive - answers = [] - for (feature, start_, end_) in zip(features, start, end): - # Ensure padded tokens & question tokens cannot belong - undesired_tokens = ( - np.abs(np.array(feature.p_mask) - 1) & feature.attention_mask - ) - - # Generate mask - undesired_tokens_mask = undesired_tokens == 0.0 - - # Make sure non-context indexes cannot contribute to the softmax - start_ = np.where(undesired_tokens_mask, -10000.0, start_) - end_ = np.where(undesired_tokens_mask, -10000.0, end_) - - # Normalize logits and spans to retrieve the answer - start_ = np.exp( - start_ - np.log(np.sum(np.exp(start_), axis=-1, keepdims=True)) - ) - end_ = np.exp( - end_ - np.log(np.sum(np.exp(end_), axis=-1, keepdims=True)) - ) - - if kwargs["handle_impossible_answer"]: - min_null_score = min(min_null_score, (start_[0] * end_[0]).item()) - - # Mask CLS - start_[0] = end_[0] = 0.0 - - starts, ends, scores = self.decode( - start_, end_, kwargs["topk"], kwargs["max_answer_len"] - ) - - if not self.tokenizer.is_fast: - char_to_word = np.array(example.char_to_word_offset) - answers += [ - { - "score": score.item(), - "start": np.where( - char_to_word == feature.token_to_orig_map[s] - )[0][0].item(), - "end": np.where( - char_to_word == feature.token_to_orig_map[e] - )[0][-1].item(), - "answer": " ".join( - example.doc_tokens[ - feature.token_to_orig_map[ - s - ] : feature.token_to_orig_map[e] - + 1 - ] - ), - } - for s, e, score in zip(starts, ends, scores) - ] - else: - question_first = bool(self.tokenizer.padding_side == "right") - - # Sometimes the max probability token is in the middle of a word so: - # we start by finding the right word containing the token with - # `token_to_word` then we convert this word in a character span - answers += [ - { - "score": score.item(), - "start": feature.encoding.word_to_chars( - feature.encoding.token_to_word(s), - sequence_index=1 if question_first else 
0, - )[0], - "end": feature.encoding.word_to_chars( - feature.encoding.token_to_word(e), - sequence_index=1 if question_first else 0, - )[1], - "answer": example.context_text[ - feature.encoding.word_to_chars( - feature.encoding.token_to_word(s), - sequence_index=1 if question_first else 0, - )[0] : feature.encoding.word_to_chars( - feature.encoding.token_to_word(e), - sequence_index=1 if question_first else 0, - )[ - 1 - ] - ], - } - for s, e, score in zip(starts, ends, scores) - ] - - if kwargs["handle_impossible_answer"]: - answers.append( - {"score": min_null_score, "start": 0, "end": 0, "answer": ""} - ) - - answers = sorted(answers, key=lambda x: x["score"], reverse=True)[ - : kwargs["topk"] - ] - all_answers += answers - - if len(all_answers) == 1: - return all_answers[0] - return all_answers - - def preprocess(self, *args, **kwargs) -> Tuple[Any, Any]: - """ - preprocess the given QA model inputs using squad_convert_examples_to_features - - :param args: SquadExample or list of them containing the question and context - :param X: SquadExample or list of them containing the question and context - :param data: SquadExample or list of them containing the question and context - :param question: single question or list of question strings - :param context: single context or list of context strings - :param doc_stride: if the context is too long to fit with the question for the - model, it will be split in several chunks with some overlap. This argument - controls the size of that overlap - :param max_seq_len: maximum length of the total sentence (context + question) - after tokenization. The context will be split in several chunks - (using the doc_stride) if needed - :param max_question_len: maximum length of the question after tokenization. - It will be truncated if needed - :param num_spans: maximum number of spans to use as input from a long - context. Default is to stride the entire context string - :return: tuple of SquadExample inputs and preprocessed features list - """ - kwargs.setdefault("doc_stride", 128) - kwargs.setdefault("max_seq_len", self.max_length) - kwargs.setdefault("max_question_len", 64) - kwargs.setdefault("num_spans", None) - - # Convert inputs to features - examples = self._args_parser(*args, **kwargs) - if not self.tokenizer.is_fast: - features_list = [ - squad_convert_examples_to_features( - examples=[example], - tokenizer=self.tokenizer, - max_seq_length=kwargs["max_seq_len"], - doc_stride=kwargs["doc_stride"], - max_query_length=kwargs["max_question_len"], - padding_strategy=PaddingStrategy.MAX_LENGTH.value, - is_training=False, - tqdm_enabled=False, - ) - for example in examples - ] - else: - features_list = self._encode_features_fast(examples, **kwargs) - - if kwargs["num_spans"]: - features_list = [ - features[: kwargs["num_spans"]] for features in features_list - ] - - return examples, features_list - - def decode( - self, start: np.ndarray, end: np.ndarray, topk: int, max_answer_len: int - ) -> Tuple: - """ - :param start: Individual start probabilities for each token - :param end: Individual end probabilities for each token - :param topk: Indicates how many possible answer span(s) to extract from the - model output - :param max_answer_len: Maximum size of the answer to extract from the model - output - :return: probabilities for each span to be the actual answer. 
Will filter out - unwanted and impossible cases - """ - # Ensure we have batch axis - if start.ndim == 1: - start = start[None] - - if end.ndim == 1: - end = end[None] - - # Compute the score of each tuple(start, end) to be the real answer - outer = np.matmul(np.expand_dims(start, -1), np.expand_dims(end, 1)) - - # Remove candidate with end < start and end - start > max_answer_len - candidates = np.tril(np.triu(outer), max_answer_len - 1) - - # Inspired by Chen & al. (https://github.com/facebookresearch/DrQA) - scores_flat = candidates.flatten() - if topk == 1: - idx_sort = [np.argmax(scores_flat)] - elif len(scores_flat) < topk: - idx_sort = np.argsort(-scores_flat) - else: - idx = np.argpartition(-scores_flat, topk)[0:topk] - idx_sort = idx[np.argsort(-scores_flat[idx])] - - start, end = np.unravel_index(idx_sort, candidates.shape)[1:] - return start, end, candidates[0, start, end] - - def span_to_answer( - self, text: str, start: int, end: int - ) -> Dict[str, Union[str, int]]: - """ - When decoding from token probabilities, this method maps token indexes to - actual word in the initial context. - - :param text: The actual context to extract the answer from - :param start: The answer starting token index - :param end: The answer end token index - :return: Dictionary containing the start, end, and answer - """ - words = [] - token_idx = char_start_idx = char_end_idx = chars_idx = 0 - - for i, word in enumerate(text.split(" ")): - token = self.tokenizer.tokenize(word) - - # Append words if they are in the span - if start <= token_idx <= end: - if token_idx == start: - char_start_idx = chars_idx - - if token_idx == end: - char_end_idx = chars_idx + len(word) - - words += [word] - - # Stop if we went over the end of the answer - if token_idx > end: - break - - # Append the subtokenization length to the running index - token_idx += len(token) - chars_idx += len(word) + 1 - - # Join text with spaces - return { - "answer": " ".join(words), - "start": max(0, char_start_idx), - "end": min(len(text), char_end_idx), - } - - def _encode_features_fast(self, examples: Any, **kwargs) -> List[SquadFeatures]: - features_list = [] - for example in examples: - # Define the side we want to truncate / pad and the text/pair sorting - question_first = bool(self.tokenizer.padding_side == "right") - - encoded_inputs = self.tokenizer( - text=example.question_text if question_first else example.context_text, - text_pair=( - example.context_text if question_first else example.question_text - ), - padding=PaddingStrategy.MAX_LENGTH.value, - truncation="only_second" if question_first else "only_first", - max_length=kwargs["max_seq_len"], - stride=kwargs["doc_stride"], - return_tensors="np", - return_token_type_ids=True, - return_overflowing_tokens=True, - return_offsets_mapping=True, - return_special_tokens_mask=True, - ) - - total_spans = len(encoded_inputs["input_ids"]) - - # p_mask: mask with 1 for token than cannot be in the answer - # We put 0 on the tokens from the context and 1 everywhere else - p_mask = np.asarray( - [ - [ - tok != 1 if question_first else 0 - for tok in encoded_inputs.sequence_ids(span_id) - ] - for span_id in range(total_spans) - ] - ) - - # keep the cls_token unmasked - if self.tokenizer.cls_token_id is not None: - cls_index = np.nonzero( - encoded_inputs["input_ids"] == self.tokenizer.cls_token_id - ) - p_mask[cls_index] = 0 - - features = [] - for span_idx in range(total_spans): - features.append( - SquadFeatures( - input_ids=encoded_inputs["input_ids"][span_idx], - 
attention_mask=encoded_inputs["attention_mask"][span_idx], - token_type_ids=encoded_inputs["token_type_ids"][span_idx], - p_mask=p_mask[span_idx].tolist(), - encoding=encoded_inputs[span_idx], - # the following values are unused for fast tokenizers - cls_index=None, - token_to_orig_map={}, - example_index=0, - unique_id=0, - paragraph_len=0, - token_is_max_context=0, - tokens=[], - start_position=0, - end_position=0, - is_impossible=False, - qas_id=None, - ) - ) - features_list.append(features) - return features_list - - -@dataclass -class TaskInfo: - """ - Information about an NLP task - - :param pipeline_constructor: reference to constructor for the given pipeline task - :param default model name: the transformers canonical name for the default model - :param base_stub: sparsezoo stub path for the base model for this task - :param default_pruned_stub: sparsezoo stub path for the default pruned model - for this task - :param default_quant_stub: sparsezoo stub path for the default quantized model - for this task - """ - - pipeline_constructor: Callable[[Any], Pipeline] - default_model_name: str - base_stub: Optional[str] = None - default_pruned_stub: Optional[str] = None - default_quant_stub: Optional[str] = None - - -# Register all the supported tasks here -SUPPORTED_TASKS = { - "ner": TaskInfo( - pipeline_constructor=TokenClassificationPipeline, - default_model_name="bert-base-uncased", - ), - "question-answering": TaskInfo( - pipeline_constructor=QuestionAnsweringPipeline, - default_model_name="bert-base-uncased", - base_stub=( - "zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/base-none" - ), - default_pruned_stub=( - "zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/" - "pruned-aggressive_98" - ), - ), - "sentiment-analysis": TaskInfo( - pipeline_constructor=TextClassificationPipeline, - default_model_name="bert-base-uncased", - ), - "text-classification": TaskInfo( - pipeline_constructor=TextClassificationPipeline, - default_model_name="bert-base-uncased", - ), - "token-classification": TaskInfo( - pipeline_constructor=TokenClassificationPipeline, - default_model_name="bert-base-uncased", - ), -} - -DEEPSPARSE_ENGINE = "deepsparse" -ORT_ENGINE = "onnxruntime" - -SUPPORTED_ENGINES = [DEEPSPARSE_ENGINE, ORT_ENGINE] - - -def pipeline( - task: str, - model_name: Optional[str] = None, - model_path: Optional[str] = None, - engine_type: str = DEEPSPARSE_ENGINE, - config: Optional[Union[str, PretrainedConfig]] = None, - tokenizer: Optional[Union[str, PreTrainedTokenizer]] = None, - max_length: int = 128, - num_cores: Optional[int] = None, - scheduler: Optional[str] = None, - batch_size: Optional[int] = 1, - **kwargs, -) -> Pipeline: - """ - Utility factory method to build a Pipeline - - :param task: name of the task to define which pipeline to create. Currently - supported task - "question-answering" - :param model_name: canonical name of the hugging face model this model is based on - :param model_path: path to model directory containing `model.onnx`, `config.json`, - and `tokenizer.json` files, ONNX model file, or SparseZoo stub - :param engine_type: inference engine name to use. Supported options are 'deepsparse' - and 'onnxruntime' - :param config: huggingface model config, if none provided, default will be used - which will be from the model name or sparsezoo stub if given for model path - :param tokenizer: huggingface tokenizer, if none provided, default will be used - :param max_length: maximum sequence length of model inputs. 
default is 128 - :param num_cores: number of CPU cores to run engine with. Default is the maximum - available - :param scheduler: The scheduler to use for the engine. Can be None, single or multi. - :param batch_size: The batch_size to use for the pipeline. Defaults to 1 - Note: `question-answering` pipeline only supports a batch_size of 1. - :param kwargs: additional key word arguments for task specific pipeline constructor - :return: Pipeline object for the given taks and model - """ - - # Retrieve the task - if task not in SUPPORTED_TASKS: - raise KeyError( - f"Unknown task {task}, available tasks are {list(SUPPORTED_TASKS.keys())}" - ) - if engine_type not in SUPPORTED_ENGINES: - raise ValueError( - f"Unsupported engine {engine_type}, supported engines " - f"are {SUPPORTED_ENGINES}" - ) - if task == "question-answering" and batch_size != 1: - raise ValueError( - f"{task} pipeline only supports batch_size 1. " - f"Supplied batch_size = {batch_size}" - ) - task_info = SUPPORTED_TASKS[task] - - model_path = model_path or _get_default_model_path(task_info) - model_name = model_name or task_info.default_model_name - - onnx_path, config_path, tokenizer_path = get_onnx_path_and_configs(model_path) - - # default the tokenizer and config to file in model directory or given model name - config = config or config_path or model_name - tokenizer = tokenizer or tokenizer_path or model_name - - # create model - model, input_names = _create_model( - onnx_path, - engine_type, - num_cores, - max_length, - scheduler=scheduler, - batch_size=batch_size, - ) - - # Instantiate tokenizer if needed - if isinstance(tokenizer, (str, tuple)): - if isinstance(tokenizer, tuple): - # For tuple we have (tokenizer name, {kwargs}) - tokenizer_kwargs = tokenizer[1] - tokenizer_kwargs["model_max_length"] = max_length - tokenizer = AutoTokenizer.from_pretrained(tokenizer[0], **tokenizer[1]) - else: - tokenizer = AutoTokenizer.from_pretrained( - tokenizer, model_max_length=max_length - ) - - # Instantiate config if needed - if config is not None and isinstance(config, str): - config = AutoConfig.from_pretrained(config, finetuning_task=task) - - return task_info.pipeline_constructor( - model=model, - tokenizer=tokenizer, - config=config, - engine_type=engine_type, - max_length=max_length, - input_names=input_names, - **kwargs, - ) - - -def _get_default_model_path(task_info: TaskInfo) -> str: - if cpu.cpu_vnni_compatible() and task_info.default_quant_stub: - return task_info.default_quant_stub - return task_info.default_pruned_stub or task_info.base_stub - - -def _create_model( - model_path: str, - engine_type: str, - num_cores: Optional[int], - max_length: int = 128, - scheduler: Optional[str] = None, - batch_size: int = 1, -) -> Tuple[Union[Engine, "onnxruntime.InferenceSession"], List[str]]: - onnx_path, input_names, _ = overwrite_transformer_onnx_model_inputs( - model_path, max_length=max_length - ) - - if engine_type == DEEPSPARSE_ENGINE: - model = compile_model( - onnx_path, - batch_size=batch_size, - num_cores=num_cores, - scheduler=scheduler, - ) - elif engine_type == ORT_ENGINE: - _validate_ort_import() - sess_options = onnxruntime.SessionOptions() - if num_cores is not None: - sess_options.intra_op_num_threads = num_cores - sess_options.log_severity_level = 3 - sess_options.graph_optimization_level = ( - onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL - ) - - model = onnxruntime.InferenceSession(onnx_path, sess_options=sess_options) - - return model, input_names - - -def _validate_ort_import(): - if 
ort_import_error is not None: - raise ImportError( - "An exception occurred when importing onxxruntime. Please verify that " - "onnxruntime is installed in order to use the onnxruntime inference " - f"engine. \n\nException info: {ort_import_error}" - ) - - -def process_dataset( - pipeline_object: Callable, - data_path: str, - batch_size: int, - task: str, - output_path: str, -) -> None: - """ - :param pipeline_object: An instantiated pipeline Callable object - :param data_path: Path to input file, supports csv, json and text files - :param batch_size: batch_size to use for inference - :param task: The task pipeline is instantiated for - :param output_path: Path to a json file to output inference results to - """ - batch_loader = get_batch_loader( - data_file=data_path, - batch_size=batch_size, - task=task, - ) - # Wraps pipeline object to make numpy types serializable - pipeline_object = fix_numpy_types(pipeline_object) - with open(output_path, "a") as output_file: - for batch in batch_loader: - batch_output = pipeline_object(**batch) - json.dump(batch_output, output_file) - output_file.write("\n") diff --git a/src/deepsparse/transformers/pipelines/__init__.py b/src/deepsparse/transformers/pipelines/__init__.py new file mode 100644 index 0000000000..9986181a2a --- /dev/null +++ b/src/deepsparse/transformers/pipelines/__init__.py @@ -0,0 +1,20 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# flake8: noqa + +from .pipeline import * +from .question_answering import * +from .text_classification import * +from .token_classification import * diff --git a/src/deepsparse/transformers/pipelines/pipeline.py b/src/deepsparse/transformers/pipelines/pipeline.py new file mode 100644 index 0000000000..2fdcd27236 --- /dev/null +++ b/src/deepsparse/transformers/pipelines/pipeline.py @@ -0,0 +1,219 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
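# --- Illustrative usage sketch (editor's aside, not lines from this patch) ---
# The new `deepsparse.transformers.pipelines` package above re-exports the task
# pipelines added below, so transformers tasks are expected to be built through
# the shared `Pipeline.create` registry instead of the old
# `deepsparse.transformers.pipeline` factory. The task name and default
# SparseZoo stub fallback are registered later in this patch; how `create`
# resolves the default model and maps keyword args onto the input schema is
# assumed from the base class description, not shown verbatim here.
from deepsparse import Pipeline

# no model_path given: assumes Pipeline.create falls back to the task's
# registered default_model_path (the pruned-quantized SQuAD BERT stub below)
qa = Pipeline.create(task="question_answering", sequence_length=128)

# keyword args are assumed to be parsed into QuestionAnsweringInput; the result
# should be a QuestionAnsweringOutput with `answer`, `score`, `start`, and `end`
prediction = qa(question="Who wrote the report?", context="The report was written by Ada.")
print(prediction.answer, prediction.score)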
+ +""" +Base Pipeline class for transformers inference pipeline +""" + + +import warnings +from typing import Any, List, Mapping, Optional + +import numpy +from transformers.models.auto import AutoConfig, AutoTokenizer + +from deepsparse import Pipeline +from deepsparse.transformers.helpers import ( + get_onnx_path_and_configs, + overwrite_transformer_onnx_model_inputs, +) + + +__all__ = [ + "TransformersPipeline", + "pipeline", +] + + +class TransformersPipeline(Pipeline): + """ + Base deepsparse.Pipeline class for transformers model loading. This class handles + the parsing of deepsparse-transformers files and model inputs, supporting loading + from sparsezoo, a directory containing a model.onnx, tokenizer, and model config, + or just an ONNX file with the ability to load a tokenizer and model config from + a default huggingface-transformers model. + + Note, when implementing child tasks in deepsparse.transformers.pipelines, + in addition to registering task names with Pipeline.register, task names should + be added to the supported nlp tasks in deepsparse.tasks so they can be properly + imported at runtime. + + :param model_path: sparsezoo stub to a transformers model, an ONNX file, or + (preferred) a directory containing a model.onnx, tokenizer config, and model + config. If no tokenizer and/or model config(s) are found, then they will be + loaded from huggingface transformers using the `default_model_name` key + :param engine_type: inference engine to use. Currently supported values include + 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' + :param batch_size: static batch size to use for inference. Default is 1 + :param num_cores: number of CPU cores to allocate for inference engine. None + specifies all available cores. Default is None + :param scheduler: (deepsparse only) kind of scheduler to execute with. + Pass None for the default + :param input_shapes: list of shapes to set ONNX the inputs to. Pass None + to use model as-is. Default is None + :param alias: optional name to give this pipeline instance, useful when + inferencing with multiple models. Default is None + :param sequence_length: static sequence length to use for inference + :param default_model_name: huggingface transformers model name to use to + load a tokenizer and model config when none are provided in the `model_path`. + Default is 'bert-base-uncased' + """ + + def __init__( + self, + *, + sequence_length: int = 128, + default_model_name: str = "bert-base-uncased", + **kwargs, + ): + + self._sequence_length = sequence_length + self._default_model_name = default_model_name + + self.config = None + self.tokenizer = None + self.onnx_input_names = None + + self._temp_model_directory = None + + super().__init__(**kwargs) + + @property + def sequence_length(self) -> int: + """ + :return: static sequence length to use for inference + """ + return self._sequence_length + + @property + def default_model_name(self) -> str: + """ + :return: huggingface transformers model name to use to + load a tokenizer and model config when none are provided in the + `model_path` + """ + return self._default_model_name + + def setup_onnx_file_path(self) -> str: + """ + Parses ONNX, tokenizer, and config file paths from the given `model_path`. + Supports sparsezoo stubs. 
If a tokenizer and/or config file are not found, + they will be defaulted to the default_model_name in the transformers repo + + :return: file path to the processed ONNX file for the engine to compile + """ + onnx_path, config_path, tokenizer_path = get_onnx_path_and_configs( + self.model_path + ) + + # default config + tokenizer if necessary + config_path = config_path or self.default_model_name + tokenizer_path = tokenizer_path or self.default_model_name + + self.config = AutoConfig.from_pretrained( + config_path, finetuning_task=self.task if hasattr(self, "task") else None + ) + self.tokenizer = AutoTokenizer.from_pretrained( + tokenizer_path, model_max_length=self.sequence_length + ) + + # overwrite onnx graph to given required input shape + ( + onnx_path, + self.onnx_input_names, + self._temp_model_directory, + ) = overwrite_transformer_onnx_model_inputs( + onnx_path, max_length=self.sequence_length + ) + + return onnx_path + + def tokens_to_engine_input( + self, tokens: Mapping[Any, numpy.ndarray] + ) -> List[numpy.ndarray]: + """ + :param tokens: outputs of the pipeline tokenizer + :return: list of numpy arrays in expected order for model input + """ + if not all(name in tokens for name in self.onnx_input_names): + raise ValueError( + f"pipeline expected arrays with names {self.onnx_input_names}, " + f"received inputs: {list(tokens.keys())}" + ) + + return [tokens[name] for name in self.onnx_input_names] + + +def pipeline( + task: str, + model_name: Optional[str] = None, + model_path: Optional[str] = None, + engine_type: str = "deepsparse", + config: Optional[str] = None, + tokenizer: Optional[str] = None, + max_length: int = 128, + num_cores: Optional[int] = None, + scheduler: Optional[str] = None, + batch_size: Optional[int] = 1, + **kwargs, +) -> Pipeline: + """ + [DEPRECATED] - deepsparse.transformers.pipeline is deprecated. To create DeepSparse + pipelines for transformers tasks, use deepsparse.Pipeline.create(task, ...) + + Utility factory method to build a Pipeline + + :param task: name of the task to define which pipeline to create. Currently + supported task - "question-answering" + :param model_name: canonical name of the hugging face model this model is based on + :param model_path: path to model directory containing `model.onnx`, `config.json`, + and `tokenizer.json` files, ONNX model file, or SparseZoo stub + :param engine_type: inference engine name to use. Options are 'deepsparse' + and 'onnxruntime'. Default is 'deepsparse' + :param config: huggingface model config, if none provided, default will be used + which will be from the model name or sparsezoo stub if given for model path + :param tokenizer: huggingface tokenizer, if none provided, default will be used + :param max_length: maximum sequence length of model inputs. default is 128 + :param num_cores: number of CPU cores to run engine with. Default is the maximum + available + :param scheduler: The scheduler to use for the engine. Can be None, single or multi + :param batch_size: The batch_size to use for the pipeline. Defaults to 1 + Note: `question-answering` pipeline only supports a batch_size of 1.
+ :param kwargs: additional keyword arguments for task specific pipeline constructor + :return: Pipeline object for the given task and model + """ + warnings.warn( + "[DEPRECATED] - deepsparse.transformers.pipeline is deprecated. To create " + "DeepSparse pipelines for transformers tasks, use deepsparse.Pipeline.create()" + ) + + if config is not None or tokenizer is not None: + raise ValueError( + "Directly passing in a config or tokenizer to DeepSparse transformers " + "pipelines is no longer supported. config and tokenizer objects should " + "be specified by including config.json and tokenizer.json files in the " + "model directory respectively" + ) + + return Pipeline.create( + task=task, + model_path=model_path, + engine_type=engine_type, + batch_size=batch_size, + num_cores=num_cores, + scheduler=scheduler, + sequence_length=max_length, + default_model_name=model_name, + **kwargs, + ) diff --git a/src/deepsparse/transformers/pipelines/question_answering.py b/src/deepsparse/transformers/pipelines/question_answering.py new file mode 100644 index 0000000000..ba57117dad --- /dev/null +++ b/src/deepsparse/transformers/pipelines/question_answering.py @@ -0,0 +1,409 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# postprocessing adapted from huggingface/transformers + +# Copyright 2021 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Pipeline implementation and pydantic models for question answering transformers +tasks +""" + + +from typing import Any, Dict, List, Tuple, Type + +import numpy +from pydantic import BaseModel, Field +from transformers.data import ( + SquadExample, + SquadFeatures, + squad_convert_examples_to_features, +) +from transformers.tokenization_utils_base import PaddingStrategy + +from deepsparse import Pipeline +from deepsparse.transformers.pipelines import TransformersPipeline + + +__all__ = [ + "QuestionAnsweringInput", + "QuestionAnsweringOutput", + "QuestionAnsweringPipeline", +] + + +class QuestionAnsweringInput(BaseModel): + """ + Schema for inputs to question_answering pipelines + """ + + question: str = Field(description="String question to be answered") + context: str = Field(description="String representing context for answer") + + +class QuestionAnsweringOutput(BaseModel): + """ + Schema for question_answering pipeline output.
Values are in batch order + """ + + score: float = Field(description="confidence score for prediction") + answer: str = Field(description="predicted answer") + start: int = Field(description="start index of the answer") + end: int = Field(description="end index of the answer") + + +@Pipeline.register( + task="question_answering", + task_aliases=["qa"], + default_model_path=( + "zoo:nlp/question_answering/bert-base/pytorch/huggingface/" + "squad/12layer_pruned80_quant-none-vnni" + ), +) +class QuestionAnsweringPipeline(TransformersPipeline): + """ + transformers question_answering pipeline + + example instantiation: + ```python + question_answering = Pipeline.create( + task="question_answering", + model_path="question_answering_model_dir/", + ) + ``` + + :param model_path: sparsezoo stub to a transformers model, an ONNX file, or + (preferred) a directory containing a model.onnx, tokenizer config, and model + config. If no tokenizer and/or model config(s) are found, then they will be + loaded from huggingface transformers using the `default_model_name` key + :param engine_type: inference engine to use. Currently supported values include + 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' + :param batch_size: static batch size to use for inference. Default is 1 + :param num_cores: number of CPU cores to allocate for inference engine. None + specifies all available cores. Default is None + :param scheduler: (deepsparse only) kind of scheduler to execute with. + Pass None for the default + :param input_shapes: list of shapes to set ONNX the inputs to. Pass None + to use model as-is. Default is None + :param alias: optional name to give this pipeline instance, useful when + inferencing with multiple models. Default is None + :param sequence_length: sequence length to compile model and tokenizer for. + Default is 128 + :param default_model_name: huggingface transformers model name to use to + load a tokenizer and model config when none are provided in the `model_path`. + Default is 'bert-base-uncased' + :param doc_stride: if the context is too long to fit with the question for the + model, it will be split in several chunks with some overlap. This argument + controls the size of that overlap. Currently, only reading the first span + is supported (everything after doc_stride will be truncated). Default + is 128 + :param max_question_len: maximum length of the question after tokenization. + It will be truncated if needed. Default is 64 + :param max_answer_len: maximum length of answer after decoding. Default is 15 + """ + + def __init__( + self, + *, + doc_stride: int = 128, + max_question_length: int = 64, + max_answer_length: int = 15, + **kwargs, + ): + + if kwargs.get("batch_size") and kwargs["batch_size"] > 1: + raise ValueError( + f"{self.__class__.__name__} currently only supports batch size 1, " + f"batch size set to {kwargs['batch_size']}" + ) + + self._doc_stride = doc_stride + self._max_question_length = max_question_length + self._max_answer_length = max_answer_length + + super().__init__(**kwargs) + + @property + def doc_stride(self) -> int: + """ + :return: if the context is too long to fit with the question for the + model, it will be split in several chunks with some overlap. This argument + controls the size of that overlap. 
Currently, only reading the first span + is supported (everything after doc_stride will be truncated) + """ + return self._doc_stride + + @property + def max_answer_length(self) -> int: + """ + :return: maximum length of answer after decoding + """ + return self._max_answer_length + + @property + def max_question_length(self) -> int: + """ + :return: maximum length of the question after tokenization. + It will be truncated if needed + """ + return self._max_question_length + + @property + def input_schema(self) -> Type[BaseModel]: + """ + :return: pydantic model class that inputs to this pipeline must comply to + """ + return QuestionAnsweringInput + + @property + def output_schema(self) -> Type[BaseModel]: + """ + :return: pydantic model class that outputs of this pipeline must comply to + """ + return QuestionAnsweringOutput + + def process_inputs( + self, + inputs: QuestionAnsweringInput, + ) -> Tuple[List[numpy.ndarray], Dict[str, Any]]: + """ + :param inputs: inputs to the pipeline. Must be the type of the + QuestionAnsweringInput + :return: inputs of this model processed into a list of numpy arrays that + can be directly passed into the forward pass of the pipeline engine and + dictionary of parsed features and original extracted example + """ + squad_example = SquadExample( + None, inputs.question, inputs.context, None, None, None + ) + features = self._tokenize(squad_example) + tokens = features.__dict__ + + engine_inputs = self.tokens_to_engine_input(tokens) + # add batch dimension, assuming batch size 1 + engine_inputs = [numpy.expand_dims(inp, axis=0) for inp in engine_inputs] + + return engine_inputs, dict( + features=features, + example=squad_example, + ) + + def process_engine_outputs( + self, engine_outputs: List[numpy.ndarray], **kwargs + ) -> BaseModel: + """ + :param engine_outputs: list of numpy arrays that are the output of the engine + forward pass + :return: outputs of engine post-processed into an object in the `output_schema` + format of this pipeline + """ + features = kwargs["features"] + example = kwargs["example"] + start_vals, end_vals = engine_outputs[:2] + + # assuming batch size 0 + start = start_vals[0] + end = end_vals[0] + + # Ensure padded tokens & question tokens cannot belong + undesired_tokens = ( + numpy.abs(numpy.array(features.p_mask) - 1) & features.attention_mask + ) + + # Generate mask + undesired_tokens_mask = undesired_tokens == 0.0 + + # Make sure non-context indexes cannot contribute to the softmax + start = numpy.where(undesired_tokens_mask, -10000.0, start) + end = numpy.where(undesired_tokens_mask, -10000.0, end) + + # Normalize logits and spans to retrieve the answer + start = numpy.exp( + start - numpy.log(numpy.sum(numpy.exp(start), axis=-1, keepdims=True)) + ) + end = numpy.exp( + end - numpy.log(numpy.sum(numpy.exp(end), axis=-1, keepdims=True)) + ) + + # Mask CLS + start[0] = 0.0 + end[0] = 0.0 + + ans_start, ans_end, scores = self._decode(start, end) + # assuming one stride, so grab first idx + ans_start = ans_start[0] + ans_end = ans_end[0] + score = scores[0] + + # decode start, end idx into text + if not self.tokenizer.is_fast: + char_to_word = numpy.array(example.char_to_word_offset) + return self.output_schema( + score=score.item(), + start=numpy.where( + char_to_word == features.token_to_orig_map[ans_start] + )[0][0].item(), + end=numpy.where(char_to_word == features.token_to_orig_map[ans_end])[0][ + -1 + ].item(), + answer=" ".join( + example.doc_tokens[ + features.token_to_orig_map[ + ans_start + ] : 
features.token_to_orig_map[ans_end] + + 1 + ] + ), + ) + else: + question_first = bool(self.tokenizer.padding_side == "right") + + # Sometimes the max probability token is in the middle of a word so: + # we start by finding the right word containing the token with + # `token_to_word` then we convert this word in a character span + return self.output_schema( + score=score.item(), + start=features.encoding.word_to_chars( + features.encoding.token_to_word(ans_start), + sequence_index=1 if question_first else 0, + )[0], + end=features.encoding.word_to_chars( + features.encoding.token_to_word(ans_end), + sequence_index=1 if question_first else 0, + )[1], + answer=example.context_text[ + features.encoding.word_to_chars( + features.encoding.token_to_word(ans_start), + sequence_index=1 if question_first else 0, + )[0] : features.encoding.word_to_chars( + features.encoding.token_to_word(ans_end), + sequence_index=1 if question_first else 0, + )[ + 1 + ] + ], + ) + + def _tokenize(self, example: SquadExample): + if not self.tokenizer.is_fast: + features = squad_convert_examples_to_features( + examples=[example], + tokenizer=self.tokenizer, + max_seq_length=self.sequence_length, + doc_stride=self.doc_stride, + max_query_length=self.max_question_length, + padding_strategy=PaddingStrategy.MAX_LENGTH.value, + is_training=False, + tqdm_enabled=False, + ) + # only 1 span supported so taking only the first element of features + # to add support for num_spans switch to features = features[:num_spans] + # not included for now due to static batch requirements in production + features = features[0] + else: + question_first = bool(self.tokenizer.padding_side == "right") + encoded_inputs = self.tokenizer( + text=example.question_text if question_first else example.context_text, + text_pair=( + example.context_text if question_first else example.question_text + ), + padding=PaddingStrategy.MAX_LENGTH.value, + truncation="only_second" if question_first else "only_first", + max_length=self.sequence_length, + stride=self.doc_stride, + return_tensors="np", + return_token_type_ids=True, + return_overflowing_tokens=True, + return_offsets_mapping=True, + return_special_tokens_mask=True, + ) + + # only 1 span supported so taking only the first element of features + # to add support for num_spans switch hardcoded 0 idx lookups to loop + # over values in num_spans + + # p_mask: mask with 1 for token that cannot be in the answer + # We put 0 on the tokens from the context and 1 everywhere else + p_mask = numpy.asarray( + [ + [ + tok != 1 if question_first else 0 + for tok in encoded_inputs.sequence_ids(0) + ] + ] + ) + + # keep the cls_token unmasked + if self.tokenizer.cls_token_id is not None: + cls_index = numpy.nonzero( + encoded_inputs["input_ids"][0] == self.tokenizer.cls_token_id + ) + p_mask[cls_index] = 0 + + features = SquadFeatures( + input_ids=encoded_inputs["input_ids"][0], + attention_mask=encoded_inputs["attention_mask"][0], + token_type_ids=encoded_inputs["token_type_ids"][0], + p_mask=p_mask[0].tolist(), + encoding=encoded_inputs[0], + # the following values are unused for fast tokenizers + cls_index=None, + token_to_orig_map={}, + example_index=0, + unique_id=0, + paragraph_len=0, + token_is_max_context=0, + tokens=[], + start_position=0, + end_position=0, + is_impossible=False, + qas_id=None, + ) + + return features + + def _decode(self, start: numpy.ndarray, end: numpy.ndarray) -> Tuple: + # Ensure we have batch axis + if start.ndim == 1: + start = start[None] + + if end.ndim == 1: + end = end[None] + +
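        # Worked illustration (editor's aside, not part of this patch), ignoring
        # the leading batch axis and assuming max_answer_length=2 with 3 tokens,
        # start=[0.1, 0.6, 0.3] and end=[0.2, 0.1, 0.7]:
        #   outer[i, j] = start[i] * end[j]
        #   numpy.triu(outer) zeroes entries with j < i, removing spans whose end
        #   comes before their start
        #   numpy.tril(..., k=max_answer_length - 1) zeroes entries with
        #   j - i > 1, removing spans longer than max_answer_length
        # leaving [[0.02, 0.01, 0.0], [0.0, 0.06, 0.42], [0.0, 0.0, 0.21]]; the
        # argmax 0.42 at (1, 2) selects tokens 1..2 as the best answer span.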
# Compute the score of each tuple(start, end) to be the real answer + outer = numpy.matmul(numpy.expand_dims(start, -1), numpy.expand_dims(end, 1)) + + # Remove candidate with end < start and end - start > max_answer_len + candidates = numpy.tril(numpy.triu(outer), self.max_answer_length - 1) + + # Inspired by Chen & al. (https://github.com/facebookresearch/DrQA) + scores_flat = candidates.flatten() + # only returning best result, use argsort for topk support + idx_sort = [numpy.argmax(scores_flat)] + + start, end = numpy.unravel_index(idx_sort, candidates.shape)[1:] + return start, end, candidates[0, start, end] diff --git a/src/deepsparse/transformers/pipelines/text_classification.py b/src/deepsparse/transformers/pipelines/text_classification.py new file mode 100644 index 0000000000..0df9ba2b59 --- /dev/null +++ b/src/deepsparse/transformers/pipelines/text_classification.py @@ -0,0 +1,221 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# postprocessing adapted from huggingface/transformers + +# Copyright 2021 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +""" +Pipeline implementation and pydantic models for text classification transformers +tasks +""" + + +from typing import List, Type, Union + +import numpy +from pydantic import BaseModel, Field +from transformers.tokenization_utils_base import PaddingStrategy, TruncationStrategy + +from deepsparse import Pipeline +from deepsparse.transformers.pipelines import TransformersPipeline + + +__all__ = [ + "TextClassificationInput", + "TextClassificationOutput", + "TextClassificationPipeline", +] + + +class TextClassificationInput(BaseModel): + """ + Schema for inputs to text_classification pipelines + """ + + sequences: Union[List[List[str]], List[str], str] = Field( + description="A string or List of strings representing input to" + "text_classification task" + ) + + +class TextClassificationOutput(BaseModel): + """ + Schema for text_classification pipeline output. 
Values are in batch order + """ + + labels: List[str] = Field(description="The predicted labels in batch order") + scores: List[float] = Field( + description="The corresponding probability for each label in the batch" + ) + + +@Pipeline.register( + task="text_classification", + task_aliases=["glue", "sentiment_analysis"], + default_model_path=( + "zoo:nlp/sentiment_analysis/bert-base/pytorch/huggingface/" + "sst2/12layer_pruned80_quant-none-vnni" + ), +) +class TextClassificationPipeline(TransformersPipeline): + """ + transformers text classification pipeline + + example instantiation: + ```python + text_classifier = Pipeline.create( + task="text_classification", + model_path="text_classification_model_dir/", + batch_size=BATCH_SIZE, + ) + ``` + + example batch size 1, single text inputs (ie sentiment analysis): + ```python + sentiment = text_classifier("the food tastes great") + sentiment = text_classifier(["the food tastes great"]) + sentiment = text_classifier([["the food tastes great"]]) + ``` + + example batch size 1, multi text input (ie QQP like tasks): + ```python + prediction = text_classifier([["how is the food?", "what is the food?"]]) + ``` + + example batch size n, single text inputs: + ```python + sentiments = text_classifier(["the food tastes great", "the food tastes bad"]) + sentiments = text_classifier([["the food tastes great"], ["the food tastes bad"]]) + ``` + + :param model_path: sparsezoo stub to a transformers model, an ONNX file, or + (preferred) a directory containing a model.onnx, tokenizer config, and model + config. If no tokenizer and/or model config(s) are found, then they will be + loaded from huggingface transformers using the `default_model_name` key + :param engine_type: inference engine to use. Currently supported values include + 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' + :param batch_size: static batch size to use for inference. Default is 1 + :param num_cores: number of CPU cores to allocate for inference engine. None + specifies all available cores. Default is None + :param scheduler: (deepsparse only) kind of scheduler to execute with. + Pass None for the default + :param input_shapes: list of shapes to set ONNX the inputs to. Pass None + to use model as-is. Default is None + :param alias: optional name to give this pipeline instance, useful when + inferencing with multiple models. Default is None + :param sequence_length: sequence length to compile model and tokenizer for. + Default is 128 + :param default_model_name: huggingface transformers model name to use to + load a tokenizer and model config when none are provided in the `model_path`. + Default is 'bert-base-uncased' + """ + + @property + def input_schema(self) -> Type[BaseModel]: + """ + :return: pydantic model class that inputs to this pipeline must comply to + """ + return TextClassificationInput + + @property + def output_schema(self) -> Type[BaseModel]: + """ + :return: pydantic model class that outputs of this pipeline must comply to + """ + return TextClassificationOutput + + def parse_inputs(self, *args, **kwargs) -> BaseModel: + """ + :param args: ordered arguments to pipeline, only an input_schema object + is supported as an arg for this function + :param kwargs: keyword arguments to pipeline + :return: pipeline arguments parsed into the given `input_schema` + schema if necessary. If an instance of the `input_schema` is provided + it will be returned + """ + if args and kwargs: + raise ValueError( + f"{self.__class__} only support args OR kwargs. 
Found " + f" {len(args)} args and {len(kwargs)} kwargs" + ) + + if args: + if len(args) == 1: + # passed input_schema schema directly + if isinstance(args[0], self.input_schema): + return args[0] + return self.input_schema(sequences=args[0]) + else: + return self.input_schema(sequences=args) + + return self.input_schema(**kwargs) + + def process_inputs(self, inputs: TextClassificationInput) -> List[numpy.ndarray]: + """ + :param inputs: inputs to the pipeline. Must be the type of the + TextClassificationInput + :return: inputs of this model processed into a list of numpy arrays that + can be directly passed into the forward pass of the pipeline engine + """ + tokens = self.tokenizer( + inputs.sequences, + add_special_tokens=True, + return_tensors="np", + padding=PaddingStrategy.MAX_LENGTH.value, + truncation=TruncationStrategy.LONGEST_FIRST.value, + ) + return self.tokens_to_engine_input(tokens) + + def process_engine_outputs(self, engine_outputs: List[numpy.ndarray]) -> BaseModel: + """ + :param engine_outputs: list of numpy arrays that are the output of the engine + forward pass + :return: outputs of engine post-processed into an object in the `output_schema` + format of this pipeline + """ + outputs = engine_outputs + if isinstance(outputs, list): + outputs = outputs[0] + + scores = ( + 1.0 / (1.0 + numpy.exp(-outputs)) + if self.config.num_labels == 1 + else numpy.exp(outputs) / numpy.exp(outputs).sum(-1, keepdims=True) + ) + + labels = [] + label_scores = [] + + for score in scores: + labels.append(self.config.id2label[score.argmax()]) + label_scores.append(score.max().item()) + + return self.output_schema( + labels=labels, + scores=label_scores, + ) diff --git a/src/deepsparse/transformers/pipelines/token_classification.py b/src/deepsparse/transformers/pipelines/token_classification.py new file mode 100644 index 0000000000..6485df668e --- /dev/null +++ b/src/deepsparse/transformers/pipelines/token_classification.py @@ -0,0 +1,499 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# postprocessing adapted from huggingface/transformers + +# Copyright 2021 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +""" +Pipeline implementation and pydantic models for token classification transformers +tasks +""" +from typing import Any, Dict, List, Optional, Tuple, Type, Union + +import numpy +from pydantic import BaseModel, Field +from transformers.file_utils import ExplicitEnum +from transformers.tokenization_utils_base import PaddingStrategy, TruncationStrategy + +from deepsparse import Pipeline +from deepsparse.transformers.pipelines import TransformersPipeline + + +__all__ = [ + "AggregationStrategy", + "TokenClassificationInput", + "TokenClassificationResult", + "TokenClassificationOutput", + "TokenClassificationPipeline", +] + + +class AggregationStrategy(ExplicitEnum): + """ + Valid aggregation strategies for postprocessing in the TokenClassificationPipeline + """ + + NONE = "none" + SIMPLE = "simple" + FIRST = "first" + AVERAGE = "average" + MAX = "max" + + +class TokenClassificationInput(BaseModel): + """ + Schema for inputs to token_classification pipelines + """ + + inputs: Union[List[str], str] = Field( + description=( + "A string or List of batch of strings representing input(s) to" + "a token_classification task" + ) + ) + + +class TokenClassificationResult(BaseModel): + """ + Schema for a classification of a single token + """ + + entity: str = Field(description="entity predicted for that token/word") + score: float = Field(description="The corresponding probability for `entity`") + index: int = Field(description="index of the corresponding token in the sentence") + word: str = Field(description="token/word classified") + start: Optional[int] = Field( + description=( + "index of the start of the corresponding entity in the sentence. " + "Only exists if the offsets are available within the tokenizer" + ) + ) + end: Optional[int] = Field( + description=( + "index of the end of the corresponding entity in the sentence. " + "Only exists if the offsets are available within the tokenizer" + ) + ) + is_grouped: bool = Field( + default=False, + description="True if this result is part of an entity group", + ) + + +class TokenClassificationOutput(BaseModel): + """ + Schema for results of TokenClassificationPipeline inference. Classifications of each + token stored in a list of lists of batch[sentence[token]] + """ + + predictions: List[List[TokenClassificationResult]] = Field( + description=( + "list of list of results of token classification pipeline. Outer list " + "has one item for each sequence in the batch. Inner list has one " + "TokenClassificationResult item per token in the given sequence" + ) + ) + + +@Pipeline.register( + task="token_classification", + task_aliases=["ner"], + default_model_path=( + "zoo:nlp/token_classification/bert-base/pytorch/huggingface/" + "conll2003/12layer_pruned80_quant-none-vnni" + ), +) +class TokenClassificationPipeline(TransformersPipeline): + """ + transformers token classification pipeline + + example instantiation: + ```python + token_classifier = Pipeline.create( + task="token_classification", + model_path="token_classification_model_dir/", + batch_size=BATCH_SIZE, + ) + ``` + + :param model_path: sparsezoo stub to a transformers model, an ONNX file, or + (preferred) a directory containing a model.onnx, tokenizer config, and model + config. If no tokenizer and/or model config(s) are found, then they will be + loaded from huggingface transformers using the `default_model_name` key + :param engine_type: inference engine to use. Currently supported values include + 'deepsparse' and 'onnxruntime'. 
Default is 'deepsparse' + :param batch_size: static batch size to use for inference. Default is 1 + :param num_cores: number of CPU cores to allocate for inference engine. None + specifies all available cores. Default is None + :param scheduler: (deepsparse only) kind of scheduler to execute with. + Pass None for the default + :param input_shapes: list of shapes to set ONNX the inputs to. Pass None + to use model as-is. Default is None + :param alias: optional name to give this pipeline instance, useful when + inferencing with multiple models. Default is None + :param sequence_length: sequence length to compile model and tokenizer for. + Default is 128 + :param default_model_name: huggingface transformers model name to use to + load a tokenizer and model config when none are provided in the `model_path`. + Default is 'bert-base-uncased' + :param aggregation_strategy: how to aggregate tokens in postprocessing. Options + include 'none', 'simple', 'first', 'average', and 'max'. Default is None + :param ignore_labels: list of label names to ignore in output. Default is + ['0'] which ignores the default known class label + """ + + def __init__( + self, + *, + aggregation_strategy: AggregationStrategy = AggregationStrategy.NONE, + ignore_labels: List[str] = None, + **kwargs, + ): + + if isinstance(aggregation_strategy, str): + aggregation_strategy = aggregation_strategy.strip().lower() + self._aggregation_strategy = AggregationStrategy(aggregation_strategy) + self._ignore_labels = ["0"] if ignore_labels is None else ignore_labels + + super().__init__(**kwargs) + + @property + def aggregation_strategy(self) -> str: + """ + :return: how to aggregate tokens in postprocessing. Options + include 'none', 'simple', 'first', 'average', and 'max' + """ + return self._aggregation_strategy.value + + @property + def ignore_labels(self) -> List[str]: + """ + :return: list of label names to ignore in output. Default is + ['0'] which ignores the default known class label + """ + return self._ignore_labels + + @property + def input_schema(self) -> Type[BaseModel]: + """ + :return: pydantic model class that inputs to this pipeline must comply to + """ + return TokenClassificationInput + + @property + def output_schema(self) -> Type[BaseModel]: + """ + :return: pydantic model class that outputs of this pipeline must comply to + """ + return TokenClassificationOutput + + def parse_inputs(self, *args, **kwargs) -> BaseModel: + """ + :param args: ordered arguments to pipeline, only an input_schema object + is supported as an arg for this function + :param kwargs: keyword arguments to pipeline + :return: pipeline arguments parsed into the given `input_schema` + schema if necessary. If an instance of the `input_schema` is provided + it will be returned + """ + if args and kwargs: + raise ValueError( + f"{self.__class__} only support args OR kwargs. Found " + f" {len(args)} args and {len(kwargs)} kwargs" + ) + + if args: + if len(args) == 1: + # passed input_schema schema directly + if isinstance(args[0], self.input_schema): + return args[0] + return self.input_schema(inputs=args[0]) + else: + return self.input_schema(inputs=args) + + return self.input_schema(**kwargs) + + def process_inputs( + self, + inputs: TokenClassificationInput, + ) -> Tuple[List[numpy.ndarray], Dict[str, Any]]: + """ + :param inputs: inputs to the pipeline. 
Must be the type of the + TokenClassificationInput + :return: inputs of this model processed into a list of numpy arrays that + can be directly passed into the forward pass of the pipeline engine + and dictionary containing offset mappings and special tokens mask to + be used during postprocessing + """ + tokens = self.tokenizer( + inputs.inputs, + return_tensors="np", + truncation=TruncationStrategy.LONGEST_FIRST.value, + padding=PaddingStrategy.MAX_LENGTH.value, + return_special_tokens_mask=True, + return_offsets_mapping=self.tokenizer.is_fast, + ) + + offset_mapping = ( + tokens.pop("offset_mapping") + if self.tokenizer.is_fast + else [None] * len(inputs.inputs) + ) + special_tokens_mask = tokens.pop("special_tokens_mask") + postprocessing_kwargs = dict( + inputs=inputs, + tokens=tokens, + offset_mapping=offset_mapping, + special_tokens_mask=special_tokens_mask, + ) + + return self.tokens_to_engine_input(tokens), postprocessing_kwargs + + def process_engine_outputs( + self, + engine_outputs: List[numpy.ndarray], + **kwargs, + ) -> BaseModel: + """ + :param engine_outputs: list of numpy arrays that are the output of the engine + forward pass + :return: outputs of engine post-processed into an object in the `output_schema` + format of this pipeline + """ + inputs = kwargs["inputs"] + tokens = kwargs["tokens"] + offset_mapping = kwargs["offset_mapping"] + special_tokens_mask = kwargs["special_tokens_mask"] + + predictions = [] # type: List[List[TokenClassificationResult]] + + for entities_index, current_entities in enumerate(engine_outputs[0]): + input_ids = tokens["input_ids"][entities_index] + + scores = numpy.exp(current_entities) / numpy.exp(current_entities).sum( + -1, keepdims=True + ) + pre_entities = self._gather_pre_entities( + inputs.inputs[entities_index], + input_ids, + scores, + offset_mapping[entities_index], + special_tokens_mask[entities_index], + ) + grouped_entities = self._aggregate(pre_entities) + # Filter anything that is in self.ignore_labels + current_results = [] # type: List[TokenClassificationResult] + for entity in grouped_entities: + if entity.get("entity") in self.ignore_labels or ( + entity.get("entity_group") in self.ignore_labels + ): + continue + if entity.get("entity_group"): + entity["entity"] = entity["entity_group"] + entity["is_grouped"] = True + del entity["entity_group"] + current_results.append(TokenClassificationResult(**entity)) + predictions.append(current_results) + + return self.output_schema(predictions=predictions) + + # utilities below adapted from transformers + + def _gather_pre_entities( + self, + sentence: str, + input_ids: numpy.ndarray, + scores: numpy.ndarray, + offset_mapping: Optional[List[Tuple[int, int]]], + special_tokens_mask: numpy.ndarray, + ) -> List[dict]: + pre_entities = [] + for idx, token_scores in enumerate(scores): + # Filter special_tokens, they should only occur + # at the sentence boundaries since we're not encoding pairs of + # sentences so we don't have to keep track of those. 
+ if special_tokens_mask[idx]: + continue + + word = self.tokenizer.convert_ids_to_tokens(int(input_ids[idx])) + if offset_mapping is not None: + start_ind, end_ind = offset_mapping[idx] + word_ref = sentence[start_ind:end_ind] + is_subword = len(word_ref) != len(word) + + if int(input_ids[idx]) == self.tokenizer.unk_token_id: + word = word_ref + is_subword = False + else: + start_ind = None + end_ind = None + is_subword = False + + pre_entity = { + "word": word, + "scores": token_scores, + "start": start_ind, + "end": end_ind, + "index": idx, + "is_subword": is_subword, + } + pre_entities.append(pre_entity) + return pre_entities + + def _aggregate(self, pre_entities: List[dict]) -> List[dict]: + if self._aggregation_strategy in { + AggregationStrategy.NONE, + AggregationStrategy.SIMPLE, + }: + entities = [] + for pre_entity in pre_entities: + entity_idx = pre_entity["scores"].argmax() + score = pre_entity["scores"][entity_idx] + entity = { + "entity": self.config.id2label[entity_idx], + "score": score, + "index": pre_entity["index"], + "word": pre_entity["word"], + "start": pre_entity["start"], + "end": pre_entity["end"], + } + entities.append(entity) + else: + entities = self._aggregate_words(pre_entities) + + if self._aggregation_strategy == AggregationStrategy.NONE: + return entities + return self._group_entities(entities) + + def _aggregate_word(self, entities: List[dict]) -> dict: + word = self.tokenizer.convert_tokens_to_string( + [entity["word"] for entity in entities] + ) + if self._aggregation_strategy == AggregationStrategy.FIRST: + scores = entities[0]["scores"] + idx = scores.argmax() + score = scores[idx] + entity = self.config.id2label[idx] + elif self._aggregation_strategy == AggregationStrategy.MAX: + max_entity = max(entities, key=lambda entity: entity["scores"].max()) + scores = max_entity["scores"] + idx = scores.argmax() + score = scores[idx] + entity = self.config.id2label[idx] + elif self._aggregation_strategy == AggregationStrategy.AVERAGE: + scores = numpy.stack([entity["scores"] for entity in entities]) + average_scores = numpy.nanmean(scores, axis=0) + entity_idx = average_scores.argmax() + entity = self.config.id2label[entity_idx] + score = average_scores[entity_idx] + else: + raise ValueError( + f"Invalid aggregation_strategy: {self._aggregation_strategy}" + ) + new_entity = { + "entity": entity, + "score": score, + "word": word, + "start": entities[0]["start"], + "end": entities[-1]["end"], + } + return new_entity + + def _aggregate_words(self, entities: List[dict]) -> List[dict]: + word_entities = [] + word_group = None + for entity in entities: + if word_group is None: + word_group = [entity] + elif entity["is_subword"]: + word_group.append(entity) + else: + word_entities.append(self._aggregate_word(word_group)) + word_group = [entity] + # Last item + word_entities.append(self._aggregate_word(word_group)) + return word_entities + + def _group_sub_entities(self, entities: List[dict]) -> dict: + # Get the first entity in the entity group + entity = entities[0]["entity"].split("-")[-1] + scores = numpy.nanmean([entity["score"] for entity in entities]) + tokens = [entity["word"] for entity in entities] + + entity_group = { + "entity_group": entity, + "score": numpy.mean(scores), + "word": self.tokenizer.convert_tokens_to_string(tokens), + "start": entities[0]["start"], + "end": entities[-1]["end"], + } + return entity_group + + def _get_tag(self, entity_name: str) -> Tuple[str, str]: + if entity_name.startswith("B-"): + bi = "B" + tag = entity_name[2:] + elif 
entity_name.startswith("I-"): + bi = "I" + tag = entity_name[2:] + else: + # It's not in B-, I- format + bi = "B" + tag = entity_name + return bi, tag + + def _group_entities(self, entities: List[dict]) -> List[dict]: + + entity_groups = [] + entity_group_disagg = [] + + for entity in entities: + if not entity_group_disagg: + entity_group_disagg.append(entity) + continue + + # If the current entity is similar and adjacent to the previous entity, + # append it to the disaggregated entity group + # The split is meant to account for the "B" and "I" prefixes + # Shouldn't merge if both entities are B-type + bi, tag = self._get_tag(entity["entity"]) + last_bi, last_tag = self._get_tag(entity_group_disagg[-1]["entity"]) + + if tag == last_tag and bi != "B": + # Modify subword type to be previous_type + entity_group_disagg.append(entity) + else: + # If the current entity is different from the previous entity + # aggregate the disaggregated entity group + entity_groups.append(self._group_sub_entities(entity_group_disagg)) + entity_group_disagg = [entity] + if entity_group_disagg: + # it's the last entity, add it to the entity groups + entity_groups.append(self._group_sub_entities(entity_group_disagg)) + + return entity_groups diff --git a/src/deepsparse/transformers/server.py b/src/deepsparse/transformers/server.py deleted file mode 100644 index 59035dba80..0000000000 --- a/src/deepsparse/transformers/server.py +++ /dev/null @@ -1,186 +0,0 @@ -# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
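The `_get_tag` and `_group_entities` helpers above implement the standard B-/I- grouping rule: a token joins the open entity group only when it shares the previous token's tag and is not itself a `B-` span start. A self-contained sketch of just that rule (the real pipeline additionally averages scores and re-joins word pieces through the tokenizer):

```python
from typing import List, Tuple


def get_tag(entity_name: str) -> Tuple[str, str]:
    # mirrors TokenClassificationPipeline._get_tag
    if entity_name.startswith("B-"):
        return "B", entity_name[2:]
    if entity_name.startswith("I-"):
        return "I", entity_name[2:]
    return "B", entity_name  # labels without a prefix start a new group


def group_labels(labels: List[str]) -> List[List[str]]:
    # simplified view of _group_entities operating on label strings only
    groups: List[List[str]] = []
    for label in labels:
        bi, tag = get_tag(label)
        if groups and tag == get_tag(groups[-1][-1])[1] and bi != "B":
            groups[-1].append(label)
        else:
            groups.append([label])
    return groups


print(group_labels(["B-PER", "I-PER", "O", "B-LOC", "I-LOC"]))
# [['B-PER', 'I-PER'], ['O'], ['B-LOC', 'I-LOC']]
```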
- -""" -Specs, schemas, and pipelines for use when serving transformers models -""" - -from typing import Any, Dict, List, Optional, Tuple, Union - -from deepsparse.tasks import SupportedTasks -from deepsparse.transformers.pipelines import Pipeline, pipeline - - -try: - from deepsparse.server.config import ServeModelConfig - - deepsparse_server_err = None -except Exception as _err: - deepsparse_server_err = _err - ServeModelConfig = object - -try: - from pydantic import BaseModel, Field - - pydantic_import_err = None -except Exception as _err: - pydantic_import_err = _err - BaseModel = object - Field = dict - - -__all__ = [ - "create_pipeline_definitions", - "QuestionAnsweringRequest", - "QuestionAnsweringResponse", - "TextClassificationRequest", - "TextClassificationResponse", - "TokenClassificationRequest", - "TokenClassificationResponse", -] - - -def create_pipeline_definitions( - model_config: ServeModelConfig, -) -> Tuple[Pipeline, Any, Any, Dict]: - """ - Create a pipeline definition and the supporting files for a given model config - to use for serving in the DeepSparse inference server - - :param model_config: the server model config describing the model and params - :return: a tuple containing (the pipeline to use for inference, - the expected request body, the expected response body, - any additional keyword args for use with the server) - """ - if deepsparse_server_err: - raise deepsparse_server_err - - if pydantic_import_err: - raise pydantic_import_err - - if SupportedTasks.nlp.question_answering.matches(model_config.task): - request_model = QuestionAnsweringRequest - response_model = Union[ - List[QuestionAnsweringResponse], - QuestionAnsweringResponse, - ] - kwargs = {} - elif SupportedTasks.nlp.text_classification.matches(model_config.task): - request_model = TextClassificationRequest - response_model = Union[ - List[TextClassificationResponse], List[List[TextClassificationResponse]] - ] - kwargs = {} - elif SupportedTasks.nlp.token_classification.matches(model_config.task): - request_model = TokenClassificationRequest - response_model = Union[ - List[TokenClassificationResponse], List[List[TokenClassificationResponse]] - ] - kwargs = {} - else: - raise ValueError( - f"unrecognized task given of {model_config.task} for config {model_config}" - ) - - pipeline_instance: Pipeline = pipeline( - task=model_config.task.lower().replace("_", "-"), - model_path=model_config.model_path, - engine_type=model_config.engine, - num_cores=model_config.num_cores, - scheduler=model_config.scheduler, - batch_size=model_config.batch_size, - **model_config.kwargs, - ) - - return pipeline_instance, request_model, response_model, kwargs - - -class QuestionAnsweringRequest(BaseModel): - """ - The request model for Question Answering Task - """ - - question: Union[List[str], str] = Field( - description="Either a string or a List of string questions to answer" - ) - context: Union[List[str], str] = Field( - description="Either a string or List of strings representing the context " - "for each question" - ) - - -class TokenClassificationRequest(BaseModel): - """ - Schema for TokenClassificationPipeline Request - """ - - inputs: Union[List[str], str] = Field( - description="A string or List of strings representing input to" - "TokenClassificationPipeline task" - ) - - -class TextClassificationRequest(BaseModel): - """ - Schema for TextClassificationPipeline Request - """ - - sequences: Union[List[str], str] = Field( - description="A string or List of strings representing input to" - 
"TextClassificationPipeline task" - ) - - -class QuestionAnsweringResponse(BaseModel): - """ - Schema for a result from Question Answering Task - """ - - score: float = Field(description="confidence score for prediction") - start: int = Field(description="The start index of the answer") - end: int = Field(description="The end index of the answer") - answer: str = Field(description="The predicted answer") - - -class TokenClassificationResponse(BaseModel): - """ - Schema for TokenClassificationPipeline Response - """ - - entity: str = Field( - description="The entity predicted for that token/word (it is named" - "`entity_group` when `aggregation_strategy` is not `none`." - ) - score: float = Field(description="The corresponding probability for `entity`.") - index: int = Field( - description="The index of the corresponding token in the sentence." - ) - word: str = Field(description="The token/word classified.") - start: Optional[int] = Field( - description="The index of the start of the corresponding entity in the " - "sentence. Only exists if the offsets are available within the tokenizer" - ) - end: Optional[int] = Field( - description="The index of the end of the corresponding entity in the sentence. " - "Only exists if the offsets are available within the tokenizer" - ) - - -class TextClassificationResponse(BaseModel): - """ - Schema for TextClassificationPipeline Response - """ - - label: str = Field(description="The label predicted.") - score: float = Field(description="The corresponding probability.") diff --git a/src/deepsparse/yolo/__init__.py b/src/deepsparse/yolo/__init__.py new file mode 100644 index 0000000000..0c44f887a4 --- /dev/null +++ b/src/deepsparse/yolo/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/src/deepsparse/yolo/annotate.py b/src/deepsparse/yolo/annotate.py new file mode 100644 index 0000000000..72f7770934 --- /dev/null +++ b/src/deepsparse/yolo/annotate.py @@ -0,0 +1,232 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +""" +Usage: deepsparse.object_detection.annotate [OPTIONS] + + Annotation Script for YOLO with DeepSparse + +Options: + --model_filepath, --model-filepath TEXT + Path/SparseZoo stub to the model file to be + used for annotation [default: zoo:cv/detect + ion/yolov5-s/pytorch/ultralytics/coco/pruned + -aggressive_96] + --source TEXT File path to image or directory of .jpg + files, a .mp4 video, or an integer (i.e. 0) + for webcam [required] + --engine [deepsparse|onnxruntime|torch] + Inference engine backend to run on. Choices + are 'deepsparse', 'onnxruntime', and + 'torch'. Default is 'deepsparse' + --image_shape, --image_shape INTEGER... + Image shape to use for inference, must be + two integers [default: 640, 640] + --num_cores, --num-cores INTEGER + The number of physical cores to run the + annotations with, defaults to using all + physical cores available on the system. For + DeepSparse benchmarks, this value is the + number of cores per socket + --save_dir, --save-dir DIRECTORY + The path to the directory for saving results + [default: annotation-results] + --name TEXT Name of directory in save-dir to write + results to. defaults to + {engine}-annotations-{run_number} + --target_fps, --target-fps FLOAT + Target FPS when writing video files. Frames + will be dropped to closely match target FPS. + --source must be a video file and if target- + fps is greater than the source video fps + then it will be ignored + --no_save, --no-save Set flag when source is from webcam to not + save results.Not supported for non-webcam + sources [default: False] + --help Show this message and exit. + +####### +Examples: + +1) deepsparse.object_detection.annotate --source PATH/TO/IMAGE.jpg +2) deepsparse.object_detection.annotate --source PATH/TO/VIDEO.mp4 +3) deepsparse.object_detection.annotate --source 0 +4) deepsparse.object_detection.annotate --source PATH/TO/IMAGE_DIR +""" +import logging +from typing import Optional + +import click + +import cv2 +from deepsparse.pipeline import Pipeline +from deepsparse.yolo import utils +from deepsparse.yolo.utils.cli_helpers import create_dir_callback + + +yolo_v5_default_stub = ( + "zoo:cv/detection/yolov5-s/pytorch/ultralytics/coco/" "pruned-aggressive_96" +) + +DEEPSPARSE_ENGINE = "deepsparse" +ORT_ENGINE = "onnxruntime" +TORCH_ENGINE = "torch" + +_LOGGER = logging.getLogger(__name__) + + +@click.command() +@click.option( + "--model_filepath", + "--model-filepath", + type=str, + default=yolo_v5_default_stub, + help="Path/SparseZoo stub to the model file to be used for annotation", + show_default=True, +) +@click.option( + "--source", + type=str, + required=True, + help="File path to image or directory of .jpg files, a .mp4 video, " + "or an integer (i.e. 0) for webcam", +) +@click.option( + "--engine", + type=click.Choice([DEEPSPARSE_ENGINE, ORT_ENGINE, TORCH_ENGINE]), + default=DEEPSPARSE_ENGINE, + help="Inference engine backend to run on. Choices are 'deepsparse', " + "'onnxruntime', and 'torch'. Default is 'deepsparse'", +) +@click.option( + "--image_shape", + "--image_shape", + type=int, + nargs=2, + default=(640, 640), + help="Image shape to use for inference, must be two integers", + show_default=True, +) +@click.option( + "--num_cores", + "--num-cores", + type=int, + default=None, + help="The number of physical cores to run the annotations with, " + "defaults to using all physical cores available on the system." 
+ " For DeepSparse benchmarks, this value is the number of cores " + "per socket", + show_default=True, +) +@click.option( + "--save_dir", + "--save-dir", + type=click.Path(dir_okay=True, file_okay=False), + default="annotation-results", + callback=create_dir_callback, + help="The path to the directory for saving results", + show_default=True, +) +@click.option( + "--name", + type=str, + default=None, + help="Name of directory in save-dir to write results to. defaults to " + "{engine}-annotations-{run_number}", +) +@click.option( + "--target_fps", + "--target-fps", + type=float, + default=None, + help="Target FPS when writing video files. Frames will be dropped to " + "closely match target FPS. --source must be a video file and if " + "target-fps is greater than the source video fps then it " + "will be ignored", + show_default=True, +) +@click.option( + "--no_save", + "--no-save", + is_flag=True, + help="Set flag when source is from webcam to not save results." + "Not supported for non-webcam sources", + show_default=True, +) +def main( + model_filepath: str, + source: str, + engine: str, + image_shape: tuple, + num_cores: Optional[int], + save_dir: str, + name: Optional[str], + target_fps: Optional[float], + no_save: bool, +) -> None: + """ + Annotation Script for YOLO with DeepSparse + """ + save_dir = utils.get_annotations_save_dir( + initial_save_dir=save_dir, + tag=name, + engine=engine, + ) + + loader, saver, is_video = utils.get_yolo_loader_and_saver( + path=source, + save_dir=save_dir, + image_shape=image_shape, + target_fps=target_fps, + no_save=no_save, + ) + + is_webcam = source.isnumeric() + yolo_pipeline = Pipeline.create( + task="yolo", + model_path=model_filepath, + class_names="coco", + engine_type=engine, + num_cores=num_cores, + ) + + for iteration, (input_image, source_image) in enumerate(loader): + + # annotate + annotated_images = utils.annotate( + pipeline=yolo_pipeline, + image_batch=input_image, + target_fps=target_fps, + calc_fps=is_video, + original_images=[source_image], + ) + + for annotated_image in annotated_images: + # display + if is_webcam: + cv2.imshow("annotated", annotated_image) + cv2.waitKey(1) + + # save + if saver: + saver.save_frame(annotated_image) + + if saver: + saver.close() + + _LOGGER.info(f"Results saved to {save_dir}") + + +if __name__ == "__main__": + main() diff --git a/src/deepsparse/yolo/pipelines.py b/src/deepsparse/yolo/pipelines.py new file mode 100644 index 0000000000..2398313c31 --- /dev/null +++ b/src/deepsparse/yolo/pipelines.py @@ -0,0 +1,248 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
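Beyond the CLI above, the same flow can be driven programmatically: `Pipeline.create(task="yolo", ...)` compiles the model and each call returns per-image boxes, scores, and labels. A minimal sketch using the annotation script's default SparseZoo stub and a placeholder image path:

```python
from deepsparse import Pipeline

yolo_pipeline = Pipeline.create(
    task="yolo",
    model_path=(
        "zoo:cv/detection/yolov5-s/pytorch/ultralytics/coco/pruned-aggressive_96"
    ),
    class_names="coco",
)

# PATH/TO/IMAGE.jpg is a placeholder; a list of loaded numpy images also works
outputs = yolo_pipeline(images=["PATH/TO/IMAGE.jpg"])

for image_output in outputs:
    # boxes are (x1, y1, x2, y2) after NMS; labels are COCO class names
    print(image_output.boxes, image_output.scores, image_output.labels)
```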
+ +import json +from typing import Dict, List, Optional, Tuple, Type, Union + +import numpy +import onnx + +from deepsparse.pipeline import Pipeline +from deepsparse.utils import model_to_path +from deepsparse.yolo.schemas import YOLOInput, YOLOOutput +from deepsparse.yolo.utils import COCO_CLASSES, YoloPostprocessor, postprocess_nms + + +try: + import cv2 + + cv2_error = None +except ModuleNotFoundError as cv2_import_error: + cv2 = None + cv2_error = cv2_import_error + + +@Pipeline.register( + task="yolo", + default_model_path=( + "zoo:cv/detection/yolov5-l/pytorch/ultralytics/coco/pruned_quant-aggressive_95" + ), +) +class YOLOPipeline(Pipeline): + """ + Image Segmentation YOLO pipeline for DeepSparse + + :param model_path: path on local system or SparseZoo stub to load the model from + :param engine_type: inference engine to use. Currently supported values include + 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' + :param batch_size: static batch size to use for inference. Default is 1 + :param num_cores: number of CPU cores to allocate for inference engine. None + specifies all available cores. Default is None + :param scheduler: (deepsparse only) kind of scheduler to execute with. + Pass None for the default + :param input_shapes: list of shapes to set ONNX the inputs to. Pass None + to use model as-is. Default is None + :param alias: optional name to give this pipeline instance, useful when + inferencing with multiple models. Default is None + :param class_names: Optional string identifier, dict, or json file of + class names to use for mapping class ids to class labels. Default is + `coco` + """ + + def __init__( + self, + *, + class_names: Optional[Union[str, Dict[str, str]]] = "coco", + model_config: Optional[str] = None, + **kwargs, + ): + super().__init__( + **kwargs, + ) + + if isinstance(class_names, str): + if class_names.endswith(".json"): + class_names = json.load(open(class_names)) + elif class_names == "coco": + class_names = COCO_CLASSES + else: + raise ValueError(f"Unknown class_names: {class_names}") + + if isinstance(class_names, dict): + self._class_names = class_names + elif isinstance(class_names, list): + self._class_names = { + str(index): class_name for index, class_name in enumerate(class_names) + } + else: + raise ValueError( + "class_names must be a str identifier, dict, json file, or " + f"list of class names got {type(class_names)}" + ) + + onnx_model = onnx.load(self.onnx_file_path) + self.has_postprocessing = self.model_has_postprocessing( + loaded_onnx_model=onnx_model, + ) + self.input_shape = self._infer_image_shape(onnx_model=onnx_model) + self.is_quantized = self.model_is_quantized(onnx_model=onnx_model) + self.postprocessor = ( + None + if self.has_postprocessing + else YoloPostprocessor( + image_size=self.input_shape, + cfg=model_config, + ) + ) + self._model_config = model_config + + @property + def model_config(self) -> str: + return self._model_config + + @property + def class_names(self) -> Optional[Dict[str, str]]: + return self._class_names + + @property + def input_schema(self) -> Type[YOLOInput]: + """ + :return: pydantic model class that inputs to this pipeline must comply to + """ + return YOLOInput + + @property + def output_schema(self) -> Type[YOLOOutput]: + """ + :return: pydantic model class that outputs of this pipeline must comply to + """ + return YOLOOutput + + def setup_onnx_file_path(self) -> str: + """ + Performs any setup to unwrap and process the given `model_path` and other + class properties into an inference ready 
onnx file to be compiled by the + engine of the pipeline + + :return: file path to the ONNX file for the engine to compile + """ + return model_to_path(self.model_path) + + def process_inputs(self, inputs: YOLOInput) -> List[numpy.ndarray]: + """ + :param inputs: inputs to the pipeline. Must be the type of the `input_schema` + of this pipeline + :return: inputs of this model processed into a list of numpy arrays that + can be directly passed into the forward pass of the pipeline engine + """ + image_batch = [] + + if isinstance(inputs.images, str): + inputs.images = [inputs.images] + + for image in inputs.images: + if isinstance(image, str): + image = cv2.imread(image) + image = cv2.resize(image, dsize=self.input_shape) + image = image[:, :, ::-1].transpose(2, 0, 1) + + image_batch.append(image) + + image_batch = numpy.stack(image_batch, axis=0) + image_batch = numpy.ascontiguousarray( + image_batch, + dtype=numpy.int8 if self.is_quantized else numpy.float32, + ) + image_batch /= 255 + + return [image_batch] + + def process_engine_outputs( + self, + engine_outputs: List[numpy.ndarray], + ) -> YOLOOutput: + """ + :param engine_outputs: list of numpy arrays that are the output of the engine + forward pass + :return: outputs of engine post-processed into an object in the `output_schema` + format of this pipeline + """ + + # post-processing + if self.postprocessor: + batch_output = self.postprocessor.pre_nms_postprocess(engine_outputs) + else: + batch_output = engine_outputs[ + 0 + ] # post-processed values stored in first output + + # NMS + batch_output = postprocess_nms(batch_output) + + batch_predictions, batch_boxes, batch_scores, batch_labels = [], [], [], [] + + for image_output in batch_output: + batch_predictions.append(image_output.tolist()) + batch_boxes.append(image_output[:, 0:4].tolist()) + batch_scores.append(image_output[:, 4].tolist()) + batch_labels.append( + [ + self.class_names[str(class_ids)] + for class_ids in image_output[:, 5].astype(int) + ] + ) + + return YOLOOutput( + predictions=batch_predictions, + boxes=batch_boxes, + scores=batch_scores, + labels=batch_labels, + ) + + def _infer_image_shape(self, onnx_model) -> Tuple[int, ...]: + """ + Infer and return the expected shape of the input tensor + + :return: The expected shape of the input tensor from onnx graph + """ + input_tensor = onnx_model.graph.input[0] + return ( + input_tensor.type.tensor_type.shape.dim[2].dim_value, + input_tensor.type.tensor_type.shape.dim[3].dim_value, + ) + + def model_has_postprocessing(self, loaded_onnx_model) -> bool: + """ + :return: True if loaded_onnx_model has postprocessing, False otherwise + """ + # get number of dimensions in each output + outputs_num_dims = [ + len(output.type.tensor_type.shape.dim) + for output in loaded_onnx_model.graph.output + ] + + # assume if only one output, then it is post-processed + if len(outputs_num_dims) == 1: + return True + + return all(num_dims > outputs_num_dims[0] for num_dims in outputs_num_dims[1:]) + + def model_is_quantized(self, onnx_model) -> bool: + """ + :return: True if loaded_onnx_model is quantized, False otherwise + """ + return ( + onnx_model.graph.input[0].type.tensor_type.elem_type + == onnx.TensorProto.UINT8 + ) diff --git a/src/deepsparse/yolo/schemas.py b/src/deepsparse/yolo/schemas.py new file mode 100644 index 0000000000..f60357dfb5 --- /dev/null +++ b/src/deepsparse/yolo/schemas.py @@ -0,0 +1,70 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +""" +Input/Output Schemas for Image Segmentation with YOLO +""" +from collections import namedtuple +from typing import List, Union + +import numpy +from pydantic import BaseModel + + +__all__ = [ + "YOLOOutput", + "YOLOInput", +] + +_YOLOImageOutput = namedtuple( + "_YOLOImageOutput", ["predictions", "boxes", "scores", "labels"] +) + + +class YOLOInput(BaseModel): + """ + Input model for image classification + """ + + images: Union[str, List[numpy.ndarray], List[str]] + + class Config: + arbitrary_types_allowed = True + + +class YOLOOutput(BaseModel): + """ + Output model for image classification + """ + + predictions: List[List[List[float]]] + boxes: List[List[List[float]]] + scores: List[List[float]] + labels: List[List[str]] + + def __getitem__(self, index): + if index >= len(self.predictions): + raise IndexError("Index out of range") + + return _YOLOImageOutput( + self.predictions[index], + self.boxes[index], + self.scores[index], + self.labels[index], + ) + + def __iter__(self): + for index in range(len(self.predictions)): + yield self[index] diff --git a/src/deepsparse/yolo/utils/__init__.py b/src/deepsparse/yolo/utils/__init__.py new file mode 100644 index 0000000000..5344738df6 --- /dev/null +++ b/src/deepsparse/yolo/utils/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# flake8: noqa + +from .coco_classes import * +from .utils import * diff --git a/src/deepsparse/yolo/utils/cli_helpers.py b/src/deepsparse/yolo/utils/cli_helpers.py new file mode 100644 index 0000000000..ccd366236f --- /dev/null +++ b/src/deepsparse/yolo/utils/cli_helpers.py @@ -0,0 +1,46 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
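The `YOLOOutput` schema defined above keeps batch-order lists and exposes per-image access: `__getitem__` and `__iter__` return a `_YOLOImageOutput` namedtuple so results can be unpacked one image at a time. A small sketch with toy values for a single detection:

```python
from deepsparse.yolo.schemas import YOLOOutput

# toy output: one image in the batch with a single detection
output = YOLOOutput(
    predictions=[[[0.0, 0.0, 10.0, 10.0, 0.9, 0.0]]],
    boxes=[[[0.0, 0.0, 10.0, 10.0]]],
    scores=[[0.9]],
    labels=[["person"]],
)

first_image = output[0]  # _YOLOImageOutput(predictions, boxes, scores, labels)
print(first_image.boxes, first_image.labels)

for image_output in output:  # one namedtuple per image in the batch
    print(image_output.scores)
```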
+ +import os +from typing import Optional, Union + + +def parse_device( + ctx, + params, + value: Optional[Union[str, int]], +) -> Optional[Union[str, int]]: + """ + :param ctx: The click context + :param params: The click params + :param value: The device value to parse + :return: The correct inferred device + """ + try: + return int(value) + except (ValueError, TypeError): + return value + + +def create_dir_callback(ctx, params, value: str): + """ + Create and return directory if it doesn't exist. + + :param ctx: The click context + :param params: The click params + :param value: The value to create the directory from + :returns: The directory path + """ + os.makedirs(value, exist_ok=True) + return value diff --git a/src/deepsparse/yolo/utils/coco_classes.py b/src/deepsparse/yolo/utils/coco_classes.py new file mode 100644 index 0000000000..5e67829d8f --- /dev/null +++ b/src/deepsparse/yolo/utils/coco_classes.py @@ -0,0 +1,96 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +COCO_CLASSES = [ + "person", + "bicycle", + "car", + "motorcycle", + "airplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "backpack", + "umbrella", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + "tennis racket", + "bottle", + "wine glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "couch", + "potted plant", + "bed", + "dining table", + "toilet", + "tv", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush", +] diff --git a/src/deepsparse/yolo/utils/utils.py b/src/deepsparse/yolo/utils/utils.py new file mode 100644 index 0000000000..0e14aad9fe --- /dev/null +++ b/src/deepsparse/yolo/utils/utils.py @@ -0,0 +1,795 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
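`COCO_CLASSES` above is the 80-entry label list that `YOLOPipeline` turns into its id-to-name mapping when `class_names="coco"`; integer class ids coming out of the network index into it. A short sketch of that mapping:

```python
from deepsparse.yolo.utils import COCO_CLASSES

# same construction YOLOPipeline uses: string class ids -> readable names
class_names = {str(index): name for index, name in enumerate(COCO_CLASSES)}

print(len(COCO_CLASSES))   # 80
print(class_names["0"])    # person
print(class_names["16"])   # dog
```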
+ +""" +Helpers and Utilities for YOLO +""" +import functools +import glob +import itertools +import logging +import os +import random +import shutil +import time +from pathlib import Path +from typing import Any, Iterable, Iterator, List, Optional, Tuple, Union + +import numpy +import onnx +import yaml + +import torch +import torchvision +from sparsezoo.utils import create_dirs + + +try: + import cv2 + + cv2_error = None +except ModuleNotFoundError as cv2_import_error: + cv2 = None + cv2_error = cv2_import_error + +_YOLO_CLASS_COLORS = list(itertools.product([0, 255, 128, 64, 192], repeat=3)) +_YOLO_CLASS_COLORS.remove((255, 255, 255)) # remove white from possible colors +_LOGGER = logging.getLogger(__name__) + +# Default YOLO anchor grids +_YOLO_DEFAULT_ANCHORS = [ + torch.Tensor([[10, 13], [16, 30], [33, 23]]), + torch.Tensor([[30, 61], [62, 45], [59, 119]]), + torch.Tensor([[116, 90], [156, 198], [373, 326]]), +] +_YOLO_DEFAULT_ANCHOR_GRIDS = [ + t.clone().view(1, -1, 1, 1, 2) for t in _YOLO_DEFAULT_ANCHORS +] + + +@functools.lru_cache(maxsize=None) +def _get_color(label): + # cache color lookups + return random.choice(_YOLO_CLASS_COLORS) + + +class YoloPostprocessor: + """ + Class for performing post-processing of YOLO model predictions + + :param image_size: size of input image to model. used to calculate stride based on + output shapes + """ + + def __init__( + self, image_size: Tuple[int, int] = (640, 640), cfg: Optional[str] = None + ): + self._image_size = image_size + self._anchor_grids = ( + self._load_cfg_anchor_grid(cfg) if cfg else _YOLO_DEFAULT_ANCHOR_GRIDS + ) + self._grids = {} # Dict[Tuple[int], torch.Tensor] + + def pre_nms_postprocess(self, outputs: List[numpy.ndarray]) -> torch.Tensor: + """ + :param outputs: raw outputs of a YOLO model before anchor grid processing + :return: post-processed model outputs without NMS. 
+ """ + # postprocess and transform raw outputs into single torch tensor + processed_outputs = [] + for idx, pred in enumerate(outputs): + pred = torch.from_numpy(pred) + pred = pred.sigmoid() + + # get grid and stride + grid_shape = pred.shape[2:4] + grid = self._get_grid(grid_shape) + stride = self._image_size[0] / grid_shape[0] + + # decode xywh box values + pred[..., 0:2] = (pred[..., 0:2] * 2.0 - 0.5 + grid) * stride + pred[..., 2:4] = (pred[..., 2:4] * 2) ** 2 * self._anchor_grids[idx] + # flatten anchor and grid dimensions -> + # (bs, num_predictions, num_classes + 5) + processed_outputs.append(pred.view(pred.size(0), -1, pred.size(-1))) + return torch.cat(processed_outputs, 1) + + def _get_grid(self, grid_shape: Tuple[int, int]) -> torch.Tensor: + if grid_shape not in self._grids: + # adapted from yolov5.yolo.Detect._make_grid + coords_y, coords_x = torch.meshgrid( + [torch.arange(grid_shape[0]), torch.arange(grid_shape[1])] + ) + grid = torch.stack((coords_x, coords_y), 2) + self._grids[grid_shape] = grid.view( + 1, 1, grid_shape[0], grid_shape[1], 2 + ).float() + return self._grids[grid_shape] + + @staticmethod + def _load_cfg_anchor_grid(cfg: str) -> List[torch.Tensor]: + with open(cfg) as f: + anchors = yaml.safe_load(f)["anchors"] + + def _split_to_coords(coords_list): + return [ + [coords_list[idx], coords_list[idx + 1]] + for idx in range(0, len(coords_list), 2) + ] + + anchors = [torch.Tensor(_split_to_coords(coords)) for coords in anchors] + return [t.clone().view(1, -1, 1, 1, 2) for t in anchors] + + +def postprocess_nms(outputs: Union[torch.Tensor, numpy.ndarray]) -> List[numpy.ndarray]: + """ + :param outputs: Tensor of post-processed model outputs + :return: List of numpy arrays of NMS predictions for each image in the batch + """ + # run nms in PyTorch, only post-process first output + if isinstance(outputs, numpy.ndarray): + outputs = torch.from_numpy(outputs) + nms_outputs = _non_max_suppression(outputs) + return [output.cpu().numpy() for output in nms_outputs] + + +def _non_max_suppression( + prediction, + conf_thres=0.25, + iou_thres=0.45, + classes=None, + agnostic=False, + multi_label=False, + labels=(), +): + # Ported from ultralytics/yolov5 + + nc = prediction.shape[2] - 5 # number of classes + xc = prediction[..., 4] > conf_thres # candidates + + # Checks + assert 0 <= conf_thres <= 1, ( + f"Invalid Confidence threshold {conf_thres}, " + "valid values are between 0.0 and 1.0" + ) + assert ( + 0 <= iou_thres <= 1 + ), f"Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0" + + # Settings + _, max_wh = 2, 4096 # (pixels) minimum and maximum box width and height + max_det = 300 # maximum number of detections per image + max_nms = 30000 # maximum number of boxes into torchvision.ops.nms() + time_limit = 10.0 # seconds to quit after + redundant = True # require redundant detections + multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img) + merge = False # use merge-NMS + + t = time.time() + output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0] + for xi, x in enumerate(prediction): # image index, image inference + # Apply constraints + # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 + x = x[xc[xi]] # confidence + + # Cat apriori labels if autolabelling + if labels and len(labels[xi]): + label_ = labels[xi] + v = torch.zeros((len(label_), nc + 5), device=x.device) + v[:, :4] = label_[:, 1:5] # box + v[:, 4] = 1.0 # conf + v[range(len(label_)), label_[:, 0].long() + 5] = 1.0 # cls + x = torch.cat((x, 
+
+        # If none remain process next image
+        if not x.shape[0]:
+            continue
+
+        # Compute conf
+        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf
+
+        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
+        box = _xywh2xyxy(x[:, :4])
+
+        # Detections matrix nx6 (xyxy, conf, cls)
+        if multi_label:
+            i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
+            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
+        else:  # best class only
+            conf, j = x[:, 5:].max(1, keepdim=True)
+            x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]
+
+        # Filter by class
+        if classes is not None:
+            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
+
+        # Apply finite constraint
+        # if not torch.isfinite(x).all():
+        #     x = x[torch.isfinite(x).all(1)]
+
+        # Check shape
+        n = x.shape[0]  # number of boxes
+        if not n:  # no boxes
+            continue
+        elif n > max_nms:  # excess boxes
+            x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence
+
+        # Batched NMS
+        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
+        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
+        i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
+        if i.shape[0] > max_det:  # limit detections
+            i = i[:max_det]
+        if merge and (1 < n < 3e3):  # Merge NMS (boxes merged using weighted mean)
+            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
+            iou = _box_iou(boxes[i], boxes) > iou_thres  # iou matrix
+            weights = iou * scores[None]  # box weights
+            x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(
+                1, keepdim=True
+            )  # merged boxes
+            if redundant:
+                i = i[iou.sum(1) > 1]  # require redundancy
+
+        output[xi] = x[i]
+        if (time.time() - t) > time_limit:
+            print(f"WARNING: NMS time limit {time_limit}s exceeded")
+            break  # time limit exceeded
+
+    return output
+
+
+def _xywh2xyxy(
+    x: Union[torch.Tensor, numpy.ndarray]
+) -> Union[torch.Tensor, numpy.ndarray]:
+    # ported from ultralytics/yolov5
+    # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2]
+    # where xy1=top-left, xy2=bottom-right
+    y = x.clone() if isinstance(x, torch.Tensor) else numpy.copy(x)
+    y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x
+    y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y
+    y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom right x
+    y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom right y
+    return y
+
+
+def _box_iou(box1: torch.Tensor, box2: torch.Tensor) -> torch.Tensor:
+    # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
+    """
+    Return intersection-over-union (Jaccard index) of boxes.
+    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
+    Arguments:
+        box1 (Tensor[N, 4])
+        box2 (Tensor[M, 4])
+    Returns:
+        iou (Tensor[N, M]): the NxM matrix containing the pairwise
+            IoU values for every element in boxes1 and boxes2
+    """
+
+    def box_area(box):
+        # box = 4xn
+        return (box[2] - box[0]) * (box[3] - box[1])
+
+    area1 = box_area(box1.T)
+    area2 = box_area(box2.T)
+
+    # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
+    inter = (
+        (
+            torch.min(box1[:, None, 2:], box2[:, 2:])
+            - torch.max(box1[:, None, :2], box2[:, :2])
+        )
+        .clamp(0)
+        .prod(2)
+    )
+    return inter / (
+        area1[:, None] + area2 - inter
+    )  # iou = inter / (area1 + area2 - inter)
+
+
+def yolo_onnx_has_postprocessing(model_path: str) -> bool:
+    """
+    :param model_path: file path to YOLO ONNX model
+    :return: True if YOLO post-processing (pre-NMS) is included in the ONNX graph,
+        assumed to be the case when the first output of the model has fewer
+        dimensions than the other outputs, as the grid dimensions have been flattened
+    """
+    model = onnx.load(model_path)
+
+    # get number of dimensions in each output
+    outputs_num_dims = [
+        len(output.type.tensor_type.shape.dim) for output in model.graph.output
+    ]
+
+    # assume if only one output, then it is post-processed
+    if len(outputs_num_dims) == 1:
+        return True
+
+    return all(num_dims > outputs_num_dims[0] for num_dims in outputs_num_dims[1:])
+
+
+def annotate(
+    pipeline: "YOLOPipeline",  # noqa: F821
+    image_batch: Union[List[numpy.ndarray], List[str]],
+    target_fps: Optional[float] = None,
+    calc_fps: bool = False,
+    original_images: Optional[Union[List[numpy.ndarray], numpy.ndarray]] = None,
+) -> List[numpy.ndarray]:
+    """
+    Annotate and return image_batch with bounding boxes and labels
+
+    :param pipeline: A YOLOPipeline object
+    :param image_batch: A list of image file paths, or a batch of numpy images
+    :param target_fps: If not None, this FPS value is annotated on the images
+        instead of a measured value
+    :param calc_fps: If True and target_fps is None, the FPS is measured from the
+        pipeline run and annotated on the images
+    :param original_images: images from image_batch before any processing
+    :return: A list of annotated images
+    """
+
+    if not isinstance(image_batch, list):
+        image_batch = [image_batch]
+
+    if not original_images:
+        original_images = image_batch
+
+    batch_size = len(image_batch)
+    if image_batch and isinstance(image_batch[0], str):
+        original_images = [cv2.imread(image) for image in image_batch]
+
+    if target_fps is None and calc_fps:
+        start = time.time()
+
+    pipeline_outputs = pipeline(images=image_batch)
+
+    if target_fps is None and calc_fps:
+        target_fps = float(batch_size) / (time.time() - start)
+
+    annotated_images = []
+    for index, image_output in enumerate(pipeline_outputs):
+        image = original_images[index]
+        result = _annotate_image(
+            img=image,
+            boxes=image_output.boxes,
+            labels=image_output.labels,
+            scores=image_output.scores,
+            model_input_size=pipeline.input_shape,
+            images_per_sec=target_fps,
+        )
+        annotated_images.append(result)
+
+    return annotated_images
+
+
+def _annotate_image(
+    img: numpy.ndarray,
+    boxes: List[List[float]],
+    scores: List[float],
+    labels: List[str],
+    score_threshold: float = 0.35,
+    model_input_size: Optional[Tuple[int, int]] = None,
+    images_per_sec: Optional[float] = None,
+) -> numpy.ndarray:
+    """
+    Draws bounding boxes on predictions of a detection model
+
+    :param img: Original image to annotate (no pre-processing needed)
+    :param boxes: List of bounding boxes (x1, y1, x2, y2)
+    :param scores: List of scores for each bounding box
+    :param labels: List of labels for each bounding box
+    :param score_threshold: minimum score a detection should have to be annotated
+        on the image. Default is 0.35
+    :param model_input_size: 2-tuple of expected input size for the given model to
+        be used for bounding box scaling with original image. Scaling will not
+        be applied if model_input_size is None. Default is None
+    :param images_per_sec: optional frames per second value to annotate the corner
+        of the image with
+    :return: the original image annotated with the given bounding boxes
+    """
+    img_res = numpy.copy(img)
+
+    scale_y = img.shape[0] / (1.0 * model_input_size[0]) if model_input_size else 1.0
+    scale_x = img.shape[1] / (1.0 * model_input_size[1]) if model_input_size else 1.0
+
+    for idx in range(len(boxes)):
+        label = labels[idx]
+        if scores[idx] > score_threshold:
+            annotation_text = f"{label}: {scores[idx]:.0%}"
+
+            # bounding box points
+            left = boxes[idx][0] * scale_x
+            top = boxes[idx][1] * scale_y
+            right = boxes[idx][2] * scale_x
+            bottom = boxes[idx][3] * scale_y
+
+            # calculate text size
+            (text_width, text_height), text_baseline = cv2.getTextSize(
+                annotation_text,
+                cv2.FONT_HERSHEY_SIMPLEX,
+                0.9,  # font scale
+                2,  # thickness
+            )
+            text_height += text_baseline
+
+            # make solid background for annotation text
+            cv2.rectangle(
+                img_res,
+                (int(left), int(top) - 33),
+                (int(left) + text_width, int(top) - 28 + text_height),
+                _get_color(label),
+                thickness=-1,  # filled solid
+            )
+
+            # add white annotation text
+            cv2.putText(
+                img_res,
+                annotation_text,
+                (int(left), int(top) - 10),
+                cv2.FONT_HERSHEY_SIMPLEX,
+                0.9,  # font scale
+                (255, 255, 255),  # white text
+                2,  # thickness
+                cv2.LINE_AA,
+            )
+
+            # draw bounding box
+            cv2.rectangle(
+                img_res,
+                (int(left), int(top)),
+                (int(right), int(bottom)),
+                _get_color(label),
+                thickness=2,
+            )
+
+    if images_per_sec is not None:
+        cv2.putText(
+            img_res,
+            f"images_per_sec: {int(images_per_sec)}",
+            (50, 50),
+            cv2.FONT_HERSHEY_SIMPLEX,
+            2.0,  # font scale
+            (245, 46, 6),  # color
+            2,  # thickness
+            cv2.LINE_AA,
+        )
+    return img_res
+
+
+def get_yolo_loader_and_saver(
+    path: str,
+    save_dir: str,
+    image_shape: Tuple[int, int] = (640, 640),
+    target_fps: Optional[float] = None,
+    no_save: bool = False,
+) -> Union[Iterable, Any, bool]:
+    """
+    :param path: file path to image or directory of .jpg files, a .mp4 video,
+        or an integer (i.e. 0) for web-cam
+    :param save_dir: path of directory to save to
+    :param image_shape: size of input images to the model
+    :param target_fps: fps to save potential video at
+    :param no_save: set True if not saving results of processing
+    :return: image loader iterable and result saver object for
+        images, video, or web-cam based on the path given, and a boolean value
+        that is True if the returned objects load videos
+    """
+    # video
+    if path.endswith(".mp4"):
+        loader = YoloVideoLoader(path, image_shape)
+        saver = VideoSaver(
+            save_dir,
+            loader.original_fps,
+            loader.original_frame_size,
+            target_fps,
+        )
+        return loader, saver, True
+    # webcam
+    if path.isnumeric():
+        loader = YoloWebcamLoader(int(path), image_shape)
+        saver = (
+            VideoSaver(save_dir, 30, loader.original_frame_size, None)
+            if not no_save
+            else None
+        )
+        return loader, saver, True
+    # image file(s)
+    return YoloImageLoader(path, image_shape), ImagesSaver(save_dir), False
+
+
+class YoloImageLoader:
+    """
+    Class for pre-processing and iterating over images to be used as input for YOLO
+    models
+
+    :param path: Filepath to single image file or directory of image files to load,
+        glob paths also valid
+    :param image_size: size of input images to the model
+    """
+
+    def __init__(self, path: str, image_size: Tuple[int, int] = (640, 640)):
+        self._path = path
+        self._image_size = image_size
+
+        if os.path.isdir(path):
+            self._image_file_paths = [
+                os.path.join(path, file_name) for file_name in os.listdir(path)
+            ]
+        elif "*" in path:
+            self._image_file_paths = glob.glob(path)
+        elif os.path.isfile(path):
+            # single file
+            self._image_file_paths = [path]
+        else:
+            raise ValueError(f"{path} is not a file, glob, or directory")
+
+    def __iter__(self) -> Iterator[Tuple[numpy.ndarray, numpy.ndarray]]:
+        for image_path in self._image_file_paths:
+            yield load_image(image_path, image_size=self._image_size)
+
+
+class YoloVideoLoader:
+    """
+    Class for pre-processing and iterating over video frames to be used as input for
+    YOLO models
+
+    :param path: Filepath to single video file
+    :param image_size: size of input images to the model
+    """
+
+    def __init__(self, path: str, image_size: Tuple[int, int] = (640, 640)):
+        self._path = path
+        self._image_size = image_size
+        self._vid = cv2.VideoCapture(self._path)
+        self._total_frames = int(self._vid.get(cv2.CAP_PROP_FRAME_COUNT))
+        self._fps = self._vid.get(cv2.CAP_PROP_FPS)
+
+    def __iter__(self) -> Iterator[Tuple[numpy.ndarray, numpy.ndarray]]:
+        for _ in range(self._total_frames):
+            loaded, frame = self._vid.read()
+            if not loaded:
+                break
+            yield load_image(frame, image_size=self._image_size)
+        self._vid.release()
+
+    @property
+    def original_fps(self) -> float:
+        """
+        :return: the frames per second of the video this object reads
+        """
+        return self._fps
+
+    @property
+    def original_frame_size(self) -> Tuple[int, int]:
+        """
+        :return: the original size of frames in the video this object reads
+        """
+        return (
+            int(self._vid.get(cv2.CAP_PROP_FRAME_WIDTH)),
+            int(self._vid.get(cv2.CAP_PROP_FRAME_HEIGHT)),
+        )
+
+    @property
+    def total_frames(self) -> int:
+        """
+        :return: the total number of frames this object may load from the video
+        """
+        return self._total_frames
+
+
+class YoloWebcamLoader:
+    """
+    Class for pre-processing and iterating over webcam frames to be used as input for
+    YOLO models.
+
+    Adapted from: https://github.com/ultralytics/yolov5/blob/master/utils/datasets.py
+
+    :param camera: Webcam index
+    :param image_size: size of input images to the model
+    """
+
+    def __init__(self, camera: int, image_size: Tuple[int, int] = (640, 640)):
+
+        self._camera = camera
+        self._image_size = image_size
+        self._stream = cv2.VideoCapture(self._camera)
+        self._stream.set(cv2.CAP_PROP_BUFFERSIZE, 3)
+
+    def __iter__(self) -> Iterator[Tuple[numpy.ndarray, numpy.ndarray]]:
+        while True:
+            if cv2.waitKey(1) == ord("q"):  # q to quit
+                self._stream.release()
+                cv2.destroyAllWindows()
+                break
+            loaded, frame = self._stream.read()
+
+            assert loaded, f"Could not load image from webcam {self._camera}"
+
+            frame = cv2.flip(frame, 1)  # flip left-right
+            yield load_image(frame, image_size=self._image_size)
+
+    @property
+    def original_frame_size(self) -> Tuple[int, int]:
+        """
+        :return: the original size of frames in the stream this object reads
+        """
+        return (
+            int(self._stream.get(cv2.CAP_PROP_FRAME_WIDTH)),
+            int(self._stream.get(cv2.CAP_PROP_FRAME_HEIGHT)),
+        )
+
+
+class ImagesSaver:
+    """
+    Base class for saving YOLO model outputs. Saves each image as an individual file in
+    the given directory
+
+    :param save_dir: path to directory to write to
+    """
+
+    def __init__(self, save_dir: str):
+        self._save_dir = save_dir
+        self._idx = 0
+
+        create_dirs(save_dir)
+
+    def save_frame(self, image: numpy.ndarray):
+        """
+        :param image: numpy array of image to save
+        """
+        output_path = os.path.join(self._save_dir, f"result-{self._idx}.jpg")
+        cv2.imwrite(output_path, image)
+        self._idx += 1
+
+    def close(self):
+        """
+        perform any clean-up tasks
+        """
+        pass
+
+
+class VideoSaver(ImagesSaver):
+    """
+    Class for saving YOLO model outputs as a video file
+
+    :param save_dir: path to directory to write to
+    :param original_fps: frames per second to save video with
+    :param output_frame_size: size of frames to write
+    :param target_fps: fps target for output video. If present, video
+        will be written with a certain number of the original frames
+        evenly dropped to match the target FPS.
+    """
+
+    def __init__(
+        self,
+        save_dir: str,
+        original_fps: float,
+        output_frame_size: Tuple[int, int],
+        target_fps: Optional[float] = None,
+    ):
+        super().__init__(save_dir)
+
+        self._output_frame_size = output_frame_size
+        self._original_fps = original_fps
+
+        if target_fps is not None and target_fps >= original_fps:
+            print(
+                f"target_fps {target_fps} is greater than source_fps "
+                f"{original_fps}. target_fps file will not be created"
+            )
+        self._target_fps = target_fps
+
+        self._file_path = os.path.join(self._save_dir, "results.mp4")
+        self._writer = cv2.VideoWriter(
+            self._file_path,
+            cv2.VideoWriter_fourcc(*"mp4v"),
+            original_fps,
+            self._output_frame_size,
+        )
+        self._n_frames = 0
+
+    def save_frame(self, image: numpy.ndarray):
+        """
+        :param image: numpy array of image to save
+        """
+        self._writer.write(image)
+        self._n_frames += 1
+
+    def close(self):
+        """
+        perform any clean-up tasks
+        """
+        self._writer.release()
+        if self._target_fps is not None and self._target_fps < self._original_fps:
+            self._write_target_fps_video()
+
+    def _write_target_fps_video(self):
+        assert self._target_fps is not None
+        num_frames_to_keep = int(
+            self._n_frames * (self._target_fps / self._original_fps)
+        )
+        # adjust target fps so we can keep the same video duration
+        adjusted_target_fps = num_frames_to_keep * (self._original_fps / self._n_frames)
+
+        # select num_frames_to_keep evenly spaced frame idxs
+        frame_idxs_to_keep = set(
+            numpy.round(numpy.linspace(0, self._n_frames, num_frames_to_keep))
+            .astype(int)
+            .tolist()
+        )
+
+        # create new video writer for adjusted video
+        vid_path = os.path.join(
+            self._save_dir, f"_results-{adjusted_target_fps:.2f}fps.mp4"
+        )
+        fps_writer = cv2.VideoWriter(
+            vid_path,
+            cv2.VideoWriter_fourcc(*"mp4v"),
+            adjusted_target_fps,
+            self._output_frame_size,
+        )
+
+        # read from original video and write to FPS adjusted video
+        saved_vid = cv2.VideoCapture(self._file_path)
+        for idx in range(self._n_frames):
+            _, frame = saved_vid.read()
+            if idx in frame_idxs_to_keep:
+                fps_writer.write(frame)
+
+        saved_vid.release()
+        fps_writer.release()
+        shutil.move(vid_path, self._file_path)  # overwrite original file
+
+
+def load_image(
+    img: Union[str, numpy.ndarray], image_size: Tuple[int, int] = (640, 640)
+) -> Tuple[numpy.ndarray, numpy.ndarray]:
+    """
+    :param img: file path to image or raw image array
+    :param image_size: target shape for image
+    :return: Image loaded into numpy and reshaped to the given shape and the original
+        image
+    """
+    img = cv2.imread(img) if isinstance(img, str) else img
+    img_resized = cv2.resize(img, image_size)
+    img_transposed = img_resized[:, :, ::-1].transpose(2, 0, 1)
+
+    return img_transposed, img
+
+
+def get_annotations_save_dir(
+    initial_save_dir: str,
+    tag: Optional[str] = None,
+    engine: Optional[str] = None,
+) -> str:
+    """
+    Returns the directory to save annotations to. If directory exists and is
+    non-empty, a number is appended to the end of the directory name.
+
+    :param initial_save_dir: Initial directory to save annotations to
+    :param tag: A tag under which to save the annotations inside `save_dir`
+    :param engine: Used to generate a unique tag if it is not provided.
+    :return: A new unique dir path to save annotations to
+    """
+    name = tag or f"{engine}-annotations"
+    initial_save_dir = os.path.join(initial_save_dir, name)
+    counter = 0
+    new_save_dir = initial_save_dir
+    while Path(new_save_dir).exists() and any(Path(new_save_dir).iterdir()):
+        counter += 1
+        new_save_dir = os.path.join(initial_save_dir, f"{name}-{counter:03d}")
+
+    _LOGGER.info(f"Results will be saved to {new_save_dir}")
+    Path(new_save_dir).mkdir(parents=True, exist_ok=True)
+    return new_save_dir
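
To make the intent of these helpers concrete, here is a minimal usage sketch (not part of the diff) showing how the loader, saver, and `annotate` utilities are meant to compose. The module path `deepsparse.yolo.utils`, the `"yolo"` task name, and the model stub below are assumptions for illustration; adjust them to whatever this PR actually registers.

```python
# Hypothetical sketch only: module path, task name, and model stub are assumptions.
from deepsparse import Pipeline
from deepsparse.yolo.utils import (  # assumed location of the helpers in this diff
    annotate,
    get_annotations_save_dir,
    get_yolo_loader_and_saver,
)

# placeholder stub; substitute a real SparseZoo stub or local ONNX path
yolo_pipeline = Pipeline.create(task="yolo", model_path="zoo:...")

save_dir = get_annotations_save_dir("annotation-results", engine="deepsparse")
loader, saver, is_video = get_yolo_loader_and_saver(
    path="sample_images/", save_dir=save_dir, image_shape=(640, 640)
)

for model_input, original_image in loader:
    # annotate() runs the pipeline and draws boxes scaled back onto the original image
    annotated = annotate(
        yolo_pipeline,
        image_batch=[model_input],
        original_images=[original_image],
        calc_fps=True,
    )
    saver.save_frame(annotated[0])

saver.close()
```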