diff --git a/README.md b/README.md index e0ea89bcc3..91e4d21b22 100644 --- a/README.md +++ b/README.md @@ -139,12 +139,12 @@ deepsparse.benchmark [-h] [-b BATCH_SIZE] [-shapes INPUT_SHAPES] ## 👩‍💻 NLP Inference Example ```python -from deepsparse import Pipeline +from deepsparse.transformers import pipeline # SparseZoo model stub or path to ONNX file model_path = "zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/12layer_pruned80_quant-none-vnni" -qa_pipeline = Pipeline.create( +qa_pipeline = pipeline( task="question-answering", model_path=model_path, ) diff --git a/setup.py b/setup.py index c6299ca473..6cd3ec7861 100644 --- a/setup.py +++ b/setup.py @@ -45,7 +45,6 @@ _deps = [ "numpy>=1.16.3", "onnx>=1.5.0,<=1.10.1", - "pydantic>=1.8.2", "requests>=2.0.0", "tqdm>=4.0.0", "protobuf>=3.12.2", @@ -81,16 +80,6 @@ "onnxruntime>=1.7.0", ] -_ic_integration_deps = [ - "click<8.1", - "opencv-python", -] - -_yolo_integration_deps = [ - "torchvision>=0.3.0,<=0.10.1", - "opencv-python", -] - class OverrideInstall(install): """ @@ -183,15 +172,12 @@ def _setup_extras() -> Dict: "dev": _dev_deps, "server": _server_deps, "onnxruntime": _onnxruntime_deps, - "image_classification": _ic_integration_deps, - "yolo": _yolo_integration_deps, } def _setup_entry_points() -> Dict: data_api_entrypoint = "deepsparse.transformers.pipelines_cli:cli" eval_downstream = "deepsparse.transformers.eval_downstream:main" - return { "console_scripts": [ f"deepsparse.transformers.run_inference={data_api_entrypoint}", @@ -200,7 +186,6 @@ def _setup_entry_points() -> Dict: "deepsparse.check_hardware=deepsparse.cpu:print_hardware_capability", "deepsparse.benchmark=deepsparse.benchmark.benchmark_model:main", "deepsparse.server=deepsparse.server.main:start_server", - "deepsparse.object_detection.annotate=deepsparse.yolo.annotate:main", ] } diff --git a/src/deepsparse/__init__.py b/src/deepsparse/__init__.py index d9c28dc591..3d3113b74b 100644 --- a/src/deepsparse/__init__.py +++ b/src/deepsparse/__init__.py @@ -31,7 +31,6 @@ cpu_vnni_compatible, ) from .engine import * -from .pipeline import * from .version import __version__, is_release diff --git a/src/deepsparse/image_classification/__init__.py b/src/deepsparse/image_classification/__init__.py deleted file mode 100644 index 0c44f887a4..0000000000 --- a/src/deepsparse/image_classification/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/src/deepsparse/image_classification/constants.py b/src/deepsparse/image_classification/constants.py deleted file mode 100644 index d035e44513..0000000000 --- a/src/deepsparse/image_classification/constants.py +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -IMAGENET_RGB_MEANS = [0.485, 0.456, 0.406] -IMAGENET_RGB_STDS = [0.229, 0.224, 0.225] diff --git a/src/deepsparse/image_classification/pipelines.py b/src/deepsparse/image_classification/pipelines.py deleted file mode 100644 index e085937728..0000000000 --- a/src/deepsparse/image_classification/pipelines.py +++ /dev/null @@ -1,197 +0,0 @@ -# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Image classification pipeline -""" -import json -from typing import Dict, List, Optional, Tuple, Type, Union - -import numpy -import onnx - -from deepsparse.image_classification.constants import ( - IMAGENET_RGB_MEANS, - IMAGENET_RGB_STDS, -) -from deepsparse.image_classification.schemas import ( - ImageClassificationInput, - ImageClassificationOutput, -) -from deepsparse.pipeline import Pipeline -from deepsparse.utils import model_to_path - - -try: - import cv2 - - cv2_error = None -except ModuleNotFoundError as cv2_import_error: - cv2 = None - cv2_error = cv2_import_error - - -@Pipeline.register( - task="image_classification", - default_model_path=( - "zoo:cv/classification/resnet_v1-50/pytorch/sparseml/" - "imagenet/pruned85_quant-none-vnni" - ), -) -class ImageClassificationPipeline(Pipeline): - """ - Image classification pipeline for DeepSparse - - :param model_path: path on local system or SparseZoo stub to load the model from - :param engine_type: inference engine to use. Currently supported values include - 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' - :param batch_size: static batch size to use for inference. Default is 1 - :param num_cores: number of CPU cores to allocate for inference engine. None - specifies all available cores. Default is None - :param scheduler: (deepsparse only) kind of scheduler to execute with. - Pass None for the default - :param input_shapes: list of shapes to set ONNX the inputs to. Pass None - to use model as-is. Default is None - :param alias: optional name to give this pipeline instance, useful when - inferencing with multiple models. Default is None - :param class_names: Optional dict, or json file of class names to use for - mapping class ids to class labels. 
Default is None - """ - - def __init__( - self, - *, - class_names: Union[None, str, Dict[str, str]] = None, - **kwargs, - ): - super().__init__(**kwargs) - - if isinstance(class_names, str) and class_names.endswith(".json"): - self._class_names = json.load(open(class_names)) - elif isinstance(class_names, dict): - self._class_names = class_names - else: - self._class_names = None - - self._image_size = self._infer_image_size() - - @property - def class_names(self) -> Optional[Dict[str, str]]: - """ - :return: Optional dict, or json file of class names to use for - mapping class ids to class labels - """ - return self._class_names - - @property - def input_schema(self) -> Type[ImageClassificationInput]: - """ - :return: pydantic model class that inputs to this pipeline must comply to - """ - return ImageClassificationInput - - @property - def output_schema(self) -> Type[ImageClassificationOutput]: - """ - :return: pydantic model class that outputs of this pipeline must comply to - """ - return ImageClassificationOutput - - def setup_onnx_file_path(self) -> str: - """ - Performs any setup to unwrap and process the given `model_path` and other - class properties into an inference ready onnx file to be compiled by the - engine of the pipeline - - :return: file path to the ONNX file for the engine to compile - """ - - return model_to_path(self.model_path) - - def process_inputs(self, inputs: ImageClassificationInput) -> List[numpy.ndarray]: - """ - Pre-Process the Inputs for DeepSparse Engine - - :param inputs: input model - :return: list of preprocessed numpy arrays - """ - - if isinstance(inputs.images, numpy.ndarray): - image_batch = inputs.images - else: - - image_batch = [] - - if isinstance(inputs.images, str): - inputs.images = [inputs.images] - - for image in inputs.images: - if cv2 is None: - raise RuntimeError( - "cv2 is required to load image inputs from file " - f"Unable to import: {cv2_error}" - ) - img = cv2.imread(image) if isinstance(image, str) else image - - img = cv2.resize(img, dsize=self._image_size) - img = img[:, :, ::-1].transpose(2, 0, 1) - image_batch.append(img) - - image_batch = numpy.stack(image_batch, axis=0) - - original_dtype = image_batch.dtype - image_batch = numpy.ascontiguousarray(image_batch, dtype=numpy.float32) - - if original_dtype == numpy.uint8: - - image_batch /= 255 - - # normalize entire batch - image_batch -= numpy.asarray(IMAGENET_RGB_MEANS).reshape((-1, 3, 1, 1)) - image_batch /= numpy.asarray(IMAGENET_RGB_STDS).reshape((-1, 3, 1, 1)) - - return [image_batch] - - def process_engine_outputs( - self, - engine_outputs: List[numpy.ndarray], - ) -> ImageClassificationOutput: - """ - :param engine_outputs: list of numpy arrays that are the output of the engine - forward pass - :return: outputs of engine post-processed into an object in the `output_schema` - format of this pipeline - """ - labels = numpy.argmax(engine_outputs[0], axis=1).tolist() - - if self.class_names is not None: - labels = [self.class_names[str(class_id)] for class_id in labels] - - return self.output_schema( - scores=numpy.max(engine_outputs[0], axis=1).tolist(), - labels=labels, - ) - - def _infer_image_size(self) -> Tuple[int, ...]: - """ - Infer and return the expected shape of the input tensor - - :return: The expected shape of the input tensor from onnx graph - """ - onnx_model = onnx.load(self.onnx_file_path) - input_tensor = onnx_model.graph.input[0] - return ( - input_tensor.type.tensor_type.shape.dim[2].dim_value, - input_tensor.type.tensor_type.shape.dim[3].dim_value, - ) 
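> Editor's note: with `ImageClassificationPipeline` and its `constants.py` removed above, callers that still need image-classification inference must handle preprocessing themselves before invoking the engine directly. The sketch below is illustrative only and reconstructed from the deleted code; it assumes `opencv-python` is installed (no longer pulled in by this package), hard-codes a 224x224 input size instead of inferring it from the ONNX graph, and uses `model.onnx` / `cat.jpg` as placeholder paths.

```python
# Minimal, standalone preprocessing sketch mirroring what the removed
# ImageClassificationPipeline did: load an image with OpenCV, resize,
# convert BGR->RGB and HWC->CHW, scale to [0, 1], then normalize with the
# ImageNet means/stds from the deleted constants.py.
import cv2  # assumed available; no longer an optional dependency of this package
import numpy

IMAGENET_RGB_MEANS = [0.485, 0.456, 0.406]
IMAGENET_RGB_STDS = [0.229, 0.224, 0.225]


def preprocess(image_path: str, image_size=(224, 224)) -> numpy.ndarray:
    img = cv2.imread(image_path)               # HWC, BGR, uint8
    img = cv2.resize(img, dsize=image_size)
    img = img[:, :, ::-1].transpose(2, 0, 1)   # BGR -> RGB, HWC -> CHW
    batch = numpy.ascontiguousarray(img[None], dtype=numpy.float32)
    batch /= 255  # cv2.imread returns uint8, so rescale to [0, 1]
    batch -= numpy.asarray(IMAGENET_RGB_MEANS).reshape((-1, 3, 1, 1))
    batch /= numpy.asarray(IMAGENET_RGB_STDS).reshape((-1, 3, 1, 1))
    return batch


# Example usage with the engine compiled directly (placeholder paths):
# from deepsparse import compile_model
# engine = compile_model("model.onnx", batch_size=1)
# scores = engine.run([preprocess("cat.jpg")])
```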
diff --git a/src/deepsparse/image_classification/schemas.py b/src/deepsparse/image_classification/schemas.py deleted file mode 100644 index 5a92b90e3b..0000000000 --- a/src/deepsparse/image_classification/schemas.py +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Input/Output Schemas for Image Classification. -""" - -from typing import List, Union - -import numpy -from pydantic import BaseModel - - -class ImageClassificationInput(BaseModel): - """ - Input model for image classification - """ - - images: Union[str, numpy.ndarray, List[str]] - - class Config: - arbitrary_types_allowed = True - - -class ImageClassificationOutput(BaseModel): - """ - Output model for image classification - """ - - labels: List[Union[int, str]] - scores: List[float] diff --git a/src/deepsparse/image_classification/validation_script.py b/src/deepsparse/image_classification/validation_script.py deleted file mode 100644 index e176b4072c..0000000000 --- a/src/deepsparse/image_classification/validation_script.py +++ /dev/null @@ -1,162 +0,0 @@ -# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Usage: validation_script.py [OPTIONS] - - Validation Script for Image Classification Models - -Options: - --dataset-path, --dataset_path DIRECTORY - Path to the validation dataset [required] - --model-path, --model_path TEXT - Path/SparseZoo stub for the Image - Classification model to be evaluated. - Defaults to resnet50 trained on - Imagenette [default: zoo:cv/classification/ - resnet_v1-50/pytorch/sparseml/imagenette/ - base-none] - --batch-size, --batch_size INTEGER - Test batch size, must divide the dataset - evenly, else the last batch will be dropped - [default: 1] - --help Show this message and exit. - -######### -EXAMPLES -######### - -########## -Example command for validating pruned resnet50 on imagenette dataset: -python validation_script.py \ - --dataset-path /path/to/imagenette/ - -""" -from tqdm import tqdm - -from deepsparse.pipeline import Pipeline -from torch.utils.data import DataLoader -from torchvision import transforms - - -try: - import torchvision - -except ModuleNotFoundError as torchvision_error: # noqa: F841 - print( - "Torchvision not installed. 
Please install it using the command:" - "pip install torchvision>=0.3.0,<=0.10.1" - ) - exit(1) - -import click - - -resnet50_imagenet_pruned = ( - "zoo:cv/classification/resnet_v1-50/pytorch/sparseml/imagenette/base-none" -) - - -@click.command() -@click.option( - "--dataset-path", - "--dataset_path", - required=True, - type=click.Path(dir_okay=True, file_okay=False), - help="Path to the validation dataset", -) -@click.option( - "--model-path", - "--model_path", - type=str, - default=resnet50_imagenet_pruned, - help="Path/SparseZoo stub for the Image Classification model to be " - "evaluated. Defaults to dense (vanilla) resnet50 trained on Imagenette", - show_default=True, -) -@click.option( - "--batch-size", - "--batch_size", - type=int, - default=1, - show_default=True, - help="Test batch size, must divide the dataset evenly, else last " - "batch will be dropped", -) -@click.option( - "--image-size", - "--image_size", - type=int, - default=224, - show_default=True, - help="Test batch size, must divide the dataset evenly, else last " - "batch will be dropped", -) -def main(dataset_path: str, model_path: str, batch_size: int, image_size: int): - """ - Validation Script for Image Classification Models - """ - - dataset = torchvision.datasets.ImageFolder( - root=dataset_path, - transform=transforms.Compose( - [ - transforms.ToTensor(), - transforms.Resize(size=(image_size, image_size)), - ] - ), - ) - - data_loader = DataLoader( - dataset=dataset, - batch_size=batch_size, - drop_last=True, - ) - - pipeline = Pipeline.create( - task="image_classification", - model_path=model_path, - batch_size=batch_size, - ) - correct = total = 0 - progress_bar = tqdm(data_loader) - - for batch in progress_bar: - batch, actual_labels = batch - batch = batch.numpy() - outs = pipeline(images=batch) - predicted_labels = outs.labels - - for actual, predicted in zip(actual_labels, predicted_labels): - total += 1 - if isinstance(predicted, str): - predicted = int(predicted) - if actual.item() == predicted: - correct += 1 - - if total > 0: - progress_bar.set_postfix( - {"Running Accuracy": f"{correct * 100 / total:.2f}%"} - ) - - # prevent division by zero - if total == 0: - epsilon = 1e-5 - total += epsilon - - print(f"Accuracy: {correct * 100 / total:.2f} %") - - -if __name__ == "__main__": - main() diff --git a/src/deepsparse/pipeline.py b/src/deepsparse/pipeline.py deleted file mode 100644 index 5ab6b9ec63..0000000000 --- a/src/deepsparse/pipeline.py +++ /dev/null @@ -1,546 +0,0 @@ -# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -""" -Classes and registry for end to end inference pipelines that wrap an underlying -inference engine and include pre/postprocessing -""" - - -import os -from abc import ABC, abstractmethod -from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple, Type, Union - -import numpy -from pydantic import BaseModel, Field - -from deepsparse import Engine, Scheduler -from deepsparse.benchmark import ORTEngine -from deepsparse.tasks import SupportedTasks - - -__all__ = [ - "DEEPSPARSE_ENGINE", - "ORT_ENGINE", - "SUPPORTED_PIPELINE_ENGINES", - "Pipeline", - "PipelineConfig", -] - - -DEEPSPARSE_ENGINE = "deepsparse" -ORT_ENGINE = "onnxruntime" - -SUPPORTED_PIPELINE_ENGINES = [DEEPSPARSE_ENGINE, ORT_ENGINE] - - -_REGISTERED_PIPELINES = {} - - -class Pipeline(ABC): - """ - Generic Pipeline abstract class meant to wrap inference engine objects to include - data pre/post-processing. Inputs and outputs of pipelines should be serialized - as pydantic Models. - - Pipelines should not be instantiated by their constructors, but rather the - `Pipeline.create()` method. The task name given to `create` will be used to - load the appropriate pipeline. When creating a Pipeline, the pipeline should - inherit from `Pipeline` and implement the `setup_onnx_file_path`, `process_inputs`, - `process_engine_outputs`, `input_schema`, and `output_schema` abstract methods. - - Finally, the class definition should be decorated by the `Pipeline.register` - function. This defines the task name and task aliases for the pipeline and - ensures that it will be accessible by `Pipeline.create`. The implemented - `Pipeline` subclass must be imported at runtime to be accessible. - - Pipeline lifecycle: - - On instantiation - * `onnx_file_path` <- `setup_onnx_file_path` - * `engine` <- `_initialize_engine` - - - on __call__: - * `parsed_inputs: input_schema` <- `parse_inputs(*args, **kwargs)` - * `pre_processed_inputs` <- `process_inputs(parsed_inputs)` - * `engine_outputs` <- `engine(pre_processed_inputs)` - * `outputs: output_schema` <- `process_engine_outputs(engine_outputs)` - - Example use of register: - ```python - @Pipeline.register( - task="example_task", - task_aliases=["example_alias_1", "example_alias_2"], - ) - class PipelineImplementation(Pipeline): - # implementation of Pipeline abstract methods here - ``` - - Example use of pipeline: - ```python - example_pipeline = Pipeline.create( - task="example_task", - model_path="model.onnx", - ) - pipeline_outputs = example_pipeline(pipeline_inputs) - ``` - - :param model_path: path on local system or SparseZoo stub to load the model from - :param engine_type: inference engine to use. Currently supported values include - 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' - :param batch_size: static batch size to use for inference. Default is 1 - :param num_cores: number of CPU cores to allocate for inference engine. None - specifies all available cores. Default is None - :param scheduler: (deepsparse only) kind of scheduler to execute with. - Pass None for the default - :param input_shapes: list of shapes to set ONNX the inputs to. Pass None - to use model as-is. Default is None - :param alias: optional name to give this pipeline instance, useful when - inferencing with multiple models. 
Default is None - """ - - def __init__( - self, - model_path: str, - engine_type: str = DEEPSPARSE_ENGINE, - batch_size: int = 1, - num_cores: int = None, - scheduler: Scheduler = None, - input_shapes: List[List[int]] = None, - alias: Optional[str] = None, - ): - self._model_path_orig = model_path - self._model_path = model_path - self._engine_type = engine_type - self._alias = alias - - self._engine_args = dict( - batch_size=batch_size, - num_cores=num_cores, - input_shapes=input_shapes, - ) - if engine_type.lower() == DEEPSPARSE_ENGINE: - self._engine_args["scheduler"] = scheduler - - self.onnx_file_path = self.setup_onnx_file_path() - self.engine = self._initialize_engine() - - def __call__(self, *args, **kwargs) -> BaseModel: - # parse inputs into input_schema schema if necessary - pipeline_inputs = self.parse_inputs(*args, **kwargs) - if not isinstance(pipeline_inputs, self.input_schema): - raise RuntimeError( - f"Unable to parse {self.__class__} inputs into a " - f"{self.input_schema} object. Inputs parsed to {type(pipeline_inputs)}" - ) - - # run pipeline - engine_inputs: List[numpy.ndarray] = self.process_inputs(pipeline_inputs) - - if isinstance(engine_inputs, tuple): - engine_inputs, postprocess_kwargs = engine_inputs - else: - postprocess_kwargs = {} - - engine_outputs: List[numpy.ndarray] = self.engine(engine_inputs) - pipeline_outputs = self.process_engine_outputs( - engine_outputs, **postprocess_kwargs - ) - - # validate outputs format - if not isinstance(pipeline_outputs, self.output_schema): - raise ValueError( - f"Outputs of {self.__class__} must be instances of " - f"{self.output_schema} found output of type {type(pipeline_outputs)}" - ) - - return pipeline_outputs - - @staticmethod - def create( - task: str, - model_path: str = None, - engine_type: str = DEEPSPARSE_ENGINE, - batch_size: int = 1, - num_cores: int = None, - scheduler: Scheduler = None, - input_shapes: List[List[int]] = None, - alias: Optional[str] = None, - **kwargs, - ) -> "Pipeline": - """ - :param task: name of task to create a pipeline for - :param model_path: path on local system or SparseZoo stub to load the model - from. Some tasks may have a default model path - :param engine_type: inference engine to use. Currently supported values - include 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' - :param batch_size: static batch size to use for inference. Default is 1 - :param num_cores: number of CPU cores to allocate for inference engine. None - specifies all available cores. Default is None - :param scheduler: (deepsparse only) kind of scheduler to execute with. - Pass None for the default - :param input_shapes: list of shapes to set ONNX the inputs to. Pass None - to use model as-is. Default is None - :param alias: optional name to give this pipeline instance, useful when - inferencing with multiple models. Default is None - :param kwargs: extra task specific kwargs to be passed to task Pipeline - implementation - :return: pipeline object initialized for the given task - """ - task = task.lower().replace("-", "_") - - # extra step to register pipelines for a given task domain - # for cases where imports should only happen once a user specifies - # that domain is to be used. (ie deepsparse.transformers will auto - # install extra packages so should only import and register once a - # transformers task is specified) - SupportedTasks.check_register_task(task) - - if task not in _REGISTERED_PIPELINES: - raise ValueError( - f"Unknown Pipeline task {task}. 
Pipeline tasks should be " - "must be declared with the Pipeline.register decorator. Currently " - f"registered pipelines: {list(_REGISTERED_PIPELINES.keys())}" - ) - - pipeline_constructor = _REGISTERED_PIPELINES[task] - - if ( - model_path is None - and hasattr(pipeline_constructor, "default_model_path") - and pipeline_constructor.default_model_path - ): - model_path = pipeline_constructor.default_model_path - - if model_path is None: - raise ValueError( - f"No model_path provided for pipeline {pipeline_constructor}. Must " - "provide a model path for pipelines that do not have a default defined" - ) - - return pipeline_constructor( - model_path=model_path, - engine_type=engine_type, - batch_size=batch_size, - num_cores=num_cores, - scheduler=scheduler, - input_shapes=input_shapes, - alias=alias, - **kwargs, - ) - - @classmethod - def register( - cls, - task: str, - task_aliases: Optional[List[str]] = None, - default_model_path: Optional[str] = None, - ): - """ - Pipeline implementer class decorator that registers the pipeline - task name and its aliases as valid tasks that can be used to load - the pipeline through `Pipeline.create()`. - - Multiple pipelines may not have the same task name. An error will - be raised if two different pipelines attempt to register the same task name - - :param task: main task name of this pipeline - :param task_aliases: list of extra task names that may be used to reference - this pipeline. Default is None - :param default_model_path: path (ie zoo stub) to use as default for this - task if None is provided - """ - task_names = [task] - if task_aliases: - task_names.extend(task_aliases) - - def _register_task(task_name, pipeline_class): - if task_name in _REGISTERED_PIPELINES and ( - pipeline_class is not _REGISTERED_PIPELINES[task_name] - ): - raise RuntimeError( - f"task {task_name} already registered by Pipeline.register. " - f"attempting to register pipeline: {pipeline_class}, but" - f"pipeline: {_REGISTERED_PIPELINES[task_name]}, already registered" - ) - _REGISTERED_PIPELINES[task_name] = pipeline_class - - def _register_pipeline_tasks_decorator(pipeline_class: Pipeline): - if not issubclass(pipeline_class, cls): - raise RuntimeError( - f"Attempting to register pipeline pipeline_class. 
" - f"Registered pipelines must inherit from {cls}" - ) - for task_name in task_names: - _register_task(task_name, pipeline_class) - - # set task and task_aliases as class level property - pipeline_class.task = task - pipeline_class.task_aliases = task_aliases - pipeline_class.default_model_path = default_model_path - - return pipeline_class - - return _register_pipeline_tasks_decorator - - @classmethod - def from_config(cls, config: Union["PipelineConfig", str, Path]) -> "Pipeline": - """ - :param config: PipelineConfig object, filepath to a json serialized - PipelineConfig, or raw string of a json serialized PipelineConfig - :return: loaded Pipeline object from the config - """ - if isinstance(config, Path) or ( - isinstance(config, str) and os.path.exists(config) - ): - if isinstance(config, str): - config = Path(config) - config = PipelineConfig.parse_file(config) - if isinstance(config, str): - config = PipelineConfig.parse_raw(config) - - return cls.create( - task=config.task, - model_path=config.model_path, - engine_type=config.engine_type, - batch_size=config.batch_size, - num_cores=config.num_cores, - scheduler=config.scheduler, - input_shapes=config.input_shapes, - alias=config.alias, - **config.kwargs, - ) - - @abstractmethod - def setup_onnx_file_path(self) -> str: - """ - Performs any setup to unwrap and process the given `model_path` and other - class properties into an inference ready onnx file to be compiled by the - engine of the pipeline - - :return: file path to the ONNX file for the engine to compile - """ - raise NotImplementedError() - - @abstractmethod - def process_inputs( - self, - inputs: BaseModel, - ) -> Union[List[numpy.ndarray], Tuple[List[numpy.ndarray], Dict[str, Any]]]: - """ - :param inputs: inputs to the pipeline. Must be the type of the `input_schema` - of this pipeline - :return: inputs of this model processed into a list of numpy arrays that - can be directly passed into the forward pass of the pipeline engine. 
Can - also include a tuple with engine inputs and special key word arguments - to pass to process_engine_outputs to facilitate information from the raw - inputs to postprocessing that may not be included in the engine inputs - """ - raise NotImplementedError() - - @abstractmethod - def process_engine_outputs( - self, - engine_outputs: List[numpy.ndarray], - **kwargs, - ) -> BaseModel: - """ - :param engine_outputs: list of numpy arrays that are the output of the engine - forward pass - :return: outputs of engine post-processed into an object in the `output_schema` - format of this pipeline - """ - raise NotImplementedError() - - @property - @abstractmethod - def input_schema(self) -> Type[BaseModel]: - """ - :return: pydantic model class that inputs to this pipeline must comply to - """ - raise NotImplementedError() - - @property - @abstractmethod - def output_schema(self) -> Type[BaseModel]: - """ - :return: pydantic model class that outputs of this pipeline must comply to - """ - raise NotImplementedError() - - @property - def alias(self) -> str: - """ - :return: optional name to give this pipeline instance, useful when - inferencing with multiple models - """ - return self._alias - - @property - def model_path_orig(self) -> str: - """ - :return: value originally passed to the `model_path` argument to initialize - this Pipeline - """ - return self._model_path_orig - - @property - def model_path(self) -> str: - """ - :return: path on local system to the onnx file of this model or directory - containing a model.onnx file along with supporting files - """ - return self._model_path - - @property - def engine_args(self) -> Dict[str, Any]: - """ - :return: arguments besides onnx filepath used to instantiate engine - """ - return self._engine_args - - @property - def engine_type(self) -> str: - """ - :return: type of inference engine used for model forward pass - """ - return self._engine_type - - def to_config(self) -> "PipelineConfig": - """ - :return: PipelineConfig that can be used to reload this object - """ - - if not hasattr(self, "task"): - raise RuntimeError( - f"{self.__class__} instance has no attribute task. Pipeline objects " - "must have a task to be serialized to a config. Pipeline objects " - "must be declared with the Pipeline.register object to be assigned a " - "task" - ) - - # parse any additional properties as kwargs - kwargs = {} - for attr_name, attr in self.__class__.__dict__.items(): - if isinstance(attr, property) and attr_name not in dir(PipelineConfig): - kwargs[attr_name] = getattr(self, attr_name) - - return PipelineConfig( - task=self.task, - model_path=self.model_path_orig, - engine_type=self.engine_type, - batch_size=self.batch_size, - num_cores=self.num_cores, - scheduler=self.scheduler, - input_shapes=self.input_shapes, - alias=self.alias, - kwargs=kwargs, - ) - - def parse_inputs(self, *args, **kwargs) -> BaseModel: - """ - :param args: ordered arguments to pipeline, only an input_schema object - is supported as an arg for this function - :param kwargs: keyword arguments to pipeline - :return: pipeline arguments parsed into the given `input_schema` - schema if necessary. If an instance of the `input_schema` is provided - it will be returned - """ - # passed input_schema schema directly - if len(args) == 1 and isinstance(args[0], self.input_schema) and not kwargs: - return args[0] - - if args: - raise ValueError( - f"pipeline {self.__class__} only supports either only a " - f"{self.input_schema} object. or keyword arguments to be construct " - f"one. 
Found {len(args)} args and {len(kwargs)} kwargs" - ) - - return self.input_schema(**kwargs) - - def _initialize_engine(self) -> Union[Engine, ORTEngine]: - engine_type = self.engine_type.lower() - - if engine_type == DEEPSPARSE_ENGINE: - return Engine(self.onnx_file_path, **self._engine_args) - elif engine_type == ORT_ENGINE: - return ORTEngine(self.onnx_file_path, **self._engine_args) - else: - raise ValueError( - f"Unknown engine_type {self.engine_type}. Supported values include: " - f"{SUPPORTED_PIPELINE_ENGINES}" - ) - - -class PipelineConfig(BaseModel): - """ - Configuration for creating a Pipeline object - - Can be used to create a Pipeline from a config object or file with - Pipeline.from_config(), or used as a building block for other configs - such as for deepsparse.server - """ - - task: str = Field( - description="name of task to create a pipeline for", - ) - model_path: str = Field( - description="path on local system or SparseZoo stub to load the model from", - ) - engine_type: str = Field( - default=DEEPSPARSE_ENGINE, - description=( - "inference engine to use. Currently supported values include " - "'deepsparse' and 'onnxruntime'. Default is 'deepsparse'" - ), - ) - batch_size: int = Field( - default=1, - description=("static batch size to use for inference. Default is 1"), - ) - num_cores: int = Field( - default=None, - description=( - "number of CPU cores to allocate for inference engine. None" - "specifies all available cores. Default is None" - ), - ) - scheduler: str = Field( - default="async", - description=( - "(deepsparse only) kind of scheduler to execute with. Defaults to async" - ), - ) - input_shapes: List[List[int]] = Field( - default=None, - description=( - "list of shapes to set ONNX the inputs to. Pass None to use model as-is. " - "Default is None" - ), - ) - alias: str = Field( - default=None, - description=( - "optional name to give this pipeline instance, useful when inferencing " - "with multiple models. Default is None" - ), - ) - kwargs: Dict[str, Any] = Field( - default={}, - description=( - "Additional arguments for inference with the model that will be passed " - "into the pipeline as kwargs" - ), - ) diff --git a/src/deepsparse/server/config.py b/src/deepsparse/server/config.py index 0d0be42ec0..7f9ac9bd59 100644 --- a/src/deepsparse/server/config.py +++ b/src/deepsparse/server/config.py @@ -19,18 +19,18 @@ import json import os from functools import lru_cache -from typing import List +from typing import Any, Dict, List import yaml from pydantic import BaseModel, Field -from deepsparse import PipelineConfig from deepsparse.cpu import cpu_architecture __all__ = [ "ENV_DEEPSPARSE_SERVER_CONFIG", "ENV_SINGLE_PREFIX", + "ServeModelConfig", "ServerConfig", ] @@ -39,15 +39,75 @@ ENV_SINGLE_PREFIX = "DEEPSPARSE_SINGLE_MODEL:" +class ServeModelConfig(BaseModel): + """ + Configuration for serving a model for a given task in the DeepSparse server + """ + + task: str = Field( + description=( + "The task the model_path is serving. For example, one of: " + "question_answering, text_classification, token_classification." + ), + ) + model_path: str = Field( + description=( + "The path to a model.onnx file, " + "a model folder containing the model.onnx and supporting files, " + "or a SparseZoo model stub." + ), + ) + batch_size: int = Field( + default=1, + description=( + "The batch size to instantiate the model with and use for serving" + ), + ) + alias: str = Field( + default=None, + description=( + "Alias name for model pipeline to be served. 
A convenience route of " + "/predict/alias will be added to the server if present. " + ), + ) + kwargs: Dict[str, Any] = Field( + default={}, + description=( + "Additional arguments for inference with the model that will be passed " + "into the pipeline as kwargs" + ), + ) + engine: str = Field( + default="deepsparse", + description=( + "The engine to use for serving the models such as deepsparse or onnxruntime" + ), + ) + num_cores: int = Field( + default=None, + description=( + "The number of physical cores to restrict the DeepSparse Engine to. " + "Defaults to all cores." + ), + ) + scheduler: str = Field( + default="async", + description=( + "The scheduler to use with the DeepSparse Engine such as sync or async. " + "Defaults to async" + ), + ) + + class ServerConfig(BaseModel): """ A configuration for serving models in the DeepSparse inference server """ - models: List[PipelineConfig] = Field( + models: List[ServeModelConfig] = Field( default=[], description=( - "The models to serve in the server defined by PipelineConfig objects" + "The models to serve in the server defined by the additional arguments" ), ) workers: str = Field( @@ -88,7 +148,7 @@ def server_config_from_env(env_key: str = ENV_DEEPSPARSE_SERVER_CONFIG): config_dict = json.loads(config_file.replace(ENV_SINGLE_PREFIX, "")) config = ServerConfig() config.models.append( - PipelineConfig( + ServeModelConfig( task=config_dict["task"], model_path=config_dict["model_path"], batch_size=config_dict["batch_size"], @@ -98,7 +158,7 @@ def server_config_from_env(env_key: str = ENV_DEEPSPARSE_SERVER_CONFIG): with open(config_file) as file: config_dict = yaml.safe_load(file.read()) config_dict["models"] = ( - [PipelineConfig(**model) for model in config_dict["models"]] + [ServeModelConfig(**model) for model in config_dict["models"]] if "models" in config_dict else [] ) diff --git a/src/deepsparse/server/main.py b/src/deepsparse/server/main.py index dc31f6427f..e8efead286 100644 --- a/src/deepsparse/server/main.py +++ b/src/deepsparse/server/main.py @@ -84,7 +84,6 @@ import click -from deepsparse import Pipeline from deepsparse.log import set_logging_level from deepsparse.server.asynchronous import execute_async, initialize_aysnc from deepsparse.server.config import ( @@ -92,6 +91,7 @@ server_config_from_env, server_config_to_env, ) +from deepsparse.server.pipelines import load_pipelines_definitions from deepsparse.server.utils import serializable_response from deepsparse.version import version @@ -130,11 +130,7 @@ def _home(): def _add_pipeline_route( - app, - pipeline: Pipeline, - num_models: int, - defined_tasks: set, - integration: str, + app, pipeline_def, num_models: int, defined_tasks: set, integration: str ): path = "/predict" @@ -146,27 +142,26 @@ def _add_pipeline_route( ) # required path name for Sagemaker path = "/invocations" - elif pipeline.alias: - path = f"/predict/{pipeline.alias}" + elif pipeline_def.config.alias: + path = f"/predict/{pipeline_def.config.alias}" elif num_models > 1: - if pipeline.task in defined_tasks: + if pipeline_def.config.task in defined_tasks: raise ValueError( - f"Multiple tasks defined for {pipeline.task} and no alias " - f"given for pipeline with model {pipeline.model_path_orig}. " + f"Multiple tasks defined for {pipeline_def.config.task} and no alias " + f"given for {pipeline_def.config}. 
" "Either define an alias or supply a single model for the task" ) - path = f"/predict/{pipeline.task}" - defined_tasks.add(pipeline.task) + path = f"/predict/{pipeline_def.config.task}" + defined_tasks.add(pipeline_def.config.task) @app.post( path, - response_model=pipeline.output_schema, + response_model=pipeline_def.response_model, tags=["prediction"], ) - async def _predict_func(request: pipeline.input_schema): + async def _predict_func(request: pipeline_def.request_model): results = await execute_async( - pipeline, - request, + pipeline_def.pipeline, **vars(request), **pipeline_def.kwargs ) return serializable_response(results) @@ -188,12 +183,15 @@ def server_app_factory(): _LOGGER.debug("loaded server config %s", config) _add_general_routes(app, config) - pipelines = [Pipeline.from_config(model_config) for model_config in config.models] - _LOGGER.debug("loaded pipeline definitions from config %s", pipelines) + pipeline_defs = load_pipelines_definitions(config) + _LOGGER.debug("loaded pipeline definitions from config %s", pipeline_defs) num_tasks = len(config.models) defined_tasks = set() - for pipeline in pipelines: - _add_pipeline_route(app, pipeline, num_tasks, defined_tasks, config.integration) + + for pipeline_def in pipeline_defs: + _add_pipeline_route( + app, pipeline_def, num_tasks, defined_tasks, config.integration + ) return app diff --git a/src/deepsparse/server/pipelines.py b/src/deepsparse/server/pipelines.py new file mode 100644 index 0000000000..ef07c68ca2 --- /dev/null +++ b/src/deepsparse/server/pipelines.py @@ -0,0 +1,89 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Pipelines that run preprocessing, postprocessing, and model inference +within the DeepSparse model server. +""" + +from typing import Any, Dict, List + +from pydantic import BaseModel, Field + +from deepsparse.server.config import ServeModelConfig, ServerConfig +from deepsparse.tasks import SupportedTasks + + +__all__ = ["PipelineDefinition", "load_pipelines_definitions"] + + +class PipelineDefinition(BaseModel): + """ + A definition of a pipeline to be served by the model server. + Used to create a prediction route on construction of the server app. + """ + + pipeline: Any = Field(description="the callable pipeline to invoke on each request") + request_model: Any = Field( + description="the pydantic model to validate the request body with" + ) + response_model: Any = Field( + description="the pydantic model to validate the response payload with" + ) + kwargs: Dict[str, Any] = Field( + description="any additional kwargs that should be passed into the pipeline" + ) + config: ServeModelConfig = Field( + description="the config for the model the pipeline is serving" + ) + + +def load_pipelines_definitions(config: ServerConfig) -> List[PipelineDefinition]: + """ + Load the pipeline definitions to use for creating prediction routes from + the given server configuration. 
+ + :param config: the configuration to load pipeline definitions for + :return: the loaded pipeline definitions to use for serving inference requests + """ + defs = [] + + for model_config in config.models: + if SupportedTasks.is_nlp(model_config.task): + # dynamically import so we don't install dependencies when unneeded + from deepsparse.transformers.server import create_pipeline_definitions + + ( + pipeline, + request_model, + response_model, + kwargs, + ) = create_pipeline_definitions(model_config) + else: + raise ValueError( + f"unsupported task given of {model_config.task} " + f"for serve model config {model_config}" + ) + + defs.append( + PipelineDefinition( + pipeline=pipeline, + request_model=request_model, + response_model=response_model, + kwargs=kwargs, + config=model_config, + ) + ) + + return defs diff --git a/src/deepsparse/tasks.py b/src/deepsparse/tasks.py index 690de5276e..6ffaad7ec3 100644 --- a/src/deepsparse/tasks.py +++ b/src/deepsparse/tasks.py @@ -78,32 +78,6 @@ class SupportedTasks: token_classification=AliasedTask("token_classification", ["ner"]), ) - image_classification = namedtuple("image_classification", ["image_classification"])( - image_classification=AliasedTask( - "image_classification", - ["image_classification"], - ), - ) - - yolo = namedtuple("yolo", ["yolo"])( - yolo=AliasedTask("yolo", ["yolo"]), - ) - - @classmethod - def check_register_task(cls, task: str): - if cls.is_nlp(task): - # trigger transformers pipelines to register with Pipeline.register - import deepsparse.transformers.pipelines # noqa: F401 - - elif cls.is_image_classification(task): - # trigger image classification pipelines to - # register with Pipeline.register - import deepsparse.image_classification.pipelines # noqa: F401 - - elif cls.is_yolo(task): - # trigger yolo pipelines to register with Pipeline.register - import deepsparse.yolo.pipelines # noqa: F401 - @classmethod def is_nlp(cls, task: str) -> bool: """ @@ -116,21 +90,3 @@ def is_nlp(cls, task: str) -> bool: or cls.nlp.text_classification.matches(task) or cls.nlp.token_classification.matches(task) ) - - @classmethod - def is_image_classification(cls, task: str) -> bool: - """ - :param task: the name of the task to check whether it is an image - classification task - :return: True if it is an image classification task, False otherwise - """ - return cls.image_classification.image_classification.matches(task) - - @classmethod - def is_yolo(cls, task: str) -> bool: - """ - :param task: the name of the task to check whether it is an image - segmentation task using YOLO - :return: True if it is an segmentation task using YOLO, False otherwise - """ - return cls.yolo.yolo.matches(task) diff --git a/src/deepsparse/transformers/__init__.py b/src/deepsparse/transformers/__init__.py index 1264aa316d..89c7eb68ef 100644 --- a/src/deepsparse/transformers/__init__.py +++ b/src/deepsparse/transformers/__init__.py @@ -120,3 +120,4 @@ def _check_transformers_install(): from .helpers import * from .loaders import * from .pipelines import * +from .server import * diff --git a/src/deepsparse/transformers/eval_downstream.py b/src/deepsparse/transformers/eval_downstream.py index c01a649fbf..b434dec625 100644 --- a/src/deepsparse/transformers/eval_downstream.py +++ b/src/deepsparse/transformers/eval_downstream.py @@ -58,7 +58,7 @@ from tqdm.auto import tqdm -from deepsparse import Pipeline +from deepsparse.transformers import pipeline from datasets import load_dataset, load_metric # isort: skip @@ -79,14 +79,14 @@ def squad_eval(args): 
squad_metrics = load_metric("squad") # load QA pipeline - question_answer = Pipeline.create( + question_answer = pipeline( task="question-answering", model_path=args.onnx_filepath, engine_type=args.engine, num_cores=args.num_cores, - sequence_length=args.max_sequence_length, + max_length=args.max_sequence_length, ) - print(f"Engine info: {question_answer.engine}") + print(f"Engine info: {question_answer.model}") for idx, sample in enumerate(tqdm(squad)): pred = question_answer( @@ -96,7 +96,7 @@ def squad_eval(args): ) squad_metrics.add_batch( - predictions=[{"prediction_text": pred.answer, "id": sample["id"]}], + predictions=[{"prediction_text": pred["answer"], "id": sample["id"]}], references=[{"answers": sample["answers"], "id": sample["id"]}], ) @@ -114,23 +114,21 @@ def mnli_eval(args): mnli_metrics = load_metric("glue", "mnli") # load pipeline - text_classify = Pipeline.create( + text_classify = pipeline( task="text-classification", model_path=args.onnx_filepath, engine_type=args.engine, num_cores=args.num_cores, - sequence_length=args.max_sequence_length, + max_length=args.max_sequence_length, ) - print(f"Engine info: {text_classify.engine}") - - label_map = {"entailment": 0, "neutral": 1, "contradiction": 2} + print(f"Engine info: {text_classify.model}") label_map = {"entailment": 0, "neutral": 1, "contradiction": 2} for idx, sample in enumerate(tqdm(mnli_matched)): pred = text_classify([[sample["premise"], sample["hypothesis"]]]) mnli_metrics.add_batch( - predictions=[label_map.get(pred.labels[0])], + predictions=[label_map.get(pred[0]["label"])], references=[sample["label"]], ) @@ -156,16 +154,14 @@ def qqp_eval(args): qqp_metrics = load_metric("glue", "qqp") # load pipeline - text_classify = Pipeline.create( + text_classify = pipeline( task="text-classification", model_path=args.onnx_filepath, engine_type=args.engine, num_cores=args.num_cores, - sequence_length=args.max_sequence_length, + max_length=args.max_sequence_length, ) - print(f"Engine info: {text_classify.engine}") - - label_map = {"not_duplicate": 0, "duplicate": 1} + print(f"Engine info: {text_classify.model}") label_map = {"not_duplicate": 0, "duplicate": 1} @@ -173,7 +169,7 @@ def qqp_eval(args): pred = text_classify([[sample["question1"], sample["question2"]]]) qqp_metrics.add_batch( - predictions=[label_map.get(pred.labels[0])], + predictions=[label_map.get(pred[0]["label"])], references=[sample["label"]], ) @@ -189,16 +185,14 @@ def sst2_eval(args): sst2_metrics = load_metric("glue", "sst2") # load pipeline - text_classify = Pipeline.create( + text_classify = pipeline( task="text-classification", model_path=args.onnx_filepath, engine_type=args.engine, num_cores=args.num_cores, - sequence_length=args.max_sequence_length, + max_length=args.max_sequence_length, ) - print(f"Engine info: {text_classify.engine}") - - label_map = {"negative": 0, "positive": 1} + print(f"Engine info: {text_classify.model}") label_map = {"negative": 0, "positive": 1} @@ -208,7 +202,7 @@ def sst2_eval(args): ) sst2_metrics.add_batch( - predictions=[label_map.get(pred.labels[0])], + predictions=[label_map.get(pred[0]["label"])], references=[sample["label"]], ) diff --git a/src/deepsparse/transformers/pipelines.py b/src/deepsparse/transformers/pipelines.py new file mode 100644 index 0000000000..7725a0e2c2 --- /dev/null +++ b/src/deepsparse/transformers/pipelines.py @@ -0,0 +1,1414 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Adaptation of transformers.pipelines and onnx_transformers.pipelines + +adapted from: +https://github.com/huggingface/transformers/blob/master/src/transformers/pipelines/base.py +https://github.com/patil-suraj/onnx_transformers/blob/master/onnx_transformers/pipelines.py + +""" + +import json +from abc import ABC, abstractmethod +from dataclasses import dataclass +from itertools import chain +from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Tuple, Union + +import numpy as np +from transformers.configuration_utils import PretrainedConfig +from transformers.data import ( + SquadExample, + SquadFeatures, + squad_convert_examples_to_features, +) +from transformers.file_utils import ExplicitEnum +from transformers.models.auto import AutoConfig, AutoTokenizer +from transformers.tokenization_utils import PreTrainedTokenizer +from transformers.tokenization_utils_base import PaddingStrategy, TruncationStrategy +from transformers.utils import logging + +from deepsparse import Engine, compile_model, cpu +from deepsparse.transformers.helpers import ( + fix_numpy_types, + get_onnx_path_and_configs, + overwrite_transformer_onnx_model_inputs, +) +from deepsparse.transformers.loaders import get_batch_loader + + +try: + import onnxruntime + + ort_import_error = None +except Exception as ort_import_err: + onnxruntime = None + ort_import_error = ort_import_err + +__all__ = [ + "ArgumentHandler", + "Pipeline", + "TextClassificationPipeline", + "TokenClassificationPipeline", + "QuestionAnsweringPipeline", + "pipeline", + "overwrite_transformer_onnx_model_inputs", + "SUPPORTED_ENGINES", + "SUPPORTED_TASKS", +] + +logger = logging.get_logger(__name__) if logging else None + + +class ArgumentHandler(ABC): + """ + Base interface for handling arguments for each Pipeline. + """ + + @abstractmethod + def __call__(self, *args, **kwargs): + raise NotImplementedError() + + +class DefaultArgumentHandler(ArgumentHandler): + """ + Default argument parser handling parameters for each Pipeline`. + """ + + @staticmethod + def handle_kwargs(kwargs: Dict) -> List: + """ + :param kwargs: key word arguments for a pipeline + :return: list of the processed key word arguments + """ + if len(kwargs) == 1: + output = list(kwargs.values()) + else: + output = list(chain(kwargs.values())) + + return DefaultArgumentHandler.handle_args(output) + + @staticmethod + def handle_args(args: Sequence[Any]) -> List[str]: + """ + :param args: sequence of arguments to a pipeline + :return: list of formatted, processed arguments + """ + + # Only one argument, let's do case by case + if len(args) == 1: + if isinstance(args[0], str): + return [args[0]] + elif not isinstance(args[0], list): + return list(args) + else: + return args[0] + + # Multiple arguments (x1, x2, ...) 
+ elif len(args) > 1: + if all([isinstance(arg, str) for arg in args]): + return list(args) + + # If not instance of list, then it should be an instance of iterable + elif isinstance(args, Iterable): + return list(chain.from_iterable(chain(args))) + else: + raise ValueError( + f"Invalid input type {type(args)}. Pipeline supports " + "Union[str, Iterable[str]]" + ) + else: + return [] + + def __call__(self, *args, **kwargs): + if len(kwargs) > 0 and len(args) > 0: + raise ValueError("Pipeline cannot handle mixed args and kwargs") + + if len(kwargs) > 0: + return DefaultArgumentHandler.handle_kwargs(kwargs) + else: + return DefaultArgumentHandler.handle_args(args) + + +class _ScikitCompat(ABC): + """ + Interface layer for the Scikit and Keras compatibility. + """ + + @abstractmethod + def transform(self, X): + raise NotImplementedError() + + @abstractmethod + def predict(self, X): + raise NotImplementedError() + + +class Pipeline(_ScikitCompat): + """ + The Pipeline class is the class from which all pipelines inherit. + Refer to this class for methods shared across different pipelines. + This base Pipeline class provides support for multiple inference engine backends. + + Base class implementing pipelined operations. + Pipeline workflow is defined as a sequence of the following operations: + + Input -> Tokenization -> Model Inference -> + Post-Processing (task dependent) -> Output + + Pipeline supports running with the DeepSparse engine or onnxruntime. + + :param model: loaded inference engine to run the model with, can be a + deepsparse Engine or onnxruntime InferenceSession + :param tokenizer: tokenizer to be used for preprocessing + :param config: transformers model config for this model + :param engine_type: name of inference engine that is used. Options are + deepsparse and onnxruntime + :param max_length: maximum sequence length to set for model inputs by default. + default value is 128 + :param input_names: list of input names to the neural network + :param args_parser: Reference to the object in charge of parsing supplied + pipeline parameters. A default is provided if None + :param binary_output: if True, stores outputs as pickled binaries to avoid + storing large amount of textual data. Default is False + """ + + default_input_names = None + + def __init__( + self, + model: Union[Engine, "onnxruntime.InferenceSession"], + tokenizer: PreTrainedTokenizer, + config: PretrainedConfig, + engine_type: str, + max_length: int = 128, + input_names: Optional[List[str]] = None, + args_parser: ArgumentHandler = None, + binary_output: bool = False, + ): + + self.model = model + self.tokenizer = tokenizer + self.config = config + self.engine_type = engine_type + self.max_length = max_length + self.input_names = input_names + self.binary_output = binary_output + self._args_parser = args_parser or DefaultArgumentHandler() + self._framework = ( + "np" if self.engine_type in [DEEPSPARSE_ENGINE, ORT_ENGINE] else "pt" + ) + + def transform(self, X): + """ + Scikit / Keras interface to transformers' pipelines. + This method will forward to __call__(). + """ + return self(X=X) + + def predict(self, X): + """ + Scikit / Keras interface to transformers' pipelines. + This method will forward to __call__(). 
+ """ + return self(X=X) + + def _parse_and_tokenize( + self, *args, padding=True, add_special_tokens=True, **kwargs + ): + # Parse arguments + inputs = self._args_parser(*args, **kwargs) + inputs = self.tokenizer( + inputs, + add_special_tokens=add_special_tokens, + return_tensors=self._framework, + padding=PaddingStrategy.MAX_LENGTH.value, + truncation=TruncationStrategy.LONGEST_FIRST.value, + ) + + return inputs + + def __call__(self, *args, **kwargs): + inputs = self._parse_and_tokenize(*args, **kwargs) + return self._forward(inputs) + + def _forward(self, inputs): + if not all(name in inputs for name in self.input_names): + raise ValueError( + f"pipeline expected arrays with names {self.input_names}, received " + f"inputs: {list(inputs.keys())}" + ) + + if self.engine_type == ORT_ENGINE: + inputs = {k: v for k, v in inputs.items() if k in self.input_names} + return self.model.run(None, inputs) + elif self.engine_type == DEEPSPARSE_ENGINE: + return self.model.run([inputs[name] for name in self.input_names]) + # TODO: torch + # with self.device_placement(): + # with torch.no_grad(): + # inputs = self.ensure_tensor_on_device(**inputs) + # predictions = self.model(**inputs)[0].cpu() + # if return_tensors: + # return predictions + # else: + # return predictions.numpy() + + +class TokenClassificationArgumentHandler(ArgumentHandler): + """ + Handles arguments for token classification. + """ + + def __call__(self, inputs: Union[str, List[str]], **kwargs): + + if inputs is not None and isinstance(inputs, (list, tuple)) and len(inputs) > 0: + inputs = list(inputs) + batch_size = len(inputs) + elif isinstance(inputs, str): + inputs = [inputs] + batch_size = 1 + else: + raise ValueError("At least one input is required.") + + offset_mapping = kwargs.get("offset_mapping") + if offset_mapping: + if isinstance(offset_mapping, list) and isinstance( + offset_mapping[0], tuple + ): + offset_mapping = [offset_mapping] + if len(offset_mapping) != batch_size: + raise ValueError( + "offset_mapping should have the same batch size as the input" + ) + return inputs, offset_mapping + + +class QuestionAnsweringArgumentHandler(ArgumentHandler): + """ + QuestionAnsweringPipeline requires the user to provide multiple arguments + (i.e. 
question & context) to be mapped + to internal `transformers.SquadExample` + + QuestionAnsweringArgumentHandler manages all the possible to create a + `transformers.SquadExample` from the command-line supplied arguments + """ + + def __call__(self, *args, **kwargs): + # Position args, handling is sensibly the same as X and data, + # so forwarding to avoid duplicating + if args is not None and len(args) > 0: + if len(args) == 1: + kwargs["X"] = args[0] + else: + kwargs["X"] = list(args) + + # Generic compatibility with sklearn and Keras + # Batched data + if "X" in kwargs or "data" in kwargs: + inputs = kwargs["X"] if "X" in kwargs else kwargs["data"] + + if isinstance(inputs, dict): + inputs = [inputs] + else: + # Copy to avoid overriding arguments + inputs = [i for i in inputs] + + for i, item in enumerate(inputs): + if isinstance(item, dict): + if any(k not in item for k in ["question", "context"]): + raise KeyError( + "You need to provide a dictionary with keys " + "{question:..., context:...}" + ) + + inputs[i] = QuestionAnsweringPipeline.create_sample(**item) + + elif not isinstance(item, SquadExample): + arg_name = "X" if "X" in kwargs else "data" + raise ValueError( + f"{arg_name} argument needs to be of type " + "(list[SquadExample | dict], SquadExample, dict)" + ) + + # Tabular input + elif "question" in kwargs and "context" in kwargs: + if isinstance(kwargs["question"], str): + kwargs["question"] = [kwargs["question"]] + + if isinstance(kwargs["context"], str): + kwargs["context"] = [kwargs["context"]] + + inputs = [ + QuestionAnsweringPipeline.create_sample(q, c) + for q, c in zip(kwargs["question"], kwargs["context"]) + ] + else: + raise ValueError(f"Unknown arguments {kwargs}") + + if not isinstance(inputs, list): + inputs = [inputs] + + return inputs + + +class TextClassificationPipeline(Pipeline): + """ + Text classification pipeline using any `ModelForSequenceClassification`. + + This text classification pipeline can currently be loaded from `pipeline()` + using the following task identifier: `"text-classification"`. + + The models that this pipeline can use are models that have been fine-tuned on + a text classification task. + + :param return_all_scores: set True to return all model scores. Default False + """ + + def __init__(self, return_all_scores: bool = False, **kwargs): + super().__init__(**kwargs) + + self.return_all_scores = return_all_scores + + def __call__(self, *args, **kwargs): + """ + Classify the text(s) given as inputs. + + :param args: One or several texts (or one list of prompts) to classify + :param args: kwargs for inner call function + :return: A list or a list of list of dicts: Each result comes as list of dicts + with the following keys: + - `label` -- The label predicted. + - `score` -- The corresponding probability. 
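A minimal sketch of the input forms the question-answering argument handler above normalizes into `transformers.SquadExample` objects (hypothetical strings, import path assumed):

```python
# Illustrative sketch only; import path assumed.
from deepsparse.transformers.pipelines import QuestionAnsweringArgumentHandler

handler = QuestionAnsweringArgumentHandler()

# keyword form: question/context strings are zipped into SquadExample objects
examples = handler(
    question="Who wrote the report?",
    context="The report was written by Jane.",
)

# dict form: a single {"question": ..., "context": ...} dict, or a list of them
examples = handler(
    {"question": "Who wrote the report?", "context": "The report was written by Jane."}
)

# either way, a list of transformers.SquadExample instances is returned
```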
+ If ``self.return_all_scores=True``, one dictionary is returned per label + """ + outputs = super().__call__(*args, **kwargs) + + if isinstance(outputs, list) and outputs: + outputs = outputs[0] + + if self.config.num_labels == 1: + scores = 1.0 / (1.0 + np.exp(-outputs)) + else: + scores = np.exp(outputs) / np.exp(outputs).sum(-1, keepdims=True) + if self.return_all_scores: + return [ + [ + {"label": self.config.id2label[i], "score": score.item()} + for i, score in enumerate(item) + ] + for item in scores + ] + else: + return [ + { + "label": self.config.id2label[item.argmax()], + "score": item.max().item(), + } + for item in scores + ] + + +class AggregationStrategy(ExplicitEnum): + """ + All the valid aggregation strategies for TokenClassificationPipeline + """ + + NONE = "none" + SIMPLE = "simple" + FIRST = "first" + AVERAGE = "average" + MAX = "max" + + +class TokenClassificationPipeline(Pipeline): + """ + Named Entity Recognition pipeline using any `ModelForTokenClassification`. + + This token classification pipeline can currently be loaded from `pipeline()` + using the following task identifier: `"token-classification"`. + + The models that this pipeline can use are models that have been fine-tuned on + a token classification task. + + :param args_parser: argument parser to use default is + TokenClassificationArgumentHandler + :param aggregation_strategy: AggregationStrategy Enum object to determine + the pipeline aggregation strategy. Default is AggregationStrategy.NONE + :param ignore_labels: list of labels to ignore. Default is `["O"]` + """ + + default_input_names = "sequences" + + def __init__( + self, + args_parser: ArgumentHandler = None, + aggregation_strategy: AggregationStrategy = AggregationStrategy.NONE, + ignore_labels: List[str] = False, + **kwargs, + ): + super().__init__( + args_parser=args_parser or TokenClassificationArgumentHandler(), + **kwargs, + ) + + self.ignore_labels = ignore_labels or ["O"] + + if isinstance(aggregation_strategy, str): + aggregation_strategy = AggregationStrategy[aggregation_strategy.upper()] + + if ( + aggregation_strategy + in { + AggregationStrategy.FIRST, + AggregationStrategy.MAX, + AggregationStrategy.AVERAGE, + } + and not self.tokenizer.is_fast + ): + raise ValueError( + "Slow tokenizers cannot handle subwords. Please set the " + '`aggregation_strategy` option to `"simple"` or use a fast tokenizer.' + ) + + self.aggregation_strategy = aggregation_strategy + + def __call__(self, inputs: Union[str, List[str]], **kwargs): + """ + Classify each token of the text(s) given as inputs. + + + :param inputs: One or several texts (or one list of texts) for token + classification + :return: A list or a list of list of :obj:`dict`: Each result comes as a list + of dictionaries (one for each token in the corresponding input, or each + entity if this pipeline was instantiated with an aggregation_strategy) + with the following keys: + - `word` -- The token/word classified. + - `score` -- The corresponding probability for `entity`. + - `entity` -- The entity predicted for that token/word (it is named + `entity_group` when `aggregation_strategy` is not `"none"`. + - `index` -- The index of the corresponding token in the sentence. + - `start` -- index of the start of the corresponding entity in the sentence + Only exists if the offsets are available within the tokenizer + - `end` -- The index of the end of the corresponding entity in the sentence. 
+ Only exists if the offsets are available within the tokenizer + """ + + _inputs, offset_mappings = self._args_parser(inputs, **kwargs) + + answers = [] + + tokens = self.tokenizer( + _inputs, + return_tensors=self._framework, + truncation=TruncationStrategy.LONGEST_FIRST.value, + padding=PaddingStrategy.MAX_LENGTH.value, + return_special_tokens_mask=True, + return_offsets_mapping=self.tokenizer.is_fast, + ) + + if self.tokenizer.is_fast: + offset_mapping = tokens.pop("offset_mapping") + elif not offset_mappings: + offset_mapping = [None] * len(_inputs) + + special_tokens_mask = tokens.pop("special_tokens_mask") + + # Forward + _forward_pass = self._forward(tokens) + for entities_index, current_entities in enumerate(_forward_pass[0]): + input_ids = tokens["input_ids"][entities_index] + + scores = np.exp(current_entities) / np.exp(current_entities).sum( + -1, keepdims=True + ) + pre_entities = self.gather_pre_entities( + _inputs[entities_index], + input_ids, + scores, + offset_mapping[entities_index], + special_tokens_mask[entities_index], + ) + grouped_entities = self.aggregate(pre_entities, self.aggregation_strategy) + # Filter anything that is in self.ignore_labels + current_entities = [ + entity + for entity in grouped_entities + if entity.get("entity", None) not in self.ignore_labels + and entity.get("entity_group", None) not in self.ignore_labels + ] + answers.append(current_entities) + + if len(answers) == 1: + return answers[0] + return answers + + def gather_pre_entities( + self, + sentence: str, + input_ids: np.ndarray, + scores: np.ndarray, + offset_mapping: Optional[List[Tuple[int, int]]], + special_tokens_mask: np.ndarray, + ) -> List[dict]: + pre_entities = [] + for idx, token_scores in enumerate(scores): + # Filter special_tokens, they should only occur + # at the sentence boundaries since we're not encoding pairs of + # sentences so we don't have to keep track of those. 
+ if special_tokens_mask[idx]: + continue + + word = self.tokenizer.convert_ids_to_tokens(int(input_ids[idx])) + if offset_mapping is not None: + start_ind, end_ind = offset_mapping[idx] + word_ref = sentence[start_ind:end_ind] + is_subword = len(word_ref) != len(word) + + if int(input_ids[idx]) == self.tokenizer.unk_token_id: + word = word_ref + is_subword = False + else: + start_ind = None + end_ind = None + is_subword = False + + pre_entity = { + "word": word, + "scores": token_scores, + "start": start_ind, + "end": end_ind, + "index": idx, + "is_subword": is_subword, + } + pre_entities.append(pre_entity) + return pre_entities + + def aggregate( + self, pre_entities: List[dict], aggregation_strategy: AggregationStrategy + ) -> List[dict]: + if aggregation_strategy in { + AggregationStrategy.NONE, + AggregationStrategy.SIMPLE, + }: + entities = [] + for pre_entity in pre_entities: + entity_idx = pre_entity["scores"].argmax() + score = pre_entity["scores"][entity_idx] + entity = { + "entity": self.config.id2label[entity_idx], + "score": score, + "index": pre_entity["index"], + "word": pre_entity["word"], + "start": pre_entity["start"], + "end": pre_entity["end"], + } + entities.append(entity) + else: + entities = self.aggregate_words(pre_entities, aggregation_strategy) + + if aggregation_strategy == AggregationStrategy.NONE: + return entities + return self.group_entities(entities) + + def aggregate_word( + self, entities: List[dict], aggregation_strategy: AggregationStrategy + ) -> dict: + word = self.tokenizer.convert_tokens_to_string( + [entity["word"] for entity in entities] + ) + if aggregation_strategy == AggregationStrategy.FIRST: + scores = entities[0]["scores"] + idx = scores.argmax() + score = scores[idx] + entity = self.config.id2label[idx] + elif aggregation_strategy == AggregationStrategy.MAX: + max_entity = max(entities, key=lambda entity: entity["scores"].max()) + scores = max_entity["scores"] + idx = scores.argmax() + score = scores[idx] + entity = self.config.id2label[idx] + elif aggregation_strategy == AggregationStrategy.AVERAGE: + scores = np.stack([entity["scores"] for entity in entities]) + average_scores = np.nanmean(scores, axis=0) + entity_idx = average_scores.argmax() + entity = self.config.id2label[entity_idx] + score = average_scores[entity_idx] + else: + raise ValueError("Invalid aggregation_strategy") + new_entity = { + "entity": entity, + "score": score, + "word": word, + "start": entities[0]["start"], + "end": entities[-1]["end"], + } + return new_entity + + def aggregate_words( + self, entities: List[dict], aggregation_strategy: AggregationStrategy + ) -> List[dict]: + assert aggregation_strategy not in { + AggregationStrategy.NONE, + AggregationStrategy.SIMPLE, + }, "NONE and SIMPLE strategies are invalid" + + word_entities = [] + word_group = None + for entity in entities: + if word_group is None: + word_group = [entity] + elif entity["is_subword"]: + word_group.append(entity) + else: + word_entities.append( + self.aggregate_word(word_group, aggregation_strategy) + ) + word_group = [entity] + # Last item + word_entities.append(self.aggregate_word(word_group, aggregation_strategy)) + return word_entities + + def group_sub_entities(self, entities: List[dict]) -> dict: + # Get the first entity in the entity group + entity = entities[0]["entity"].split("-")[-1] + scores = np.nanmean([entity["score"] for entity in entities]) + tokens = [entity["word"] for entity in entities] + + entity_group = { + "entity_group": entity, + "score": np.mean(scores), + "word": 
self.tokenizer.convert_tokens_to_string(tokens), + "start": entities[0]["start"], + "end": entities[-1]["end"], + } + return entity_group + + def get_tag(self, entity_name: str) -> Tuple[str, str]: + if entity_name.startswith("B-"): + bi = "B" + tag = entity_name[2:] + elif entity_name.startswith("I-"): + bi = "I" + tag = entity_name[2:] + else: + # It's not in B-, I- format + bi = "B" + tag = entity_name + return bi, tag + + def group_entities(self, entities: List[dict]) -> List[dict]: + + entity_groups = [] + entity_group_disagg = [] + + for entity in entities: + if not entity_group_disagg: + entity_group_disagg.append(entity) + continue + + # If the current entity is similar and adjacent to the previous entity, + # append it to the disaggregated entity group + # The split is meant to account for the "B" and "I" prefixes + # Shouldn't merge if both entities are B-type + bi, tag = self.get_tag(entity["entity"]) + last_bi, last_tag = self.get_tag(entity_group_disagg[-1]["entity"]) + + if tag == last_tag and bi != "B": + # Modify subword type to be previous_type + entity_group_disagg.append(entity) + else: + # If the current entity is different from the previous entity + # aggregate the disaggregated entity group + entity_groups.append(self.group_sub_entities(entity_group_disagg)) + entity_group_disagg = [entity] + if entity_group_disagg: + # it's the last entity, add it to the entity groups + entity_groups.append(self.group_sub_entities(entity_group_disagg)) + + return entity_groups + + +class QuestionAnsweringPipeline(Pipeline): + """ + Question Answering pipeline using any `ModelForQuestionAnswering` + + This question answering pipeline can currently be loaded from `pipeline()` + using the following task identifier: `"question-answering"`. + + The models that this pipeline can use are models that have been fine-tuned on + a question answering task. + + :param model: loaded inference engine to run the model with, can be a + deepsparse Engine or onnxruntime InferenceSession + :param tokenizer: tokenizer to be used for preprocessing + :param config: transformers model config for this model + :param engine_type: name of inference engine that is used. Options are + deepsparse and onnxruntime + :param input_names: list of input names to the neural network + :param args_parser: Reference to the object in charge of parsing supplied + pipeline parameters. A default is provided if None + :param binary_output: if True, stores outputs as pickled binaries to avoid + storing large amount of textual data. 
Default is False + """ + + default_input_names = "question,context" + + def __init__( + self, + model: Union[Engine, "onnxruntime.InferenceSession"], + tokenizer: PreTrainedTokenizer, + engine_type: str, + input_names: Optional[List[str]] = None, + **kwargs, + ): + super().__init__( + model=model, + tokenizer=tokenizer, + engine_type=engine_type, + args_parser=QuestionAnsweringArgumentHandler(), + input_names=input_names, + **kwargs, + ) + + @staticmethod + def create_sample( + question: Union[str, List[str]], context: Union[str, List[str]] + ) -> Union[SquadExample, List[SquadExample]]: + """ + :param question: single question or list of question strings + :param context: single context or list of context strings + :return: processed SquadExample object(s) for each question/context pair given + """ + if isinstance(question, list): + return [ + SquadExample(None, q, c, None, None, None) + for q, c in zip(question, context) + ] + else: + return SquadExample(None, question, context, None, None, None) + + def __call__(self, *args, **kwargs): + """ + Answer the question(s) given as inputs by using the context(s). + Multiple arguments can be used to pass the context, question data + + :param args: SquadExample or list of them containing the question and context + :param X: SquadExample or list of them containing the question and context + :param data: SquadExample or list of them containing the question and context + :param question: single question or list of question strings + :param context: single context or list of context strings + :param topk: the number of answers to return. Will be chosen by + order of likelihood) + :param doc_stride: if the context is too long to fit with the question for the + model, it will be split in several chunks with some overlap. This argument + controls the size of that overlap + :param max_answer_len: maximum length of predicted answers (e.g., only + answers with a shorter length are considered) + :param max_seq_len: maximum length of the total sentence (context + question) + after tokenization. The context will be split in several chunks + (using the doc_stride) if needed + :param max_question_len: maximum length of the question after tokenization. + It will be truncated if needed + :param handle_impossible_answer: whether or not we accept impossible as an + answer + :param num_spans: maximum number of span to use as input from a long + context. Default is to stride the entire context string + :param preprocessed_inputs: if provided, preprocessing will be skipped in favor + of these inputs. 
Expected format is the output of self.preprocess; a tuple + of (examples, features_list) + :return: dict or list of dictionaries, each containing the following keys: + `"score"` - The probability associated to the answer + `"start"` - The start index of the answer + `"end"` - The end index of the answer + `"answer"` - The answer to the question + """ + # Set defaults values + kwargs.setdefault("topk", 1) + kwargs.setdefault("max_answer_len", 15) + kwargs.setdefault("handle_impossible_answer", False) + kwargs.setdefault("preprocessed_inputs", None) # (examples, features_list) + + if kwargs["topk"] < 1: + raise ValueError(f"topk parameter should be >= 1 (got {kwargs['topk']})") + + if kwargs["max_answer_len"] < 1: + raise ValueError( + "max_answer_len parameter should be >= 1 " + f"(got {kwargs['max_answer_len']})" + ) + + # run pre-processing if not provided + examples, features_list = kwargs["preprocessed_inputs"] or self.preprocess( + *args, **kwargs + ) + + # forward pass and post-processing + all_answers = [] + for features, example in zip(features_list, examples): + model_input_names = self.tokenizer.model_input_names + ["input_ids"] + fw_args = { + k: [feature.__dict__[k] for feature in features] + for k in model_input_names + } + + # Manage tensor allocation on correct device + fw_args = {k: np.array(v) for (k, v) in fw_args.items()} + start, end = self._forward(fw_args)[:2] + + # TODO: torch + # fw_args = {k: torch.tensor(v, device=self.device) + # for (k, v) in fw_args.items()} + # start, end = self.model(**fw_args)[:2] + # start, end = start.cpu().numpy(), end.cpu().numpy() + + min_null_score = 1000000 # large and positive + answers = [] + for (feature, start_, end_) in zip(features, start, end): + # Ensure padded tokens & question tokens cannot belong + undesired_tokens = ( + np.abs(np.array(feature.p_mask) - 1) & feature.attention_mask + ) + + # Generate mask + undesired_tokens_mask = undesired_tokens == 0.0 + + # Make sure non-context indexes cannot contribute to the softmax + start_ = np.where(undesired_tokens_mask, -10000.0, start_) + end_ = np.where(undesired_tokens_mask, -10000.0, end_) + + # Normalize logits and spans to retrieve the answer + start_ = np.exp( + start_ - np.log(np.sum(np.exp(start_), axis=-1, keepdims=True)) + ) + end_ = np.exp( + end_ - np.log(np.sum(np.exp(end_), axis=-1, keepdims=True)) + ) + + if kwargs["handle_impossible_answer"]: + min_null_score = min(min_null_score, (start_[0] * end_[0]).item()) + + # Mask CLS + start_[0] = end_[0] = 0.0 + + starts, ends, scores = self.decode( + start_, end_, kwargs["topk"], kwargs["max_answer_len"] + ) + + if not self.tokenizer.is_fast: + char_to_word = np.array(example.char_to_word_offset) + answers += [ + { + "score": score.item(), + "start": np.where( + char_to_word == feature.token_to_orig_map[s] + )[0][0].item(), + "end": np.where( + char_to_word == feature.token_to_orig_map[e] + )[0][-1].item(), + "answer": " ".join( + example.doc_tokens[ + feature.token_to_orig_map[ + s + ] : feature.token_to_orig_map[e] + + 1 + ] + ), + } + for s, e, score in zip(starts, ends, scores) + ] + else: + question_first = bool(self.tokenizer.padding_side == "right") + + # Sometimes the max probability token is in the middle of a word so: + # we start by finding the right word containing the token with + # `token_to_word` then we convert this word in a character span + answers += [ + { + "score": score.item(), + "start": feature.encoding.word_to_chars( + feature.encoding.token_to_word(s), + sequence_index=1 if question_first else 
0, + )[0], + "end": feature.encoding.word_to_chars( + feature.encoding.token_to_word(e), + sequence_index=1 if question_first else 0, + )[1], + "answer": example.context_text[ + feature.encoding.word_to_chars( + feature.encoding.token_to_word(s), + sequence_index=1 if question_first else 0, + )[0] : feature.encoding.word_to_chars( + feature.encoding.token_to_word(e), + sequence_index=1 if question_first else 0, + )[ + 1 + ] + ], + } + for s, e, score in zip(starts, ends, scores) + ] + + if kwargs["handle_impossible_answer"]: + answers.append( + {"score": min_null_score, "start": 0, "end": 0, "answer": ""} + ) + + answers = sorted(answers, key=lambda x: x["score"], reverse=True)[ + : kwargs["topk"] + ] + all_answers += answers + + if len(all_answers) == 1: + return all_answers[0] + return all_answers + + def preprocess(self, *args, **kwargs) -> Tuple[Any, Any]: + """ + preprocess the given QA model inputs using squad_convert_examples_to_features + + :param args: SquadExample or list of them containing the question and context + :param X: SquadExample or list of them containing the question and context + :param data: SquadExample or list of them containing the question and context + :param question: single question or list of question strings + :param context: single context or list of context strings + :param doc_stride: if the context is too long to fit with the question for the + model, it will be split in several chunks with some overlap. This argument + controls the size of that overlap + :param max_seq_len: maximum length of the total sentence (context + question) + after tokenization. The context will be split in several chunks + (using the doc_stride) if needed + :param max_question_len: maximum length of the question after tokenization. + It will be truncated if needed + :param num_spans: maximum number of spans to use as input from a long + context. Default is to stride the entire context string + :return: tuple of SquadExample inputs and preprocessed features list + """ + kwargs.setdefault("doc_stride", 128) + kwargs.setdefault("max_seq_len", self.max_length) + kwargs.setdefault("max_question_len", 64) + kwargs.setdefault("num_spans", None) + + # Convert inputs to features + examples = self._args_parser(*args, **kwargs) + if not self.tokenizer.is_fast: + features_list = [ + squad_convert_examples_to_features( + examples=[example], + tokenizer=self.tokenizer, + max_seq_length=kwargs["max_seq_len"], + doc_stride=kwargs["doc_stride"], + max_query_length=kwargs["max_question_len"], + padding_strategy=PaddingStrategy.MAX_LENGTH.value, + is_training=False, + tqdm_enabled=False, + ) + for example in examples + ] + else: + features_list = self._encode_features_fast(examples, **kwargs) + + if kwargs["num_spans"]: + features_list = [ + features[: kwargs["num_spans"]] for features in features_list + ] + + return examples, features_list + + def decode( + self, start: np.ndarray, end: np.ndarray, topk: int, max_answer_len: int + ) -> Tuple: + """ + :param start: Individual start probabilities for each token + :param end: Individual end probabilities for each token + :param topk: Indicates how many possible answer span(s) to extract from the + model output + :param max_answer_len: Maximum size of the answer to extract from the model + output + :return: probabilities for each span to be the actual answer. 
Will filter out + unwanted and impossible cases + """ + # Ensure we have batch axis + if start.ndim == 1: + start = start[None] + + if end.ndim == 1: + end = end[None] + + # Compute the score of each tuple(start, end) to be the real answer + outer = np.matmul(np.expand_dims(start, -1), np.expand_dims(end, 1)) + + # Remove candidate with end < start and end - start > max_answer_len + candidates = np.tril(np.triu(outer), max_answer_len - 1) + + # Inspired by Chen & al. (https://github.com/facebookresearch/DrQA) + scores_flat = candidates.flatten() + if topk == 1: + idx_sort = [np.argmax(scores_flat)] + elif len(scores_flat) < topk: + idx_sort = np.argsort(-scores_flat) + else: + idx = np.argpartition(-scores_flat, topk)[0:topk] + idx_sort = idx[np.argsort(-scores_flat[idx])] + + start, end = np.unravel_index(idx_sort, candidates.shape)[1:] + return start, end, candidates[0, start, end] + + def span_to_answer( + self, text: str, start: int, end: int + ) -> Dict[str, Union[str, int]]: + """ + When decoding from token probabilities, this method maps token indexes to + actual word in the initial context. + + :param text: The actual context to extract the answer from + :param start: The answer starting token index + :param end: The answer end token index + :return: Dictionary containing the start, end, and answer + """ + words = [] + token_idx = char_start_idx = char_end_idx = chars_idx = 0 + + for i, word in enumerate(text.split(" ")): + token = self.tokenizer.tokenize(word) + + # Append words if they are in the span + if start <= token_idx <= end: + if token_idx == start: + char_start_idx = chars_idx + + if token_idx == end: + char_end_idx = chars_idx + len(word) + + words += [word] + + # Stop if we went over the end of the answer + if token_idx > end: + break + + # Append the subtokenization length to the running index + token_idx += len(token) + chars_idx += len(word) + 1 + + # Join text with spaces + return { + "answer": " ".join(words), + "start": max(0, char_start_idx), + "end": min(len(text), char_end_idx), + } + + def _encode_features_fast(self, examples: Any, **kwargs) -> List[SquadFeatures]: + features_list = [] + for example in examples: + # Define the side we want to truncate / pad and the text/pair sorting + question_first = bool(self.tokenizer.padding_side == "right") + + encoded_inputs = self.tokenizer( + text=example.question_text if question_first else example.context_text, + text_pair=( + example.context_text if question_first else example.question_text + ), + padding=PaddingStrategy.MAX_LENGTH.value, + truncation="only_second" if question_first else "only_first", + max_length=kwargs["max_seq_len"], + stride=kwargs["doc_stride"], + return_tensors="np", + return_token_type_ids=True, + return_overflowing_tokens=True, + return_offsets_mapping=True, + return_special_tokens_mask=True, + ) + + total_spans = len(encoded_inputs["input_ids"]) + + # p_mask: mask with 1 for token than cannot be in the answer + # We put 0 on the tokens from the context and 1 everywhere else + p_mask = np.asarray( + [ + [ + tok != 1 if question_first else 0 + for tok in encoded_inputs.sequence_ids(span_id) + ] + for span_id in range(total_spans) + ] + ) + + # keep the cls_token unmasked + if self.tokenizer.cls_token_id is not None: + cls_index = np.nonzero( + encoded_inputs["input_ids"] == self.tokenizer.cls_token_id + ) + p_mask[cls_index] = 0 + + features = [] + for span_idx in range(total_spans): + features.append( + SquadFeatures( + input_ids=encoded_inputs["input_ids"][span_idx], + 
attention_mask=encoded_inputs["attention_mask"][span_idx], + token_type_ids=encoded_inputs["token_type_ids"][span_idx], + p_mask=p_mask[span_idx].tolist(), + encoding=encoded_inputs[span_idx], + # the following values are unused for fast tokenizers + cls_index=None, + token_to_orig_map={}, + example_index=0, + unique_id=0, + paragraph_len=0, + token_is_max_context=0, + tokens=[], + start_position=0, + end_position=0, + is_impossible=False, + qas_id=None, + ) + ) + features_list.append(features) + return features_list + + +@dataclass +class TaskInfo: + """ + Information about an NLP task + + :param pipeline_constructor: reference to constructor for the given pipeline task + :param default model name: the transformers canonical name for the default model + :param base_stub: sparsezoo stub path for the base model for this task + :param default_pruned_stub: sparsezoo stub path for the default pruned model + for this task + :param default_quant_stub: sparsezoo stub path for the default quantized model + for this task + """ + + pipeline_constructor: Callable[[Any], Pipeline] + default_model_name: str + base_stub: Optional[str] = None + default_pruned_stub: Optional[str] = None + default_quant_stub: Optional[str] = None + + +# Register all the supported tasks here +SUPPORTED_TASKS = { + "ner": TaskInfo( + pipeline_constructor=TokenClassificationPipeline, + default_model_name="bert-base-uncased", + ), + "question-answering": TaskInfo( + pipeline_constructor=QuestionAnsweringPipeline, + default_model_name="bert-base-uncased", + base_stub=( + "zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/base-none" + ), + default_pruned_stub=( + "zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/" + "pruned-aggressive_98" + ), + ), + "sentiment-analysis": TaskInfo( + pipeline_constructor=TextClassificationPipeline, + default_model_name="bert-base-uncased", + ), + "text-classification": TaskInfo( + pipeline_constructor=TextClassificationPipeline, + default_model_name="bert-base-uncased", + ), + "token-classification": TaskInfo( + pipeline_constructor=TokenClassificationPipeline, + default_model_name="bert-base-uncased", + ), +} + +DEEPSPARSE_ENGINE = "deepsparse" +ORT_ENGINE = "onnxruntime" + +SUPPORTED_ENGINES = [DEEPSPARSE_ENGINE, ORT_ENGINE] + + +def pipeline( + task: str, + model_name: Optional[str] = None, + model_path: Optional[str] = None, + engine_type: str = DEEPSPARSE_ENGINE, + config: Optional[Union[str, PretrainedConfig]] = None, + tokenizer: Optional[Union[str, PreTrainedTokenizer]] = None, + max_length: int = 128, + num_cores: Optional[int] = None, + scheduler: Optional[str] = None, + batch_size: Optional[int] = 1, + **kwargs, +) -> Pipeline: + """ + Utility factory method to build a Pipeline + + :param task: name of the task to define which pipeline to create. Currently + supported task - "question-answering" + :param model_name: canonical name of the hugging face model this model is based on + :param model_path: path to model directory containing `model.onnx`, `config.json`, + and `tokenizer.json` files, ONNX model file, or SparseZoo stub + :param engine_type: inference engine name to use. Supported options are 'deepsparse' + and 'onnxruntime' + :param config: huggingface model config, if none provided, default will be used + which will be from the model name or sparsezoo stub if given for model path + :param tokenizer: huggingface tokenizer, if none provided, default will be used + :param max_length: maximum sequence length of model inputs. 
default is 128 + :param num_cores: number of CPU cores to run engine with. Default is the maximum + available + :param scheduler: The scheduler to use for the engine. Can be None, single or multi. + :param batch_size: The batch_size to use for the pipeline. Defaults to 1 + Note: `question-answering` pipeline only supports a batch_size of 1. + :param kwargs: additional key word arguments for task specific pipeline constructor + :return: Pipeline object for the given taks and model + """ + + # Retrieve the task + if task not in SUPPORTED_TASKS: + raise KeyError( + f"Unknown task {task}, available tasks are {list(SUPPORTED_TASKS.keys())}" + ) + if engine_type not in SUPPORTED_ENGINES: + raise ValueError( + f"Unsupported engine {engine_type}, supported engines " + f"are {SUPPORTED_ENGINES}" + ) + if task == "question-answering" and batch_size != 1: + raise ValueError( + f"{task} pipeline only supports batch_size 1. " + f"Supplied batch_size = {batch_size}" + ) + task_info = SUPPORTED_TASKS[task] + + model_path = model_path or _get_default_model_path(task_info) + model_name = model_name or task_info.default_model_name + + onnx_path, config_path, tokenizer_path = get_onnx_path_and_configs(model_path) + + # default the tokenizer and config to file in model directory or given model name + config = config or config_path or model_name + tokenizer = tokenizer or tokenizer_path or model_name + + # create model + model, input_names = _create_model( + onnx_path, + engine_type, + num_cores, + max_length, + scheduler=scheduler, + batch_size=batch_size, + ) + + # Instantiate tokenizer if needed + if isinstance(tokenizer, (str, tuple)): + if isinstance(tokenizer, tuple): + # For tuple we have (tokenizer name, {kwargs}) + tokenizer_kwargs = tokenizer[1] + tokenizer_kwargs["model_max_length"] = max_length + tokenizer = AutoTokenizer.from_pretrained(tokenizer[0], **tokenizer[1]) + else: + tokenizer = AutoTokenizer.from_pretrained( + tokenizer, model_max_length=max_length + ) + + # Instantiate config if needed + if config is not None and isinstance(config, str): + config = AutoConfig.from_pretrained(config, finetuning_task=task) + + return task_info.pipeline_constructor( + model=model, + tokenizer=tokenizer, + config=config, + engine_type=engine_type, + max_length=max_length, + input_names=input_names, + **kwargs, + ) + + +def _get_default_model_path(task_info: TaskInfo) -> str: + if cpu.cpu_vnni_compatible() and task_info.default_quant_stub: + return task_info.default_quant_stub + return task_info.default_pruned_stub or task_info.base_stub + + +def _create_model( + model_path: str, + engine_type: str, + num_cores: Optional[int], + max_length: int = 128, + scheduler: Optional[str] = None, + batch_size: int = 1, +) -> Tuple[Union[Engine, "onnxruntime.InferenceSession"], List[str]]: + onnx_path, input_names, _ = overwrite_transformer_onnx_model_inputs( + model_path, max_length=max_length + ) + + if engine_type == DEEPSPARSE_ENGINE: + model = compile_model( + onnx_path, + batch_size=batch_size, + num_cores=num_cores, + scheduler=scheduler, + ) + elif engine_type == ORT_ENGINE: + _validate_ort_import() + sess_options = onnxruntime.SessionOptions() + if num_cores is not None: + sess_options.intra_op_num_threads = num_cores + sess_options.log_severity_level = 3 + sess_options.graph_optimization_level = ( + onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL + ) + + model = onnxruntime.InferenceSession(onnx_path, sess_options=sess_options) + + return model, input_names + + +def _validate_ort_import(): + if 
ort_import_error is not None: + raise ImportError( + "An exception occurred when importing onxxruntime. Please verify that " + "onnxruntime is installed in order to use the onnxruntime inference " + f"engine. \n\nException info: {ort_import_error}" + ) + + +def process_dataset( + pipeline_object: Callable, + data_path: str, + batch_size: int, + task: str, + output_path: str, +) -> None: + """ + :param pipeline_object: An instantiated pipeline Callable object + :param data_path: Path to input file, supports csv, json and text files + :param batch_size: batch_size to use for inference + :param task: The task pipeline is instantiated for + :param output_path: Path to a json file to output inference results to + """ + batch_loader = get_batch_loader( + data_file=data_path, + batch_size=batch_size, + task=task, + ) + # Wraps pipeline object to make numpy types serializable + pipeline_object = fix_numpy_types(pipeline_object) + with open(output_path, "a") as output_file: + for batch in batch_loader: + batch_output = pipeline_object(**batch) + json.dump(batch_output, output_file) + output_file.write("\n") diff --git a/src/deepsparse/transformers/pipelines/__init__.py b/src/deepsparse/transformers/pipelines/__init__.py deleted file mode 100644 index 9986181a2a..0000000000 --- a/src/deepsparse/transformers/pipelines/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# flake8: noqa - -from .pipeline import * -from .question_answering import * -from .text_classification import * -from .token_classification import * diff --git a/src/deepsparse/transformers/pipelines/pipeline.py b/src/deepsparse/transformers/pipelines/pipeline.py deleted file mode 100644 index 2fdcd27236..0000000000 --- a/src/deepsparse/transformers/pipelines/pipeline.py +++ /dev/null @@ -1,219 +0,0 @@ -# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
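When the `onnxruntime` engine is selected, `_create_model` above configures an `InferenceSession` with the options shown in the diff; a standalone sketch of that setup, where the model path and thread count are placeholders rather than values from this change:

```python
# Standalone sketch of the onnxruntime session setup mirrored by _create_model;
# "model.onnx" and the thread count are placeholders.
import onnxruntime

sess_options = onnxruntime.SessionOptions()
sess_options.intra_op_num_threads = 4  # analogous to the num_cores argument
sess_options.log_severity_level = 3    # only surface errors
sess_options.graph_optimization_level = (
    onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
)

session = onnxruntime.InferenceSession("model.onnx", sess_options=sess_options)
# session.run(None, feed_dict) then takes a dict keyed by the model's input names
```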
- -""" -Base Pipeline class for transformers inference pipeline -""" - - -import warnings -from typing import Any, List, Mapping, Optional - -import numpy -from transformers.models.auto import AutoConfig, AutoTokenizer - -from deepsparse import Pipeline -from deepsparse.transformers.helpers import ( - get_onnx_path_and_configs, - overwrite_transformer_onnx_model_inputs, -) - - -__all__ = [ - "TransformersPipeline", - "pipeline", -] - - -class TransformersPipeline(Pipeline): - """ - Base deepsparse.Pipeline class for transformers model loading. This class handles - the parsing of deepsparse-transformers files and model inputs, supporting loading - from sparsezoo, a directory containing a model.onnx, tokenizer, and model config, - or just an ONNX file with the ability to load a tokenizer and model config from - a default huggingface-transformers model. - - Note, when implementing child tasks in deepsparse.transformers.pipelines, - in addition to registering task names with Pipeline.register, task names should - be added to the supported nlp tasks in deepsparse.tasks so they can be properly - imported at runtime. - - :param model_path: sparsezoo stub to a transformers model, an ONNX file, or - (preferred) a directory containing a model.onnx, tokenizer config, and model - config. If no tokenizer and/or model config(s) are found, then they will be - loaded from huggingface transformers using the `default_model_name` key - :param engine_type: inference engine to use. Currently supported values include - 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' - :param batch_size: static batch size to use for inference. Default is 1 - :param num_cores: number of CPU cores to allocate for inference engine. None - specifies all available cores. Default is None - :param scheduler: (deepsparse only) kind of scheduler to execute with. - Pass None for the default - :param input_shapes: list of shapes to set ONNX the inputs to. Pass None - to use model as-is. Default is None - :param alias: optional name to give this pipeline instance, useful when - inferencing with multiple models. Default is None - :param sequence_length: static sequence length to use for inference - :param default_model_name: huggingface transformers model name to use to - load a tokenizer and model config when none are provided in the `model_path`. - Default is 'bert-base-uncased' - """ - - def __init__( - self, - *, - sequence_length: int = 128, - default_model_name: str = "bert-base-uncased", - **kwargs, - ): - - self._sequence_length = sequence_length - self._default_model_name = default_model_name - - self.config = None - self.tokenizer = None - self.onnx_input_names = None - - self._temp_model_directory = None - - super().__init__(**kwargs) - - @property - def sequence_length(self) -> int: - """ - :return: static sequence length to use for inference - """ - return self._sequence_length - - @property - def default_model_name(self) -> str: - """ - :return: huggingface transformers model name to use to - load a tokenizer and model config when none are provided in the - `model_path` - """ - return self._default_model_name - - def setup_onnx_file_path(self) -> str: - """ - Parses ONNX, tokenizer, and config file paths from the given `model_path`. - Supports sparsezoo stubs. 
If a tokenizer and/or config file are not found, - they will be defaulted to the default_model_name in the transformers repo - - :return: file path to the processed ONNX file for the engine to compile - """ - onnx_path, config_path, tokenizer_path = get_onnx_path_and_configs( - self.model_path - ) - - # default config + tokenizer if necessary - config_path = config_path or self.default_model_name - tokenizer_path = tokenizer_path or self.default_model_name - - self.config = AutoConfig.from_pretrained( - config_path, finetuning_task=self.task if hasattr(self, "task") else None - ) - self.tokenizer = AutoTokenizer.from_pretrained( - tokenizer_path, model_max_length=self.sequence_length - ) - - # overwrite onnx graph to given required input shape - ( - onnx_path, - self.onnx_input_names, - self._temp_model_directory, - ) = overwrite_transformer_onnx_model_inputs( - onnx_path, max_length=self.sequence_length - ) - - return onnx_path - - def tokens_to_engine_input( - self, tokens: Mapping[Any, numpy.ndarray] - ) -> List[numpy.ndarray]: - """ - :param tokens: outputs of the pipeline tokenizer - :return: list of numpy arrays in expected order for model input - """ - if not all(name in tokens for name in self.onnx_input_names): - raise ValueError( - f"pipeline expected arrays with names {self.onnx_input_names}, " - f"received inputs: {list(tokens.keys())}" - ) - - return [tokens[name] for name in self.onnx_input_names] - - -def pipeline( - task: str, - model_name: Optional[str] = None, - model_path: Optional[str] = None, - engine_type: str = "deepsparse", - config: Optional[str] = None, - tokenizer: Optional[str] = None, - max_length: int = 128, - num_cores: Optional[int] = None, - scheduler: Optional[str] = None, - batch_size: Optional[int] = 1, - **kwargs, -) -> Pipeline: - """ - [DEPRECATED] - deepsparse.transformers.pipeline is deprecated to craete DeepSparse - pipelines for tranformers tasks use deepsparse.Pipeline.create(task, ...) - - Utility factory method to build a Pipeline - - :param task: name of the task to define which pipeline to create. Currently - supported task - "question-answering" - :param model_name: canonical name of the hugging face model this model is based on - :param model_path: path to model directory containing `model.onnx`, `config.json`, - and `tokenizer.json` files, ONNX model file, or SparseZoo stub - :param engine_type: inference engine name to use. Options are 'deepsparse' - and 'onnxruntime'. Default is 'deepsparse' - :param config: huggingface model config, if none provided, default will be used - which will be from the model name or sparsezoo stub if given for model path - :param tokenizer: huggingface tokenizer, if none provided, default will be used - :param max_length: maximum sequence length of model inputs. default is 128 - :param num_cores: number of CPU cores to run engine with. Default is the maximum - available - :param scheduler: The scheduler to use for the engine. Can be None, single or multi - :param batch_size: The batch_size to use for the pipeline. Defaults to 1 - Note: `question-answering` pipeline only supports a batch_size of 1. 
- :param kwargs: additional key word arguments for task specific pipeline constructor - :return: Pipeline object for the given taks and model - """ - warnings.warn( - "[DEPRECATED] - deepsparse.transformers.pipeline is deprecated to craete " - "DeepSparse pipelines for tranformers tasks use deepsparse.Pipeline.create()" - ) - - if config is not None or tokenizer is not None: - raise ValueError( - "Directly passing in a config or tokenizer to DeepSparse transformers " - "pipelines is no longer supported. config and tokenizer objects should " - "be specified by including config.json and tokenizer.json files in the " - "model directory respectively" - ) - - return Pipeline.create( - task=task, - model_path=model_path, - engine_type=engine_type, - batch_size=batch_size, - num_cores=num_cores, - scheduler=scheduler, - sequence_length=max_length, - default_model_name=model_name, - **kwargs, - ) diff --git a/src/deepsparse/transformers/pipelines/question_answering.py b/src/deepsparse/transformers/pipelines/question_answering.py deleted file mode 100644 index ba57117dad..0000000000 --- a/src/deepsparse/transformers/pipelines/question_answering.py +++ /dev/null @@ -1,409 +0,0 @@ -# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# postprocessing adapted from huggingface/transformers - -# Copyright 2021 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Pipeline implementation and pydantic models for question answering transformers -tasks -""" - - -from typing import Any, Dict, List, Tuple, Type - -import numpy -from pydantic import BaseModel, Field -from transformers.data import ( - SquadExample, - SquadFeatures, - squad_convert_examples_to_features, -) -from transformers.tokenization_utils_base import PaddingStrategy - -from deepsparse import Pipeline -from deepsparse.transformers.pipelines import TransformersPipeline - - -__all__ = [ - "QuestionAnsweringInput", - "QuestionAnsweringOutput", - "QuestionAnsweringPipeline", -] - - -class QuestionAnsweringInput(BaseModel): - """ - Schema for inputs to question_answering pipelines - """ - - question: str = Field(description="String question to be answered") - context: str = Field(description="String representing context for answer") - - -class QuestionAnsweringOutput(BaseModel): - """ - Schema for question_answering pipeline output. 
Values are in batch order - """ - - score: float = Field(description="confidence score for prediction") - answer: str = Field(description="predicted answer") - start: int = Field(description="start index of the answer") - end: int = Field(description="end index of the answer") - - -@Pipeline.register( - task="question_answering", - task_aliases=["qa"], - default_model_path=( - "zoo:nlp/question_answering/bert-base/pytorch/huggingface/" - "squad/12layer_pruned80_quant-none-vnni" - ), -) -class QuestionAnsweringPipeline(TransformersPipeline): - """ - transformers question_answering pipeline - - example instantiation: - ```python - question_answering = Pipeline.create( - task="question_answering", - model_path="question_answering_model_dir/", - ) - ``` - - :param model_path: sparsezoo stub to a transformers model, an ONNX file, or - (preferred) a directory containing a model.onnx, tokenizer config, and model - config. If no tokenizer and/or model config(s) are found, then they will be - loaded from huggingface transformers using the `default_model_name` key - :param engine_type: inference engine to use. Currently supported values include - 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' - :param batch_size: static batch size to use for inference. Default is 1 - :param num_cores: number of CPU cores to allocate for inference engine. None - specifies all available cores. Default is None - :param scheduler: (deepsparse only) kind of scheduler to execute with. - Pass None for the default - :param input_shapes: list of shapes to set ONNX the inputs to. Pass None - to use model as-is. Default is None - :param alias: optional name to give this pipeline instance, useful when - inferencing with multiple models. Default is None - :param sequence_length: sequence length to compile model and tokenizer for. - Default is 128 - :param default_model_name: huggingface transformers model name to use to - load a tokenizer and model config when none are provided in the `model_path`. - Default is 'bert-base-uncased' - :param doc_stride: if the context is too long to fit with the question for the - model, it will be split in several chunks with some overlap. This argument - controls the size of that overlap. Currently, only reading the first span - is supported (everything after doc_stride will be truncated). Default - is 128 - :param max_question_len: maximum length of the question after tokenization. - It will be truncated if needed. Default is 64 - :param max_answer_len: maximum length of answer after decoding. Default is 15 - """ - - def __init__( - self, - *, - doc_stride: int = 128, - max_question_length: int = 64, - max_answer_length: int = 15, - **kwargs, - ): - - if kwargs.get("batch_size") and kwargs["batch_size"] > 1: - raise ValueError( - f"{self.__class__.__name__} currently only supports batch size 1, " - f"batch size set to {kwargs['batch_size']}" - ) - - self._doc_stride = doc_stride - self._max_question_length = max_question_length - self._max_answer_length = max_answer_length - - super().__init__(**kwargs) - - @property - def doc_stride(self) -> int: - """ - :return: if the context is too long to fit with the question for the - model, it will be split in several chunks with some overlap. This argument - controls the size of that overlap. 
Currently, only reading the first span - is supported (everything after doc_stride will be truncated) - """ - return self._doc_stride - - @property - def max_answer_length(self) -> int: - """ - :return: maximum length of answer after decoding - """ - return self._max_answer_length - - @property - def max_question_length(self) -> int: - """ - :return: maximum length of the question after tokenization. - It will be truncated if needed - """ - return self._max_question_length - - @property - def input_schema(self) -> Type[BaseModel]: - """ - :return: pydantic model class that inputs to this pipeline must comply to - """ - return QuestionAnsweringInput - - @property - def output_schema(self) -> Type[BaseModel]: - """ - :return: pydantic model class that outputs of this pipeline must comply to - """ - return QuestionAnsweringOutput - - def process_inputs( - self, - inputs: QuestionAnsweringInput, - ) -> Tuple[List[numpy.ndarray], Dict[str, Any]]: - """ - :param inputs: inputs to the pipeline. Must be the type of the - QuestionAnsweringInput - :return: inputs of this model processed into a list of numpy arrays that - can be directly passed into the forward pass of the pipeline engine and - dictionary of parsed features and original extracted example - """ - squad_example = SquadExample( - None, inputs.question, inputs.context, None, None, None - ) - features = self._tokenize(squad_example) - tokens = features.__dict__ - - engine_inputs = self.tokens_to_engine_input(tokens) - # add batch dimension, assuming batch size 1 - engine_inputs = [numpy.expand_dims(inp, axis=0) for inp in engine_inputs] - - return engine_inputs, dict( - features=features, - example=squad_example, - ) - - def process_engine_outputs( - self, engine_outputs: List[numpy.ndarray], **kwargs - ) -> BaseModel: - """ - :param engine_outputs: list of numpy arrays that are the output of the engine - forward pass - :return: outputs of engine post-processed into an object in the `output_schema` - format of this pipeline - """ - features = kwargs["features"] - example = kwargs["example"] - start_vals, end_vals = engine_outputs[:2] - - # assuming batch size 0 - start = start_vals[0] - end = end_vals[0] - - # Ensure padded tokens & question tokens cannot belong - undesired_tokens = ( - numpy.abs(numpy.array(features.p_mask) - 1) & features.attention_mask - ) - - # Generate mask - undesired_tokens_mask = undesired_tokens == 0.0 - - # Make sure non-context indexes cannot contribute to the softmax - start = numpy.where(undesired_tokens_mask, -10000.0, start) - end = numpy.where(undesired_tokens_mask, -10000.0, end) - - # Normalize logits and spans to retrieve the answer - start = numpy.exp( - start - numpy.log(numpy.sum(numpy.exp(start), axis=-1, keepdims=True)) - ) - end = numpy.exp( - end - numpy.log(numpy.sum(numpy.exp(end), axis=-1, keepdims=True)) - ) - - # Mask CLS - start[0] = 0.0 - end[0] = 0.0 - - ans_start, ans_end, scores = self._decode(start, end) - # assuming one stride, so grab first idx - ans_start = ans_start[0] - ans_end = ans_end[0] - score = scores[0] - - # decode start, end idx into text - if not self.tokenizer.is_fast: - char_to_word = numpy.array(example.char_to_word_offset) - return self.output_schema( - score=score.item(), - start=numpy.where( - char_to_word == features.token_to_orig_map[ans_start] - )[0][0].item(), - end=numpy.where(char_to_word == features.token_to_orig_map[ans_end])[0][ - -1 - ].item(), - answer=" ".join( - example.doc_tokens[ - features.token_to_orig_map[ - ans_start - ] : 
features.token_to_orig_map[ans_end] - + 1 - ] - ), - ) - else: - question_first = bool(self.tokenizer.padding_side == "right") - - # Sometimes the max probability token is in the middle of a word so: - # we start by finding the right word containing the token with - # `token_to_word` then we convert this word in a character span - return self.output_schema( - score=score.item(), - start=features.encoding.word_to_chars( - features.encoding.token_to_word(ans_start), - sequence_index=1 if question_first else 0, - )[0], - end=features.encoding.word_to_chars( - features.encoding.token_to_word(ans_end), - sequence_index=1 if question_first else 0, - )[1], - answer=example.context_text[ - features.encoding.word_to_chars( - features.encoding.token_to_word(ans_start), - sequence_index=1 if question_first else 0, - )[0] : features.encoding.word_to_chars( - features.encoding.token_to_word(ans_end), - sequence_index=1 if question_first else 0, - )[ - 1 - ] - ], - ) - - def _tokenize(self, example: SquadExample): - if not self.tokenizer.is_fast: - features = squad_convert_examples_to_features( - examples=[example], - tokenizer=self.tokenizer, - max_set_length=self.sequence_length, - doc_stride=self.doc_stride, - max_query_length=self.max_question_length, - padding_strategy=PaddingStrategy.MAX_LENGTH.value, - is_training=False, - tqdm_enabled=False, - ) - # only 1 span supported so taking only the first element of features - # to add support for num_spans switch to features = features[:num_spans] - # not included for now due to static batch requirements in production - features = features[0] - else: - question_first = bool(self.tokenizer.padding_side == "right") - encoded_inputs = self.tokenizer( - text=example.question_text if question_first else example.context_text, - text_pair=( - example.context_text if question_first else example.question_text - ), - padding=PaddingStrategy.MAX_LENGTH.value, - truncation="only_second" if question_first else "only_first", - max_length=self.sequence_length, - stride=self.doc_stride, - return_tensors="np", - return_token_type_ids=True, - return_overflowing_tokens=True, - return_offsets_mapping=True, - return_special_tokens_mask=True, - ) - - # only 1 span supported so taking only the first element of features - # to add support for num_spans switch hardcoded 0 idx lookups to loop - # over values in num_spans - - # p_mask: mask with 1 for token than cannot be in the answer - # We put 0 on the tokens from the context and 1 everywhere else - p_mask = numpy.asarray( - [ - [ - tok != 1 if question_first else 0 - for tok in encoded_inputs.sequence_ids(0) - ] - ] - ) - - # keep the cls_token unmasked - if self.tokenizer.cls_token_id is not None: - cls_index = numpy.nonzero( - encoded_inputs["input_ids"][0] == self.tokenizer.cls_token_id - ) - p_mask[cls_index] = 0 - - features = SquadFeatures( - input_ids=encoded_inputs["input_ids"][0], - attention_mask=encoded_inputs["attention_mask"][0], - token_type_ids=encoded_inputs["token_type_ids"][0], - p_mask=p_mask[0].tolist(), - encoding=encoded_inputs[0], - # the following values are unused for fast tokenizers - cls_index=None, - token_to_orig_map={}, - example_index=0, - unique_id=0, - paragraph_len=0, - token_is_max_context=0, - tokens=[], - start_position=0, - end_position=0, - is_impossible=False, - qas_id=None, - ) - - return features - - def _decode(self, start: numpy.ndarray, end: numpy.ndarray) -> Tuple: - # Ensure we have batch axis - if start.ndim == 1: - start = start[None] - - if end.ndim == 1: - end = end[None] - - 
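The decoding step that follows scores candidate answer spans from the start/end probability vectors; a toy numpy sketch of the same idea, with made-up numbers:

```python
# Toy sketch of QA span scoring: score(i, j) = start[i] * end[j], keeping only
# spans with i <= j and span length <= max_answer_len, then taking the best one.
import numpy as np

start = np.array([[0.1, 0.7, 0.1, 0.1]])  # P(token i starts the answer)
end = np.array([[0.1, 0.1, 0.6, 0.2]])    # P(token j ends the answer)
max_answer_len = 2

outer = np.matmul(np.expand_dims(start, -1), np.expand_dims(end, 1))
candidates = np.tril(np.triu(outer), max_answer_len - 1)

best = np.argmax(candidates.flatten())
ans_start, ans_end = np.unravel_index([best], candidates.shape)[1:]
print(ans_start, ans_end, candidates[0, ans_start, ans_end])
# best span covers tokens 1..2 with score 0.7 * 0.6 = 0.42
```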
# Compute the score of each tuple(start, end) to be the real answer - outer = numpy.matmul(numpy.expand_dims(start, -1), numpy.expand_dims(end, 1)) - - # Remove candidate with end < start and end - start > max_answer_len - candidates = numpy.tril(numpy.triu(outer), self.max_answer_length - 1) - - # Inspired by Chen & al. (https://github.com/facebookresearch/DrQA) - scores_flat = candidates.flatten() - # only returning best result, use argsort for topk support - idx_sort = [numpy.argmax(scores_flat)] - - start, end = numpy.unravel_index(idx_sort, candidates.shape)[1:] - return start, end, candidates[0, start, end] diff --git a/src/deepsparse/transformers/pipelines/text_classification.py b/src/deepsparse/transformers/pipelines/text_classification.py deleted file mode 100644 index 0df9ba2b59..0000000000 --- a/src/deepsparse/transformers/pipelines/text_classification.py +++ /dev/null @@ -1,221 +0,0 @@ -# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# postprocessing adapted from huggingface/transformers - -# Copyright 2021 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -""" -Pipeline implementation and pydantic models for text classification transformers -tasks -""" - - -from typing import List, Type, Union - -import numpy -from pydantic import BaseModel, Field -from transformers.tokenization_utils_base import PaddingStrategy, TruncationStrategy - -from deepsparse import Pipeline -from deepsparse.transformers.pipelines import TransformersPipeline - - -__all__ = [ - "TextClassificationInput", - "TextClassificationOutput", - "TextClassificationPipeline", -] - - -class TextClassificationInput(BaseModel): - """ - Schema for inputs to text_classification pipelines - """ - - sequences: Union[List[List[str]], List[str], str] = Field( - description="A string or List of strings representing input to" - "text_classification task" - ) - - -class TextClassificationOutput(BaseModel): - """ - Schema for text_classification pipeline output. 
Values are in batch order - """ - - labels: List[str] = Field(description="The predicted labels in batch order") - scores: List[float] = Field( - description="The corresponding probability for each label in the batch" - ) - - -@Pipeline.register( - task="text_classification", - task_aliases=["glue", "sentiment_analysis"], - default_model_path=( - "zoo:nlp/sentiment_analysis/bert-base/pytorch/huggingface/" - "sst2/12layer_pruned80_quant-none-vnni" - ), -) -class TextClassificationPipeline(TransformersPipeline): - """ - transformers text classification pipeline - - example instantiation: - ```python - text_classifier = Pipeline.create( - task="text_classification", - model_path="text_classification_model_dir/", - batch_size=BATCH_SIZE, - ) - ``` - - example batch size 1, single text inputs (ie sentiment analysis): - ```python - sentiment = text_classifier("the food tastes great") - sentiment = text_classifier(["the food tastes great"]) - sentiment = text_classifier([["the food tastes great"]]) - ``` - - example batch size 1, multi text input (ie QQP like tasks): - ```python - prediction = text_classifier([["how is the food?", "what is the food?"]]) - ``` - - example batch size n, single text inputs: - ```python - sentiments = text_classifier(["the food tastes great", "the food tastes bad"]) - sentiments = text_classifier([["the food tastes great"], ["the food tastes bad"]]) - ``` - - :param model_path: sparsezoo stub to a transformers model, an ONNX file, or - (preferred) a directory containing a model.onnx, tokenizer config, and model - config. If no tokenizer and/or model config(s) are found, then they will be - loaded from huggingface transformers using the `default_model_name` key - :param engine_type: inference engine to use. Currently supported values include - 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' - :param batch_size: static batch size to use for inference. Default is 1 - :param num_cores: number of CPU cores to allocate for inference engine. None - specifies all available cores. Default is None - :param scheduler: (deepsparse only) kind of scheduler to execute with. - Pass None for the default - :param input_shapes: list of shapes to set ONNX the inputs to. Pass None - to use model as-is. Default is None - :param alias: optional name to give this pipeline instance, useful when - inferencing with multiple models. Default is None - :param sequence_length: sequence length to compile model and tokenizer for. - Default is 128 - :param default_model_name: huggingface transformers model name to use to - load a tokenizer and model config when none are provided in the `model_path`. - Default is 'bert-base-uncased' - """ - - @property - def input_schema(self) -> Type[BaseModel]: - """ - :return: pydantic model class that inputs to this pipeline must comply to - """ - return TextClassificationInput - - @property - def output_schema(self) -> Type[BaseModel]: - """ - :return: pydantic model class that outputs of this pipeline must comply to - """ - return TextClassificationOutput - - def parse_inputs(self, *args, **kwargs) -> BaseModel: - """ - :param args: ordered arguments to pipeline, only an input_schema object - is supported as an arg for this function - :param kwargs: keyword arguments to pipeline - :return: pipeline arguments parsed into the given `input_schema` - schema if necessary. If an instance of the `input_schema` is provided - it will be returned - """ - if args and kwargs: - raise ValueError( - f"{self.__class__} only support args OR kwargs. 
Found " - f" {len(args)} args and {len(kwargs)} kwargs" - ) - - if args: - if len(args) == 1: - # passed input_schema schema directly - if isinstance(args[0], self.input_schema): - return args[0] - return self.input_schema(sequences=args[0]) - else: - return self.input_schema(sequences=args) - - return self.input_schema(**kwargs) - - def process_inputs(self, inputs: TextClassificationInput) -> List[numpy.ndarray]: - """ - :param inputs: inputs to the pipeline. Must be the type of the - TextClassificationInput - :return: inputs of this model processed into a list of numpy arrays that - can be directly passed into the forward pass of the pipeline engine - """ - tokens = self.tokenizer( - inputs.sequences, - add_special_tokens=True, - return_tensors="np", - padding=PaddingStrategy.MAX_LENGTH.value, - truncation=TruncationStrategy.LONGEST_FIRST.value, - ) - return self.tokens_to_engine_input(tokens) - - def process_engine_outputs(self, engine_outputs: List[numpy.ndarray]) -> BaseModel: - """ - :param engine_outputs: list of numpy arrays that are the output of the engine - forward pass - :return: outputs of engine post-processed into an object in the `output_schema` - format of this pipeline - """ - outputs = engine_outputs - if isinstance(outputs, list): - outputs = outputs[0] - - scores = ( - 1.0 / (1.0 + numpy.exp(-outputs)) - if self.config.num_labels == 1 - else numpy.exp(outputs) / numpy.exp(outputs).sum(-1, keepdims=True) - ) - - labels = [] - label_scores = [] - - for score in scores: - labels.append(self.config.id2label[score.argmax()]) - label_scores.append(score.max().item()) - - return self.output_schema( - labels=labels, - scores=label_scores, - ) diff --git a/src/deepsparse/transformers/pipelines/token_classification.py b/src/deepsparse/transformers/pipelines/token_classification.py deleted file mode 100644 index 6485df668e..0000000000 --- a/src/deepsparse/transformers/pipelines/token_classification.py +++ /dev/null @@ -1,499 +0,0 @@ -# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# postprocessing adapted from huggingface/transformers - -# Copyright 2021 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
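The `process_engine_outputs` method above turns raw logits into labels and confidences: a sigmoid when the model has a single output label, otherwise a softmax over the last axis, followed by an argmax into `config.id2label`. A small numpy sketch of the softmax branch, with made-up logits and an assumed label mapping:

```python
import numpy

# hypothetical raw logits for a batch of two sequences and two labels
logits = numpy.array([[2.0, -1.0], [-0.5, 1.5]])
id2label = {0: "negative", 1: "positive"}  # assumed mapping, normally read from the model config

# softmax over the last axis, as in process_engine_outputs above
scores = numpy.exp(logits) / numpy.exp(logits).sum(-1, keepdims=True)

labels = [id2label[row.argmax()] for row in scores]
confidences = [row.max().item() for row in scores]
print(labels, confidences)  # ['negative', 'positive'] with confidences ~0.95 and ~0.88
```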
- - -""" -Pipeline implementation and pydantic models for token classification transformers -tasks -""" -from typing import Any, Dict, List, Optional, Tuple, Type, Union - -import numpy -from pydantic import BaseModel, Field -from transformers.file_utils import ExplicitEnum -from transformers.tokenization_utils_base import PaddingStrategy, TruncationStrategy - -from deepsparse import Pipeline -from deepsparse.transformers.pipelines import TransformersPipeline - - -__all__ = [ - "AggregationStrategy", - "TokenClassificationInput", - "TokenClassificationResult", - "TokenClassificationOutput", - "TokenClassificationPipeline", -] - - -class AggregationStrategy(ExplicitEnum): - """ - Valid aggregation strategies for postprocessing in the TokenClassificationPipeline - """ - - NONE = "none" - SIMPLE = "simple" - FIRST = "first" - AVERAGE = "average" - MAX = "max" - - -class TokenClassificationInput(BaseModel): - """ - Schema for inputs to token_classification pipelines - """ - - inputs: Union[List[str], str] = Field( - description=( - "A string or List of batch of strings representing input(s) to" - "a token_classification task" - ) - ) - - -class TokenClassificationResult(BaseModel): - """ - Schema for a classification of a single token - """ - - entity: str = Field(description="entity predicted for that token/word") - score: float = Field(description="The corresponding probability for `entity`") - index: int = Field(description="index of the corresponding token in the sentence") - word: str = Field(description="token/word classified") - start: Optional[int] = Field( - description=( - "index of the start of the corresponding entity in the sentence. " - "Only exists if the offsets are available within the tokenizer" - ) - ) - end: Optional[int] = Field( - description=( - "index of the end of the corresponding entity in the sentence. " - "Only exists if the offsets are available within the tokenizer" - ) - ) - is_grouped: bool = Field( - default=False, - description="True if this result is part of an entity group", - ) - - -class TokenClassificationOutput(BaseModel): - """ - Schema for results of TokenClassificationPipeline inference. Classifications of each - token stored in a list of lists of batch[sentence[token]] - """ - - predictions: List[List[TokenClassificationResult]] = Field( - description=( - "list of list of results of token classification pipeline. Outer list " - "has one item for each sequence in the batch. Inner list has one " - "TokenClassificationResult item per token in the given sequence" - ) - ) - - -@Pipeline.register( - task="token_classification", - task_aliases=["ner"], - default_model_path=( - "zoo:nlp/token_classification/bert-base/pytorch/huggingface/" - "conll2003/12layer_pruned80_quant-none-vnni" - ), -) -class TokenClassificationPipeline(TransformersPipeline): - """ - transformers token classification pipeline - - example instantiation: - ```python - token_classifier = Pipeline.create( - task="token_classification", - model_path="token_classification_model_dir/", - batch_size=BATCH_SIZE, - ) - ``` - - :param model_path: sparsezoo stub to a transformers model, an ONNX file, or - (preferred) a directory containing a model.onnx, tokenizer config, and model - config. If no tokenizer and/or model config(s) are found, then they will be - loaded from huggingface transformers using the `default_model_name` key - :param engine_type: inference engine to use. Currently supported values include - 'deepsparse' and 'onnxruntime'. 
Default is 'deepsparse' - :param batch_size: static batch size to use for inference. Default is 1 - :param num_cores: number of CPU cores to allocate for inference engine. None - specifies all available cores. Default is None - :param scheduler: (deepsparse only) kind of scheduler to execute with. - Pass None for the default - :param input_shapes: list of shapes to set ONNX the inputs to. Pass None - to use model as-is. Default is None - :param alias: optional name to give this pipeline instance, useful when - inferencing with multiple models. Default is None - :param sequence_length: sequence length to compile model and tokenizer for. - Default is 128 - :param default_model_name: huggingface transformers model name to use to - load a tokenizer and model config when none are provided in the `model_path`. - Default is 'bert-base-uncased' - :param aggregation_strategy: how to aggregate tokens in postprocessing. Options - include 'none', 'simple', 'first', 'average', and 'max'. Default is None - :param ignore_labels: list of label names to ignore in output. Default is - ['0'] which ignores the default known class label - """ - - def __init__( - self, - *, - aggregation_strategy: AggregationStrategy = AggregationStrategy.NONE, - ignore_labels: List[str] = None, - **kwargs, - ): - - if isinstance(aggregation_strategy, str): - aggregation_strategy = aggregation_strategy.strip().lower() - self._aggregation_strategy = AggregationStrategy(aggregation_strategy) - self._ignore_labels = ["0"] if ignore_labels is None else ignore_labels - - super().__init__(**kwargs) - - @property - def aggregation_strategy(self) -> str: - """ - :return: how to aggregate tokens in postprocessing. Options - include 'none', 'simple', 'first', 'average', and 'max' - """ - return self._aggregation_strategy.value - - @property - def ignore_labels(self) -> List[str]: - """ - :return: list of label names to ignore in output. Default is - ['0'] which ignores the default known class label - """ - return self._ignore_labels - - @property - def input_schema(self) -> Type[BaseModel]: - """ - :return: pydantic model class that inputs to this pipeline must comply to - """ - return TokenClassificationInput - - @property - def output_schema(self) -> Type[BaseModel]: - """ - :return: pydantic model class that outputs of this pipeline must comply to - """ - return TokenClassificationOutput - - def parse_inputs(self, *args, **kwargs) -> BaseModel: - """ - :param args: ordered arguments to pipeline, only an input_schema object - is supported as an arg for this function - :param kwargs: keyword arguments to pipeline - :return: pipeline arguments parsed into the given `input_schema` - schema if necessary. If an instance of the `input_schema` is provided - it will be returned - """ - if args and kwargs: - raise ValueError( - f"{self.__class__} only support args OR kwargs. Found " - f" {len(args)} args and {len(kwargs)} kwargs" - ) - - if args: - if len(args) == 1: - # passed input_schema schema directly - if isinstance(args[0], self.input_schema): - return args[0] - return self.input_schema(inputs=args[0]) - else: - return self.input_schema(inputs=args) - - return self.input_schema(**kwargs) - - def process_inputs( - self, - inputs: TokenClassificationInput, - ) -> Tuple[List[numpy.ndarray], Dict[str, Any]]: - """ - :param inputs: inputs to the pipeline. 
Must be the type of the - TokenClassificationInput - :return: inputs of this model processed into a list of numpy arrays that - can be directly passed into the forward pass of the pipeline engine - and dictionary containing offset mappings and special tokens mask to - be used during postprocessing - """ - tokens = self.tokenizer( - inputs.inputs, - return_tensors="np", - truncation=TruncationStrategy.LONGEST_FIRST.value, - padding=PaddingStrategy.MAX_LENGTH.value, - return_special_tokens_mask=True, - return_offsets_mapping=self.tokenizer.is_fast, - ) - - offset_mapping = ( - tokens.pop("offset_mapping") - if self.tokenizer.is_fast - else [None] * len(inputs.inputs) - ) - special_tokens_mask = tokens.pop("special_tokens_mask") - postprocessing_kwargs = dict( - inputs=inputs, - tokens=tokens, - offset_mapping=offset_mapping, - special_tokens_mask=special_tokens_mask, - ) - - return self.tokens_to_engine_input(tokens), postprocessing_kwargs - - def process_engine_outputs( - self, - engine_outputs: List[numpy.ndarray], - **kwargs, - ) -> BaseModel: - """ - :param engine_outputs: list of numpy arrays that are the output of the engine - forward pass - :return: outputs of engine post-processed into an object in the `output_schema` - format of this pipeline - """ - inputs = kwargs["inputs"] - tokens = kwargs["tokens"] - offset_mapping = kwargs["offset_mapping"] - special_tokens_mask = kwargs["special_tokens_mask"] - - predictions = [] # type: List[List[TokenClassificationResult]] - - for entities_index, current_entities in enumerate(engine_outputs[0]): - input_ids = tokens["input_ids"][entities_index] - - scores = numpy.exp(current_entities) / numpy.exp(current_entities).sum( - -1, keepdims=True - ) - pre_entities = self._gather_pre_entities( - inputs.inputs[entities_index], - input_ids, - scores, - offset_mapping[entities_index], - special_tokens_mask[entities_index], - ) - grouped_entities = self._aggregate(pre_entities) - # Filter anything that is in self.ignore_labels - current_results = [] # type: List[TokenClassificationResult] - for entity in grouped_entities: - if entity.get("entity") in self.ignore_labels or ( - entity.get("entity_group") in self.ignore_labels - ): - continue - if entity.get("entity_group"): - entity["entity"] = entity["entity_group"] - entity["is_grouped"] = True - del entity["entity_group"] - current_results.append(TokenClassificationResult(**entity)) - predictions.append(current_results) - - return self.output_schema(predictions=predictions) - - # utilities below adapted from transformers - - def _gather_pre_entities( - self, - sentence: str, - input_ids: numpy.ndarray, - scores: numpy.ndarray, - offset_mapping: Optional[List[Tuple[int, int]]], - special_tokens_mask: numpy.ndarray, - ) -> List[dict]: - pre_entities = [] - for idx, token_scores in enumerate(scores): - # Filter special_tokens, they should only occur - # at the sentence boundaries since we're not encoding pairs of - # sentences so we don't have to keep track of those. 
- if special_tokens_mask[idx]: - continue - - word = self.tokenizer.convert_ids_to_tokens(int(input_ids[idx])) - if offset_mapping is not None: - start_ind, end_ind = offset_mapping[idx] - word_ref = sentence[start_ind:end_ind] - is_subword = len(word_ref) != len(word) - - if int(input_ids[idx]) == self.tokenizer.unk_token_id: - word = word_ref - is_subword = False - else: - start_ind = None - end_ind = None - is_subword = False - - pre_entity = { - "word": word, - "scores": token_scores, - "start": start_ind, - "end": end_ind, - "index": idx, - "is_subword": is_subword, - } - pre_entities.append(pre_entity) - return pre_entities - - def _aggregate(self, pre_entities: List[dict]) -> List[dict]: - if self._aggregation_strategy in { - AggregationStrategy.NONE, - AggregationStrategy.SIMPLE, - }: - entities = [] - for pre_entity in pre_entities: - entity_idx = pre_entity["scores"].argmax() - score = pre_entity["scores"][entity_idx] - entity = { - "entity": self.config.id2label[entity_idx], - "score": score, - "index": pre_entity["index"], - "word": pre_entity["word"], - "start": pre_entity["start"], - "end": pre_entity["end"], - } - entities.append(entity) - else: - entities = self._aggregate_words(pre_entities) - - if self._aggregation_strategy == AggregationStrategy.NONE: - return entities - return self._group_entities(entities) - - def _aggregate_word(self, entities: List[dict]) -> dict: - word = self.tokenizer.convert_tokens_to_string( - [entity["word"] for entity in entities] - ) - if self._aggregation_strategy == AggregationStrategy.FIRST: - scores = entities[0]["scores"] - idx = scores.argmax() - score = scores[idx] - entity = self.config.id2label[idx] - elif self._aggregation_strategy == AggregationStrategy.MAX: - max_entity = max(entities, key=lambda entity: entity["scores"].max()) - scores = max_entity["scores"] - idx = scores.argmax() - score = scores[idx] - entity = self.config.id2label[idx] - elif self._aggregation_strategy == AggregationStrategy.AVERAGE: - scores = numpy.stack([entity["scores"] for entity in entities]) - average_scores = numpy.nanmean(scores, axis=0) - entity_idx = average_scores.argmax() - entity = self.config.id2label[entity_idx] - score = average_scores[entity_idx] - else: - raise ValueError( - f"Invalid aggregation_strategy: {self._aggregation_strategy}" - ) - new_entity = { - "entity": entity, - "score": score, - "word": word, - "start": entities[0]["start"], - "end": entities[-1]["end"], - } - return new_entity - - def _aggregate_words(self, entities: List[dict]) -> List[dict]: - word_entities = [] - word_group = None - for entity in entities: - if word_group is None: - word_group = [entity] - elif entity["is_subword"]: - word_group.append(entity) - else: - word_entities.append(self._aggregate_word(word_group)) - word_group = [entity] - # Last item - word_entities.append(self._aggregate_word(word_group)) - return word_entities - - def _group_sub_entities(self, entities: List[dict]) -> dict: - # Get the first entity in the entity group - entity = entities[0]["entity"].split("-")[-1] - scores = numpy.nanmean([entity["score"] for entity in entities]) - tokens = [entity["word"] for entity in entities] - - entity_group = { - "entity_group": entity, - "score": numpy.mean(scores), - "word": self.tokenizer.convert_tokens_to_string(tokens), - "start": entities[0]["start"], - "end": entities[-1]["end"], - } - return entity_group - - def _get_tag(self, entity_name: str) -> Tuple[str, str]: - if entity_name.startswith("B-"): - bi = "B" - tag = entity_name[2:] - elif 
entity_name.startswith("I-"): - bi = "I" - tag = entity_name[2:] - else: - # It's not in B-, I- format - bi = "B" - tag = entity_name - return bi, tag - - def _group_entities(self, entities: List[dict]) -> List[dict]: - - entity_groups = [] - entity_group_disagg = [] - - for entity in entities: - if not entity_group_disagg: - entity_group_disagg.append(entity) - continue - - # If the current entity is similar and adjacent to the previous entity, - # append it to the disaggregated entity group - # The split is meant to account for the "B" and "I" prefixes - # Shouldn't merge if both entities are B-type - bi, tag = self._get_tag(entity["entity"]) - last_bi, last_tag = self._get_tag(entity_group_disagg[-1]["entity"]) - - if tag == last_tag and bi != "B": - # Modify subword type to be previous_type - entity_group_disagg.append(entity) - else: - # If the current entity is different from the previous entity - # aggregate the disaggregated entity group - entity_groups.append(self._group_sub_entities(entity_group_disagg)) - entity_group_disagg = [entity] - if entity_group_disagg: - # it's the last entity, add it to the entity groups - entity_groups.append(self._group_sub_entities(entity_group_disagg)) - - return entity_groups diff --git a/src/deepsparse/transformers/server.py b/src/deepsparse/transformers/server.py new file mode 100644 index 0000000000..59035dba80 --- /dev/null +++ b/src/deepsparse/transformers/server.py @@ -0,0 +1,186 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
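The entity-grouping helpers removed above (`_get_tag` and `_group_entities`) follow standard BIO handling: a `B-` prefix opens a new entity group, an `I-` prefix with the same tag extends the current group, and bare labels are treated as beginnings. Below is a tiny standalone sketch of that rule over plain label strings; the removed code applies the same logic to full entity dicts.

```python
def get_tag(entity_name):
    # mirrors the removed _get_tag helper
    if entity_name.startswith("B-"):
        return "B", entity_name[2:]
    if entity_name.startswith("I-"):
        return "I", entity_name[2:]
    return "B", entity_name  # not in B-/I- format: treat as a beginning

tags = ["B-PER", "I-PER", "B-LOC"]
groups, current = [], [tags[0]]
for name in tags[1:]:
    bi, tag = get_tag(name)
    _, last_tag = get_tag(current[-1])
    if tag == last_tag and bi != "B":
        current.append(name)      # same entity, non-B prefix: extend the group
    else:
        groups.append(current)    # otherwise close the group and start a new one
        current = [name]
groups.append(current)
print(groups)  # [['B-PER', 'I-PER'], ['B-LOC']]
```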
+ +""" +Specs, schemas, and pipelines for use when serving transformers models +""" + +from typing import Any, Dict, List, Optional, Tuple, Union + +from deepsparse.tasks import SupportedTasks +from deepsparse.transformers.pipelines import Pipeline, pipeline + + +try: + from deepsparse.server.config import ServeModelConfig + + deepsparse_server_err = None +except Exception as _err: + deepsparse_server_err = _err + ServeModelConfig = object + +try: + from pydantic import BaseModel, Field + + pydantic_import_err = None +except Exception as _err: + pydantic_import_err = _err + BaseModel = object + Field = dict + + +__all__ = [ + "create_pipeline_definitions", + "QuestionAnsweringRequest", + "QuestionAnsweringResponse", + "TextClassificationRequest", + "TextClassificationResponse", + "TokenClassificationRequest", + "TokenClassificationResponse", +] + + +def create_pipeline_definitions( + model_config: ServeModelConfig, +) -> Tuple[Pipeline, Any, Any, Dict]: + """ + Create a pipeline definition and the supporting files for a given model config + to use for serving in the DeepSparse inference server + + :param model_config: the server model config describing the model and params + :return: a tuple containing (the pipeline to use for inference, + the expected request body, the expected response body, + any additional keyword args for use with the server) + """ + if deepsparse_server_err: + raise deepsparse_server_err + + if pydantic_import_err: + raise pydantic_import_err + + if SupportedTasks.nlp.question_answering.matches(model_config.task): + request_model = QuestionAnsweringRequest + response_model = Union[ + List[QuestionAnsweringResponse], + QuestionAnsweringResponse, + ] + kwargs = {} + elif SupportedTasks.nlp.text_classification.matches(model_config.task): + request_model = TextClassificationRequest + response_model = Union[ + List[TextClassificationResponse], List[List[TextClassificationResponse]] + ] + kwargs = {} + elif SupportedTasks.nlp.token_classification.matches(model_config.task): + request_model = TokenClassificationRequest + response_model = Union[ + List[TokenClassificationResponse], List[List[TokenClassificationResponse]] + ] + kwargs = {} + else: + raise ValueError( + f"unrecognized task given of {model_config.task} for config {model_config}" + ) + + pipeline_instance: Pipeline = pipeline( + task=model_config.task.lower().replace("_", "-"), + model_path=model_config.model_path, + engine_type=model_config.engine, + num_cores=model_config.num_cores, + scheduler=model_config.scheduler, + batch_size=model_config.batch_size, + **model_config.kwargs, + ) + + return pipeline_instance, request_model, response_model, kwargs + + +class QuestionAnsweringRequest(BaseModel): + """ + The request model for Question Answering Task + """ + + question: Union[List[str], str] = Field( + description="Either a string or a List of string questions to answer" + ) + context: Union[List[str], str] = Field( + description="Either a string or List of strings representing the context " + "for each question" + ) + + +class TokenClassificationRequest(BaseModel): + """ + Schema for TokenClassificationPipeline Request + """ + + inputs: Union[List[str], str] = Field( + description="A string or List of strings representing input to" + "TokenClassificationPipeline task" + ) + + +class TextClassificationRequest(BaseModel): + """ + Schema for TextClassificationPipeline Request + """ + + sequences: Union[List[str], str] = Field( + description="A string or List of strings representing input to" + 
"TextClassificationPipeline task" + ) + + +class QuestionAnsweringResponse(BaseModel): + """ + Schema for a result from Question Answering Task + """ + + score: float = Field(description="confidence score for prediction") + start: int = Field(description="The start index of the answer") + end: int = Field(description="The end index of the answer") + answer: str = Field(description="The predicted answer") + + +class TokenClassificationResponse(BaseModel): + """ + Schema for TokenClassificationPipeline Response + """ + + entity: str = Field( + description="The entity predicted for that token/word (it is named" + "`entity_group` when `aggregation_strategy` is not `none`." + ) + score: float = Field(description="The corresponding probability for `entity`.") + index: int = Field( + description="The index of the corresponding token in the sentence." + ) + word: str = Field(description="The token/word classified.") + start: Optional[int] = Field( + description="The index of the start of the corresponding entity in the " + "sentence. Only exists if the offsets are available within the tokenizer" + ) + end: Optional[int] = Field( + description="The index of the end of the corresponding entity in the sentence. " + "Only exists if the offsets are available within the tokenizer" + ) + + +class TextClassificationResponse(BaseModel): + """ + Schema for TextClassificationPipeline Response + """ + + label: str = Field(description="The label predicted.") + score: float = Field(description="The corresponding probability.") diff --git a/src/deepsparse/yolo/__init__.py b/src/deepsparse/yolo/__init__.py deleted file mode 100644 index 0c44f887a4..0000000000 --- a/src/deepsparse/yolo/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/src/deepsparse/yolo/annotate.py b/src/deepsparse/yolo/annotate.py deleted file mode 100644 index 72f7770934..0000000000 --- a/src/deepsparse/yolo/annotate.py +++ /dev/null @@ -1,232 +0,0 @@ -# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -""" -Usage: deepsparse.object_detection.annotate [OPTIONS] - - Annotation Script for YOLO with DeepSparse - -Options: - --model_filepath, --model-filepath TEXT - Path/SparseZoo stub to the model file to be - used for annotation [default: zoo:cv/detect - ion/yolov5-s/pytorch/ultralytics/coco/pruned - -aggressive_96] - --source TEXT File path to image or directory of .jpg - files, a .mp4 video, or an integer (i.e. 0) - for webcam [required] - --engine [deepsparse|onnxruntime|torch] - Inference engine backend to run on. Choices - are 'deepsparse', 'onnxruntime', and - 'torch'. Default is 'deepsparse' - --image_shape, --image_shape INTEGER... - Image shape to use for inference, must be - two integers [default: 640, 640] - --num_cores, --num-cores INTEGER - The number of physical cores to run the - annotations with, defaults to using all - physical cores available on the system. For - DeepSparse benchmarks, this value is the - number of cores per socket - --save_dir, --save-dir DIRECTORY - The path to the directory for saving results - [default: annotation-results] - --name TEXT Name of directory in save-dir to write - results to. defaults to - {engine}-annotations-{run_number} - --target_fps, --target-fps FLOAT - Target FPS when writing video files. Frames - will be dropped to closely match target FPS. - --source must be a video file and if target- - fps is greater than the source video fps - then it will be ignored - --no_save, --no-save Set flag when source is from webcam to not - save results.Not supported for non-webcam - sources [default: False] - --help Show this message and exit. - -####### -Examples: - -1) deepsparse.object_detection.annotate --source PATH/TO/IMAGE.jpg -2) deepsparse.object_detection.annotate --source PATH/TO/VIDEO.mp4 -3) deepsparse.object_detection.annotate --source 0 -4) deepsparse.object_detection.annotate --source PATH/TO/IMAGE_DIR -""" -import logging -from typing import Optional - -import click - -import cv2 -from deepsparse.pipeline import Pipeline -from deepsparse.yolo import utils -from deepsparse.yolo.utils.cli_helpers import create_dir_callback - - -yolo_v5_default_stub = ( - "zoo:cv/detection/yolov5-s/pytorch/ultralytics/coco/" "pruned-aggressive_96" -) - -DEEPSPARSE_ENGINE = "deepsparse" -ORT_ENGINE = "onnxruntime" -TORCH_ENGINE = "torch" - -_LOGGER = logging.getLogger(__name__) - - -@click.command() -@click.option( - "--model_filepath", - "--model-filepath", - type=str, - default=yolo_v5_default_stub, - help="Path/SparseZoo stub to the model file to be used for annotation", - show_default=True, -) -@click.option( - "--source", - type=str, - required=True, - help="File path to image or directory of .jpg files, a .mp4 video, " - "or an integer (i.e. 0) for webcam", -) -@click.option( - "--engine", - type=click.Choice([DEEPSPARSE_ENGINE, ORT_ENGINE, TORCH_ENGINE]), - default=DEEPSPARSE_ENGINE, - help="Inference engine backend to run on. Choices are 'deepsparse', " - "'onnxruntime', and 'torch'. Default is 'deepsparse'", -) -@click.option( - "--image_shape", - "--image_shape", - type=int, - nargs=2, - default=(640, 640), - help="Image shape to use for inference, must be two integers", - show_default=True, -) -@click.option( - "--num_cores", - "--num-cores", - type=int, - default=None, - help="The number of physical cores to run the annotations with, " - "defaults to using all physical cores available on the system." 
- " For DeepSparse benchmarks, this value is the number of cores " - "per socket", - show_default=True, -) -@click.option( - "--save_dir", - "--save-dir", - type=click.Path(dir_okay=True, file_okay=False), - default="annotation-results", - callback=create_dir_callback, - help="The path to the directory for saving results", - show_default=True, -) -@click.option( - "--name", - type=str, - default=None, - help="Name of directory in save-dir to write results to. defaults to " - "{engine}-annotations-{run_number}", -) -@click.option( - "--target_fps", - "--target-fps", - type=float, - default=None, - help="Target FPS when writing video files. Frames will be dropped to " - "closely match target FPS. --source must be a video file and if " - "target-fps is greater than the source video fps then it " - "will be ignored", - show_default=True, -) -@click.option( - "--no_save", - "--no-save", - is_flag=True, - help="Set flag when source is from webcam to not save results." - "Not supported for non-webcam sources", - show_default=True, -) -def main( - model_filepath: str, - source: str, - engine: str, - image_shape: tuple, - num_cores: Optional[int], - save_dir: str, - name: Optional[str], - target_fps: Optional[float], - no_save: bool, -) -> None: - """ - Annotation Script for YOLO with DeepSparse - """ - save_dir = utils.get_annotations_save_dir( - initial_save_dir=save_dir, - tag=name, - engine=engine, - ) - - loader, saver, is_video = utils.get_yolo_loader_and_saver( - path=source, - save_dir=save_dir, - image_shape=image_shape, - target_fps=target_fps, - no_save=no_save, - ) - - is_webcam = source.isnumeric() - yolo_pipeline = Pipeline.create( - task="yolo", - model_path=model_filepath, - class_names="coco", - engine_type=engine, - num_cores=num_cores, - ) - - for iteration, (input_image, source_image) in enumerate(loader): - - # annotate - annotated_images = utils.annotate( - pipeline=yolo_pipeline, - image_batch=input_image, - target_fps=target_fps, - calc_fps=is_video, - original_images=[source_image], - ) - - for annotated_image in annotated_images: - # display - if is_webcam: - cv2.imshow("annotated", annotated_image) - cv2.waitKey(1) - - # save - if saver: - saver.save_frame(annotated_image) - - if saver: - saver.close() - - _LOGGER.info(f"Results saved to {save_dir}") - - -if __name__ == "__main__": - main() diff --git a/src/deepsparse/yolo/pipelines.py b/src/deepsparse/yolo/pipelines.py deleted file mode 100644 index 2398313c31..0000000000 --- a/src/deepsparse/yolo/pipelines.py +++ /dev/null @@ -1,248 +0,0 @@ -# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import json -from typing import Dict, List, Optional, Tuple, Type, Union - -import numpy -import onnx - -from deepsparse.pipeline import Pipeline -from deepsparse.utils import model_to_path -from deepsparse.yolo.schemas import YOLOInput, YOLOOutput -from deepsparse.yolo.utils import COCO_CLASSES, YoloPostprocessor, postprocess_nms - - -try: - import cv2 - - cv2_error = None -except ModuleNotFoundError as cv2_import_error: - cv2 = None - cv2_error = cv2_import_error - - -@Pipeline.register( - task="yolo", - default_model_path=( - "zoo:cv/detection/yolov5-l/pytorch/ultralytics/coco/pruned_quant-aggressive_95" - ), -) -class YOLOPipeline(Pipeline): - """ - Image Segmentation YOLO pipeline for DeepSparse - - :param model_path: path on local system or SparseZoo stub to load the model from - :param engine_type: inference engine to use. Currently supported values include - 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' - :param batch_size: static batch size to use for inference. Default is 1 - :param num_cores: number of CPU cores to allocate for inference engine. None - specifies all available cores. Default is None - :param scheduler: (deepsparse only) kind of scheduler to execute with. - Pass None for the default - :param input_shapes: list of shapes to set ONNX the inputs to. Pass None - to use model as-is. Default is None - :param alias: optional name to give this pipeline instance, useful when - inferencing with multiple models. Default is None - :param class_names: Optional string identifier, dict, or json file of - class names to use for mapping class ids to class labels. Default is - `coco` - """ - - def __init__( - self, - *, - class_names: Optional[Union[str, Dict[str, str]]] = "coco", - model_config: Optional[str] = None, - **kwargs, - ): - super().__init__( - **kwargs, - ) - - if isinstance(class_names, str): - if class_names.endswith(".json"): - class_names = json.load(open(class_names)) - elif class_names == "coco": - class_names = COCO_CLASSES - else: - raise ValueError(f"Unknown class_names: {class_names}") - - if isinstance(class_names, dict): - self._class_names = class_names - elif isinstance(class_names, list): - self._class_names = { - str(index): class_name for index, class_name in enumerate(class_names) - } - else: - raise ValueError( - "class_names must be a str identifier, dict, json file, or " - f"list of class names got {type(class_names)}" - ) - - onnx_model = onnx.load(self.onnx_file_path) - self.has_postprocessing = self.model_has_postprocessing( - loaded_onnx_model=onnx_model, - ) - self.input_shape = self._infer_image_shape(onnx_model=onnx_model) - self.is_quantized = self.model_is_quantized(onnx_model=onnx_model) - self.postprocessor = ( - None - if self.has_postprocessing - else YoloPostprocessor( - image_size=self.input_shape, - cfg=model_config, - ) - ) - self._model_config = model_config - - @property - def model_config(self) -> str: - return self._model_config - - @property - def class_names(self) -> Optional[Dict[str, str]]: - return self._class_names - - @property - def input_schema(self) -> Type[YOLOInput]: - """ - :return: pydantic model class that inputs to this pipeline must comply to - """ - return YOLOInput - - @property - def output_schema(self) -> Type[YOLOOutput]: - """ - :return: pydantic model class that outputs of this pipeline must comply to - """ - return YOLOOutput - - def setup_onnx_file_path(self) -> str: - """ - Performs any setup to unwrap and process the given `model_path` and other - class properties into an inference ready 
onnx file to be compiled by the - engine of the pipeline - - :return: file path to the ONNX file for the engine to compile - """ - return model_to_path(self.model_path) - - def process_inputs(self, inputs: YOLOInput) -> List[numpy.ndarray]: - """ - :param inputs: inputs to the pipeline. Must be the type of the `input_schema` - of this pipeline - :return: inputs of this model processed into a list of numpy arrays that - can be directly passed into the forward pass of the pipeline engine - """ - image_batch = [] - - if isinstance(inputs.images, str): - inputs.images = [inputs.images] - - for image in inputs.images: - if isinstance(image, str): - image = cv2.imread(image) - image = cv2.resize(image, dsize=self.input_shape) - image = image[:, :, ::-1].transpose(2, 0, 1) - - image_batch.append(image) - - image_batch = numpy.stack(image_batch, axis=0) - image_batch = numpy.ascontiguousarray( - image_batch, - dtype=numpy.int8 if self.is_quantized else numpy.float32, - ) - image_batch /= 255 - - return [image_batch] - - def process_engine_outputs( - self, - engine_outputs: List[numpy.ndarray], - ) -> YOLOOutput: - """ - :param engine_outputs: list of numpy arrays that are the output of the engine - forward pass - :return: outputs of engine post-processed into an object in the `output_schema` - format of this pipeline - """ - - # post-processing - if self.postprocessor: - batch_output = self.postprocessor.pre_nms_postprocess(engine_outputs) - else: - batch_output = engine_outputs[ - 0 - ] # post-processed values stored in first output - - # NMS - batch_output = postprocess_nms(batch_output) - - batch_predictions, batch_boxes, batch_scores, batch_labels = [], [], [], [] - - for image_output in batch_output: - batch_predictions.append(image_output.tolist()) - batch_boxes.append(image_output[:, 0:4].tolist()) - batch_scores.append(image_output[:, 4].tolist()) - batch_labels.append( - [ - self.class_names[str(class_ids)] - for class_ids in image_output[:, 5].astype(int) - ] - ) - - return YOLOOutput( - predictions=batch_predictions, - boxes=batch_boxes, - scores=batch_scores, - labels=batch_labels, - ) - - def _infer_image_shape(self, onnx_model) -> Tuple[int, ...]: - """ - Infer and return the expected shape of the input tensor - - :return: The expected shape of the input tensor from onnx graph - """ - input_tensor = onnx_model.graph.input[0] - return ( - input_tensor.type.tensor_type.shape.dim[2].dim_value, - input_tensor.type.tensor_type.shape.dim[3].dim_value, - ) - - def model_has_postprocessing(self, loaded_onnx_model) -> bool: - """ - :return: True if loaded_onnx_model has postprocessing, False otherwise - """ - # get number of dimensions in each output - outputs_num_dims = [ - len(output.type.tensor_type.shape.dim) - for output in loaded_onnx_model.graph.output - ] - - # assume if only one output, then it is post-processed - if len(outputs_num_dims) == 1: - return True - - return all(num_dims > outputs_num_dims[0] for num_dims in outputs_num_dims[1:]) - - def model_is_quantized(self, onnx_model) -> bool: - """ - :return: True if loaded_onnx_model is quantized, False otherwise - """ - return ( - onnx_model.graph.input[0].type.tensor_type.elem_type - == onnx.TensorProto.UINT8 - ) diff --git a/src/deepsparse/yolo/schemas.py b/src/deepsparse/yolo/schemas.py deleted file mode 100644 index f60357dfb5..0000000000 --- a/src/deepsparse/yolo/schemas.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -""" -Input/Output Schemas for Image Segmentation with YOLO -""" -from collections import namedtuple -from typing import List, Union - -import numpy -from pydantic import BaseModel - - -__all__ = [ - "YOLOOutput", - "YOLOInput", -] - -_YOLOImageOutput = namedtuple( - "_YOLOImageOutput", ["predictions", "boxes", "scores", "labels"] -) - - -class YOLOInput(BaseModel): - """ - Input model for image classification - """ - - images: Union[str, List[numpy.ndarray], List[str]] - - class Config: - arbitrary_types_allowed = True - - -class YOLOOutput(BaseModel): - """ - Output model for image classification - """ - - predictions: List[List[List[float]]] - boxes: List[List[List[float]]] - scores: List[List[float]] - labels: List[List[str]] - - def __getitem__(self, index): - if index >= len(self.predictions): - raise IndexError("Index out of range") - - return _YOLOImageOutput( - self.predictions[index], - self.boxes[index], - self.scores[index], - self.labels[index], - ) - - def __iter__(self): - for index in range(len(self.predictions)): - yield self[index] diff --git a/src/deepsparse/yolo/utils/__init__.py b/src/deepsparse/yolo/utils/__init__.py deleted file mode 100644 index 5344738df6..0000000000 --- a/src/deepsparse/yolo/utils/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# flake8: noqa - -from .coco_classes import * -from .utils import * diff --git a/src/deepsparse/yolo/utils/cli_helpers.py b/src/deepsparse/yolo/utils/cli_helpers.py deleted file mode 100644 index ccd366236f..0000000000 --- a/src/deepsparse/yolo/utils/cli_helpers.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
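The removed `YOLOOutput` schema above stores batch-level lists and exposes per-image access through `__getitem__`/`__iter__`, yielding one `_YOLOImageOutput` namedtuple per image. A short sketch with a hand-built, single-image output; all values are made up.

```python
from deepsparse.yolo.schemas import YOLOOutput  # module removed in this change

# hand-built single-image output; in practice process_engine_outputs builds this
output = YOLOOutput(
    predictions=[[[0.0, 0.0, 10.0, 10.0, 0.9, 0.0]]],
    boxes=[[[0.0, 0.0, 10.0, 10.0]]],
    scores=[[0.9]],
    labels=[["person"]],
)

for image_result in output:  # one _YOLOImageOutput namedtuple per image
    print(image_result.labels, image_result.scores)  # ['person'] [0.9]
```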
- -import os -from typing import Optional, Union - - -def parse_device( - ctx, - params, - value: Optional[Union[str, int]], -) -> Optional[Union[str, int]]: - """ - :param ctx: The click context - :param params: The click params - :param value: The device value to parse - :return: The correct inferred device - """ - try: - return int(value) - except (ValueError, TypeError): - return value - - -def create_dir_callback(ctx, params, value: str): - """ - Create and return directory if it doesn't exist. - - :param ctx: The click context - :param params: The click params - :param value: The value to create the directory from - :returns: The directory path - """ - os.makedirs(value, exist_ok=True) - return value diff --git a/src/deepsparse/yolo/utils/coco_classes.py b/src/deepsparse/yolo/utils/coco_classes.py deleted file mode 100644 index 5e67829d8f..0000000000 --- a/src/deepsparse/yolo/utils/coco_classes.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -COCO_CLASSES = [ - "person", - "bicycle", - "car", - "motorcycle", - "airplane", - "bus", - "train", - "truck", - "boat", - "traffic light", - "fire hydrant", - "stop sign", - "parking meter", - "bench", - "bird", - "cat", - "dog", - "horse", - "sheep", - "cow", - "elephant", - "bear", - "zebra", - "giraffe", - "backpack", - "umbrella", - "handbag", - "tie", - "suitcase", - "frisbee", - "skis", - "snowboard", - "sports ball", - "kite", - "baseball bat", - "baseball glove", - "skateboard", - "surfboard", - "tennis racket", - "bottle", - "wine glass", - "cup", - "fork", - "knife", - "spoon", - "bowl", - "banana", - "apple", - "sandwich", - "orange", - "broccoli", - "carrot", - "hot dog", - "pizza", - "donut", - "cake", - "chair", - "couch", - "potted plant", - "bed", - "dining table", - "toilet", - "tv", - "laptop", - "mouse", - "remote", - "keyboard", - "cell phone", - "microwave", - "oven", - "toaster", - "sink", - "refrigerator", - "book", - "clock", - "vase", - "scissors", - "teddy bear", - "hair drier", - "toothbrush", -] diff --git a/src/deepsparse/yolo/utils/utils.py b/src/deepsparse/yolo/utils/utils.py deleted file mode 100644 index 0e14aad9fe..0000000000 --- a/src/deepsparse/yolo/utils/utils.py +++ /dev/null @@ -1,795 +0,0 @@ -# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -""" -Helpers and Utilities for YOLO -""" -import functools -import glob -import itertools -import logging -import os -import random -import shutil -import time -from pathlib import Path -from typing import Any, Iterable, Iterator, List, Optional, Tuple, Union - -import numpy -import onnx -import yaml - -import torch -import torchvision -from sparsezoo.utils import create_dirs - - -try: - import cv2 - - cv2_error = None -except ModuleNotFoundError as cv2_import_error: - cv2 = None - cv2_error = cv2_import_error - -_YOLO_CLASS_COLORS = list(itertools.product([0, 255, 128, 64, 192], repeat=3)) -_YOLO_CLASS_COLORS.remove((255, 255, 255)) # remove white from possible colors -_LOGGER = logging.getLogger(__name__) - -# Default YOLO anchor grids -_YOLO_DEFAULT_ANCHORS = [ - torch.Tensor([[10, 13], [16, 30], [33, 23]]), - torch.Tensor([[30, 61], [62, 45], [59, 119]]), - torch.Tensor([[116, 90], [156, 198], [373, 326]]), -] -_YOLO_DEFAULT_ANCHOR_GRIDS = [ - t.clone().view(1, -1, 1, 1, 2) for t in _YOLO_DEFAULT_ANCHORS -] - - -@functools.lru_cache(maxsize=None) -def _get_color(label): - # cache color lookups - return random.choice(_YOLO_CLASS_COLORS) - - -class YoloPostprocessor: - """ - Class for performing post-processing of YOLO model predictions - - :param image_size: size of input image to model. used to calculate stride based on - output shapes - """ - - def __init__( - self, image_size: Tuple[int, int] = (640, 640), cfg: Optional[str] = None - ): - self._image_size = image_size - self._anchor_grids = ( - self._load_cfg_anchor_grid(cfg) if cfg else _YOLO_DEFAULT_ANCHOR_GRIDS - ) - self._grids = {} # Dict[Tuple[int], torch.Tensor] - - def pre_nms_postprocess(self, outputs: List[numpy.ndarray]) -> torch.Tensor: - """ - :param outputs: raw outputs of a YOLO model before anchor grid processing - :return: post-processed model outputs without NMS. 
- """ - # postprocess and transform raw outputs into single torch tensor - processed_outputs = [] - for idx, pred in enumerate(outputs): - pred = torch.from_numpy(pred) - pred = pred.sigmoid() - - # get grid and stride - grid_shape = pred.shape[2:4] - grid = self._get_grid(grid_shape) - stride = self._image_size[0] / grid_shape[0] - - # decode xywh box values - pred[..., 0:2] = (pred[..., 0:2] * 2.0 - 0.5 + grid) * stride - pred[..., 2:4] = (pred[..., 2:4] * 2) ** 2 * self._anchor_grids[idx] - # flatten anchor and grid dimensions -> - # (bs, num_predictions, num_classes + 5) - processed_outputs.append(pred.view(pred.size(0), -1, pred.size(-1))) - return torch.cat(processed_outputs, 1) - - def _get_grid(self, grid_shape: Tuple[int, int]) -> torch.Tensor: - if grid_shape not in self._grids: - # adapted from yolov5.yolo.Detect._make_grid - coords_y, coords_x = torch.meshgrid( - [torch.arange(grid_shape[0]), torch.arange(grid_shape[1])] - ) - grid = torch.stack((coords_x, coords_y), 2) - self._grids[grid_shape] = grid.view( - 1, 1, grid_shape[0], grid_shape[1], 2 - ).float() - return self._grids[grid_shape] - - @staticmethod - def _load_cfg_anchor_grid(cfg: str) -> List[torch.Tensor]: - with open(cfg) as f: - anchors = yaml.safe_load(f)["anchors"] - - def _split_to_coords(coords_list): - return [ - [coords_list[idx], coords_list[idx + 1]] - for idx in range(0, len(coords_list), 2) - ] - - anchors = [torch.Tensor(_split_to_coords(coords)) for coords in anchors] - return [t.clone().view(1, -1, 1, 1, 2) for t in anchors] - - -def postprocess_nms(outputs: Union[torch.Tensor, numpy.ndarray]) -> List[numpy.ndarray]: - """ - :param outputs: Tensor of post-processed model outputs - :return: List of numpy arrays of NMS predictions for each image in the batch - """ - # run nms in PyTorch, only post-process first output - if isinstance(outputs, numpy.ndarray): - outputs = torch.from_numpy(outputs) - nms_outputs = _non_max_suppression(outputs) - return [output.cpu().numpy() for output in nms_outputs] - - -def _non_max_suppression( - prediction, - conf_thres=0.25, - iou_thres=0.45, - classes=None, - agnostic=False, - multi_label=False, - labels=(), -): - # Ported from ultralytics/yolov5 - - nc = prediction.shape[2] - 5 # number of classes - xc = prediction[..., 4] > conf_thres # candidates - - # Checks - assert 0 <= conf_thres <= 1, ( - f"Invalid Confidence threshold {conf_thres}, " - "valid values are between 0.0 and 1.0" - ) - assert ( - 0 <= iou_thres <= 1 - ), f"Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0" - - # Settings - _, max_wh = 2, 4096 # (pixels) minimum and maximum box width and height - max_det = 300 # maximum number of detections per image - max_nms = 30000 # maximum number of boxes into torchvision.ops.nms() - time_limit = 10.0 # seconds to quit after - redundant = True # require redundant detections - multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img) - merge = False # use merge-NMS - - t = time.time() - output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0] - for xi, x in enumerate(prediction): # image index, image inference - # Apply constraints - # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 - x = x[xc[xi]] # confidence - - # Cat apriori labels if autolabelling - if labels and len(labels[xi]): - label_ = labels[xi] - v = torch.zeros((len(label_), nc + 5), device=x.device) - v[:, :4] = label_[:, 1:5] # box - v[:, 4] = 1.0 # conf - v[range(len(label_)), label_[:, 0].long() + 5] = 1.0 # cls - x = torch.cat((x, 
v), 0) - - # If none remain process next image - if not x.shape[0]: - continue - - # Compute conf - x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf - - # Box (center x, center y, width, height) to (x1, y1, x2, y2) - box = _xywh2xyxy(x[:, :4]) - - # Detections matrix nx6 (xyxy, conf, cls) - if multi_label: - i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T - x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1) - else: # best class only - conf, j = x[:, 5:].max(1, keepdim=True) - x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres] - - # Filter by class - if classes is not None: - x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)] - - # Apply finite constraint - # if not torch.isfinite(x).all(): - # x = x[torch.isfinite(x).all(1)] - - # Check shape - n = x.shape[0] # number of boxes - if not n: # no boxes - continue - elif n > max_nms: # excess boxes - x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence - - # Batched NMS - c = x[:, 5:6] * (0 if agnostic else max_wh) # classes - boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores - i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS - if i.shape[0] > max_det: # limit detections - i = i[:max_det] - if merge and (1 < n < 3e3): # Merge NMS (boxes merged using weighted mean) - # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4) - iou = _box_iou(boxes[i], boxes) > iou_thres # iou matrix - weights = iou * scores[None] # box weights - x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum( - 1, keepdim=True - ) # merged boxes - if redundant: - i = i[iou.sum(1) > 1] # require redundancy - - output[xi] = x[i] - if (time.time() - t) > time_limit: - print(f"WARNING: NMS time limit {time_limit}s exceeded") - break # time limit exceeded - - return output - - -def _xywh2xyxy( - x: Union[torch.Tensor, numpy.ndarray] -) -> Union[torch.Tensor, numpy.ndarray]: - # ported from ultralytics/yolov5 - # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] - # where xy1=top-left, xy2=bottom-right - y = x.clone() if isinstance(x, torch.Tensor) else numpy.copy(x) - y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x - y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y - y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x - y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y - return y - - -def _box_iou(box1: torch.Tensor, box2: torch.Tensor) -> torch.Tensor: - # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py - """ - Return intersection-over-union (Jaccard index) of boxes. - Both sets of boxes are expected to be in (x1, y1, x2, y2) format. 
-    Arguments:
-        box1 (Tensor[N, 4])
-        box2 (Tensor[M, 4])
-    Returns:
-        iou (Tensor[N, M]): the NxM matrix containing the pairwise
-            IoU values for every element in boxes1 and boxes2
-    """
-
-    def box_area(box):
-        # box = 4xn
-        return (box[2] - box[0]) * (box[3] - box[1])
-
-    area1 = box_area(box1.T)
-    area2 = box_area(box2.T)
-
-    # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
-    inter = (
-        (
-            torch.min(box1[:, None, 2:], box2[:, 2:])
-            - torch.max(box1[:, None, :2], box2[:, :2])
-        )
-        .clamp(0)
-        .prod(2)
-    )
-    return inter / (
-        area1[:, None] + area2 - inter
-    )  # iou = inter / (area1 + area2 - inter)
-
-
-def yolo_onnx_has_postprocessing(model_path: str) -> bool:
-    """
-    :param model_path: file path to YOLO ONNX model
-    :return: True if YOLO postprocessing (pre-nms) is included in the ONNX graph,
-        this is assumed to be when the first output of the model has fewer dimensions
-        than the other outputs as the grid dimensions have been flattened
-    """
-    model = onnx.load(model_path)
-
-    # get number of dimensions in each output
-    outputs_num_dims = [
-        len(output.type.tensor_type.shape.dim) for output in model.graph.output
-    ]
-
-    # assume if only one output, then it is post-processed
-    if len(outputs_num_dims) == 1:
-        return True
-
-    return all(num_dims > outputs_num_dims[0] for num_dims in outputs_num_dims[1:])
-
-
-def annotate(
-    pipeline: "YOLOPipeline",  # noqa: F821
-    image_batch: Union[List[numpy.ndarray], List[str]],
-    target_fps: float = None,
-    calc_fps: bool = False,
-    original_images: Optional[Union[List[numpy.ndarray], numpy.ndarray]] = None,
-) -> List[numpy.ndarray]:
-    """
-    Annotate and return image_batch with bounding boxes and labels
-
-    :param pipeline: A YOLOPipeline object
-    :param image_batch: A list of image files, or a batch of numpy images
-    :param target_fps: If not None, this FPS value is used to annotate the images
-    :param calc_fps: If True and target_fps is None, then the pipeline will
-        calculate the FPS
-    :param original_images: images from image_batch before any processing
-    :return: A list of annotated images
-
-    """
-
-    if not isinstance(image_batch, list):
-        image_batch = [image_batch]
-
-    if not original_images:
-        original_images = image_batch
-
-    batch_size = len(image_batch)
-    if image_batch and isinstance(image_batch[0], str):
-        original_images = [cv2.imread(image) for image in image_batch]
-
-    if target_fps is None and calc_fps:
-        start = time.time()
-
-    pipeline_outputs = pipeline(images=image_batch)
-
-    if target_fps is None and calc_fps:
-        target_fps = float(batch_size) / (time.time() - start)
-
-    annotated_images = []
-    for index, image_output in enumerate(pipeline_outputs):
-        image = original_images[index]
-        result = _annotate_image(
-            img=image,
-            boxes=image_output.boxes,
-            labels=image_output.labels,
-            scores=image_output.scores,
-            model_input_size=pipeline.input_shape,
-            images_per_sec=target_fps,
-        )
-        annotated_images.append(result)
-
-    return annotated_images
-
-
-def _annotate_image(
-    img: numpy.ndarray,
-    boxes: List[List[float]],
-    scores: List[float],
-    labels: List[str],
-    score_threshold: float = 0.35,
-    model_input_size: Tuple[int, int] = None,
-    images_per_sec: Optional[float] = None,
-) -> numpy.ndarray:
-    """
-    Draws bounding boxes on predictions of a detection model
-
-    :param img: Original image to annotate (no pre-processing needed)
-    :param boxes: List of bounding boxes (x1, y1, x2, y2)
-    :param scores: List of scores for each bounding box
-    :param labels: List of labels for each bounding box
-    :param score_threshold: minimum score a detection should have to be annotated
-        on the image. Default is 0.35
-    :param model_input_size: 2-tuple of expected input size for the given model to
-        be used for bounding box scaling with original image. Scaling will not
-        be applied if model_input_size is None. Default is None
-    :param images_per_sec: optional images per second to annotate the left corner
-        of the image with
-    :return: the original image annotated with the given bounding boxes
-    """
-    img_res = numpy.copy(img)
-
-    scale_y = img.shape[0] / (1.0 * model_input_size[0]) if model_input_size else 1.0
-    scale_x = img.shape[1] / (1.0 * model_input_size[1]) if model_input_size else 1.0
-
-    for idx in range(len(boxes)):
-        label = labels[idx]
-        if scores[idx] > score_threshold:
-            annotation_text = f"{label}: {scores[idx]:.0%}"
-
-            # bounding box points
-            left = boxes[idx][0] * scale_x
-            top = boxes[idx][1] * scale_y
-            right = boxes[idx][2] * scale_x
-            bottom = boxes[idx][3] * scale_y
-
-            # calculate text size
-            (text_width, text_height), text_baseline = cv2.getTextSize(
-                annotation_text,
-                cv2.FONT_HERSHEY_SIMPLEX,
-                0.9,  # font scale
-                2,  # thickness
-            )
-            text_height += text_baseline
-
-            # make solid background for annotation text
-            cv2.rectangle(
-                img_res,
-                (int(left), int(top) - 33),
-                (int(left) + text_width, int(top) - 28 + text_height),
-                _get_color(label),
-                thickness=-1,  # filled solid
-            )
-
-            # add white annotation text
-            cv2.putText(
-                img_res,
-                annotation_text,
-                (int(left), int(top) - 10),
-                cv2.FONT_HERSHEY_SIMPLEX,
-                0.9,  # font scale
-                (255, 255, 255),  # white text
-                2,  # thickness
-                cv2.LINE_AA,
-            )
-
-            # draw bounding box
-            cv2.rectangle(
-                img_res,
-                (int(left), int(top)),
-                (int(right), int(bottom)),
-                _get_color(label),
-                thickness=2,
-            )
-
-    if images_per_sec is not None:
-        cv2.putText(
-            img_res,
-            f"images_per_sec: {int(images_per_sec)}",
-            (50, 50),
-            cv2.FONT_HERSHEY_SIMPLEX,
-            2.0,  # font scale
-            (245, 46, 6),  # color
-            2,  # thickness
-            cv2.LINE_AA,
-        )
-    return img_res
-
-
-def get_yolo_loader_and_saver(
-    path: str,
-    save_dir: str,
-    image_shape: Tuple[int, int] = (640, 640),
-    target_fps: Optional[float] = None,
-    no_save: bool = False,
-) -> Union[Iterable, Any, bool]:
-    """
-
-    :param path: file path to image or directory of .jpg files, a .mp4 video,
-        or an integer (i.e. 0) for web-cam
-    :param save_dir: path of directory to save to
-    :param image_shape: size of input images to the model
-    :param target_fps: fps to save potential video at
-    :param no_save: set true if not saving results of processing
-    :return: image loader iterable and result saver object for
-        images, video, or web-cam based on the path given, and a boolean value
-        that is True if the returned objects load videos
-    """
-    # video
-    if path.endswith(".mp4"):
-        loader = YoloVideoLoader(path, image_shape)
-        saver = VideoSaver(
-            save_dir,
-            loader.original_fps,
-            loader.original_frame_size,
-            target_fps,
-        )
-        return loader, saver, True
-    # webcam
-    if path.isnumeric():
-        loader = YoloWebcamLoader(int(path), image_shape)
-        saver = (
-            VideoSaver(save_dir, 30, loader.original_frame_size, None)
-            if not no_save
-            else None
-        )
-        return loader, saver, True
-    # image file(s)
-    return YoloImageLoader(path, image_shape), ImagesSaver(save_dir), False
-
-
-class YoloImageLoader:
-    """
-    Class for pre-processing and iterating over images to be used as input for YOLO
-    models
-
-    :param path: Filepath to single image file or directory of image files to load,
-        glob paths also valid
-    :param image_size: size of input images to the model
-    """
-
-    def __init__(self, path: str, image_size: Tuple[int, int] = (640, 640)):
-        self._path = path
-        self._image_size = image_size
-
-        if os.path.isdir(path):
-            self._image_file_paths = [
-                os.path.join(path, file_name) for file_name in os.listdir(path)
-            ]
-        elif "*" in path:
-            self._image_file_paths = glob.glob(path)
-        elif os.path.isfile(path):
-            # single file
-            self._image_file_paths = [path]
-        else:
-            raise ValueError(f"{path} is not a file, glob, or directory")
-
-    def __iter__(self) -> Iterator[Tuple[numpy.ndarray, numpy.ndarray]]:
-        for image_path in self._image_file_paths:
-            yield load_image(image_path, image_size=self._image_size)
-
-
-class YoloVideoLoader:
-    """
-    Class for pre-processing and iterating over video frames to be used as input for
-    YOLO models
-
-    :param path: Filepath to single video file
-    :param image_size: size of input images to the model
-    """
-
-    def __init__(self, path: str, image_size: Tuple[int, int] = (640, 640)):
-        self._path = path
-        self._image_size = image_size
-        self._vid = cv2.VideoCapture(self._path)
-        self._total_frames = int(self._vid.get(cv2.CAP_PROP_FRAME_COUNT))
-        self._fps = self._vid.get(cv2.CAP_PROP_FPS)
-
-    def __iter__(self) -> Iterator[Tuple[numpy.ndarray, numpy.ndarray]]:
-        for _ in range(self._total_frames):
-            loaded, frame = self._vid.read()
-            if not loaded:
-                break
-            yield load_image(frame, image_size=self._image_size)
-        self._vid.release()
-
-    @property
-    def original_fps(self) -> float:
-        """
-        :return: the frames per second of the video this object reads
-        """
-        return self._fps
-
-    @property
-    def original_frame_size(self) -> Tuple[int, int]:
-        """
-        :return: the original size of frames in the video this object reads
-        """
-        return (
-            int(self._vid.get(cv2.CAP_PROP_FRAME_WIDTH)),
-            int(self._vid.get(cv2.CAP_PROP_FRAME_HEIGHT)),
-        )
-
-    @property
-    def total_frames(self) -> int:
-        """
-        :return: the total number of frames this object may load from the video
-        """
-        return self._total_frames
-
-
-class YoloWebcamLoader:
-    """
-    Class for pre-processing and iterating over webcam frames to be used as input for
-    YOLO models.
- - Adapted from: https://github.com/ultralytics/yolov5/blob/master/utils/datasets.py - - :param camera: Webcam index - :param image_size: size of input image_batch to model - """ - - def __init__(self, camera: int, image_size: Tuple[int, int] = (640, 640)): - - self._camera = camera - self._image_size = image_size - self._stream = cv2.VideoCapture(self._camera) - self._stream.set(cv2.CAP_PROP_BUFFERSIZE, 3) - - def __iter__(self) -> Iterator[Tuple[numpy.ndarray, numpy.ndarray]]: - while True: - if cv2.waitKey(1) == ord("q"): # q to quit - self._stream.release() - cv2.destroyAllWindows() - break - loaded, frame = self._stream.read() - - assert loaded, f"Could not load image from webcam {self._camera}" - - frame = cv2.flip(frame, 1) # flip left-right - yield load_image(frame, image_size=self._image_size) - - @property - def original_frame_size(self) -> Tuple[int, int]: - """ - :return: the original size of frames in the stream this object reads - """ - return ( - int(self._stream.get(cv2.CAP_PROP_FRAME_WIDTH)), - int(self._stream.get(cv2.CAP_PROP_FRAME_HEIGHT)), - ) - - -class ImagesSaver: - """ - Base class for saving YOLO model outputs. Saves each image as an individual file in - the given directory - - :param save_dir: path to directory to write to - """ - - def __init__(self, save_dir: str): - self._save_dir = save_dir - self._idx = 0 - - create_dirs(save_dir) - - def save_frame(self, image: numpy.ndarray): - """ - :param image: numpy array of image to save - """ - output_path = os.path.join(self._save_dir, f"result-{self._idx}.jpg") - cv2.imwrite(output_path, image) - self._idx += 1 - - def close(self): - """ - perform any clean-up tasks - """ - pass - - -class VideoSaver(ImagesSaver): - """ - Class for saving YOLO model outputs as a VideoFile - - :param save_dir: path to directory to write to - :param original_fps: frames per second to save video with - :param output_frame_size: size of frames to write - :param target_fps: fps target for output video. if present, video - will be written with a certain number of the original frames - evenly dropped to match the target FPS. - """ - - def __init__( - self, - save_dir: str, - original_fps: float, - output_frame_size: Tuple[int, int], - target_fps: Optional[float] = None, - ): - super().__init__(save_dir) - - self._output_frame_size = output_frame_size - self._original_fps = original_fps - - if target_fps is not None and target_fps >= original_fps: - print( - f"target_fps {target_fps} is greater than source_fps " - f"{original_fps}. 
target fps video will not be written"
-            )
-        self._target_fps = target_fps
-
-        self._file_path = os.path.join(self._save_dir, "results.mp4")
-        self._writer = cv2.VideoWriter(
-            self._file_path,
-            cv2.VideoWriter_fourcc(*"mp4v"),
-            original_fps,
-            self._output_frame_size,
-        )
-        self._n_frames = 0
-
-    def save_frame(self, image: numpy.ndarray):
-        """
-        :param image: numpy array of image to save
-        """
-        self._writer.write(image)
-        self._n_frames += 1
-
-    def close(self):
-        """
-        perform any clean-up tasks
-        """
-        self._writer.release()
-        if self._target_fps is not None and self._target_fps < self._original_fps:
-            self._write_target_fps_video()
-
-    def _write_target_fps_video(self):
-        assert self._target_fps is not None
-        num_frames_to_keep = int(
-            self._n_frames * (self._target_fps / self._original_fps)
-        )
-        # adjust target fps so we can keep the same video duration
-        adjusted_target_fps = num_frames_to_keep * (self._original_fps / self._n_frames)
-
-        # select num_frames_to_keep evenly spaced frame idxs
-        frame_idxs_to_keep = set(
-            numpy.round(numpy.linspace(0, self._n_frames, num_frames_to_keep))
-            .astype(int)
-            .tolist()
-        )
-
-        # create new video writer for adjusted video
-        vid_path = os.path.join(
-            self._save_dir, f"_results-{adjusted_target_fps:.2f}fps.mp4"
-        )
-        fps_writer = cv2.VideoWriter(
-            vid_path,
-            cv2.VideoWriter_fourcc(*"mp4v"),
-            adjusted_target_fps,
-            self._output_frame_size,
-        )
-
-        # read from original video and write to FPS adjusted video
-        saved_vid = cv2.VideoCapture(self._file_path)
-        for idx in range(self._n_frames):
-            _, frame = saved_vid.read()
-            if idx in frame_idxs_to_keep:
-                fps_writer.write(frame)
-
-        saved_vid.release()
-        fps_writer.release()
-        shutil.move(vid_path, self._file_path)  # overwrite original file
-
-
-def load_image(
-    img: Union[str, numpy.ndarray], image_size: Tuple[int, int] = (640, 640)
-) -> Tuple[numpy.ndarray, numpy.ndarray]:
-    """
-    :param img: file path to image or raw image array
-    :param image_size: target shape for image
-    :return: Image loaded into numpy and reshaped to the given shape and the original
-        image
-    """
-    img = cv2.imread(img) if isinstance(img, str) else img
-    img_resized = cv2.resize(img, image_size)
-    img_transposed = img_resized[:, :, ::-1].transpose(2, 0, 1)
-
-    return img_transposed, img
-
-
-def get_annotations_save_dir(
-    initial_save_dir: str,
-    tag: Optional[str] = None,
-    engine: Optional[str] = None,
-) -> str:
-    """
-    Returns the directory to save annotations to. If directory exists and is
-    non-empty, a number is appended to the end of the directory name.
-
-    :param initial_save_dir: Initial directory to save annotations to
-    :param tag: A tag under which to save the annotations inside `initial_save_dir`
-    :param engine: Used to generate a unique tag if it is not provided.
-    :return: A new unique dir path to save annotations to
-    """
-    name = tag or f"{engine}-annotations"
-    initial_save_dir = os.path.join(initial_save_dir, name)
-    counter = 0
-    new_save_dir = initial_save_dir
-    while Path(new_save_dir).exists() and any(Path(new_save_dir).iterdir()):
-        counter += 1
-        new_save_dir = os.path.join(initial_save_dir, f"{name}-{counter:03d}")
-
-    _LOGGER.info(f"Results will be saved to {new_save_dir}")
-    Path(new_save_dir).mkdir(parents=True, exist_ok=True)
-    return new_save_dir
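For readers who relied on the removed annotation utilities, the sketch below shows how they were typically composed into a load → infer → annotate → save loop before this change. It is illustrative only: the `deepsparse.yolo.utils` module path, the `Pipeline.create(task="yolo", ...)` construction, and the model path are assumptions and are not part of this diff.

```python
# Minimal sketch of the pre-removal annotation flow; names below are assumed
# from the deleted module and the old Pipeline API, not from this diff.
from deepsparse import Pipeline  # pre-removal API, assumed
from deepsparse.yolo.utils import (  # module removed by this change
    annotate,
    get_annotations_save_dir,
    get_yolo_loader_and_saver,
)

# unique output directory, e.g. annotation-results/deepsparse-annotations
save_dir = get_annotations_save_dir("annotation-results", engine="deepsparse")

# loader yields (model_input, original_image); saver writes annotated frames
loader, saver, is_video = get_yolo_loader_and_saver(
    "sample_images/", save_dir, image_shape=(640, 640)
)

# hypothetical model path; the "yolo" task existed only before this removal
yolo_pipeline = Pipeline.create(task="yolo", model_path="path/to/yolo.onnx")

for model_input, original_image in loader:
    # annotate() runs the pipeline and draws boxes/labels on the original frame
    annotated = annotate(
        yolo_pipeline,
        image_batch=[model_input],
        original_images=[original_image],
        calc_fps=True,
    )
    saver.save_frame(annotated[0])

saver.close()
```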