From fb014fed19047d5af61bc5cb620548d4a32b16da Mon Sep 17 00:00:00 2001 From: Rahul Tuli Date: Tue, 3 May 2022 11:07:55 -0400 Subject: [PATCH 1/3] BUGFIX: updated pipeline_cli to work with new pipeline refactor Updated a few typos in docstring in pipeline.py Also noting that `from deepsparse import Pipeline` works now --- .../transformers/pipelines/pipeline.py | 10 +- src/deepsparse/transformers/pipelines_cli.py | 100 ++++++++++++------ 2 files changed, 72 insertions(+), 38 deletions(-) diff --git a/src/deepsparse/transformers/pipelines/pipeline.py b/src/deepsparse/transformers/pipelines/pipeline.py index 2fdcd27236..82d09dd169 100644 --- a/src/deepsparse/transformers/pipelines/pipeline.py +++ b/src/deepsparse/transformers/pipelines/pipeline.py @@ -169,12 +169,12 @@ def pipeline( **kwargs, ) -> Pipeline: """ - [DEPRECATED] - deepsparse.transformers.pipeline is deprecated to craete DeepSparse - pipelines for tranformers tasks use deepsparse.Pipeline.create(task, ...) + [DEPRECATED] - deepsparse.transformers.pipeline is deprecated to create DeepSparse + pipelines for transformers tasks use deepsparse.Pipeline.create(task, ...) Utility factory method to build a Pipeline - :param task: name of the task to define which pipeline to create. Currently + :param task: name of the task to define which pipeline to create. Currently, supported task - "question-answering" :param model_name: canonical name of the hugging face model this model is based on :param model_path: path to model directory containing `model.onnx`, `config.json`, @@ -194,8 +194,8 @@ def pipeline( :return: Pipeline object for the given taks and model """ warnings.warn( - "[DEPRECATED] - deepsparse.transformers.pipeline is deprecated to craete " - "DeepSparse pipelines for tranformers tasks use deepsparse.Pipeline.create()" + "[DEPRECATED] - deepsparse.transformers.pipeline is deprecated to create " + "DeepSparse pipelines for transformers tasks use deepsparse.Pipeline.create()" ) if config is not None or tokenizer is not None: diff --git a/src/deepsparse/transformers/pipelines_cli.py b/src/deepsparse/transformers/pipelines_cli.py index 61ac88b496..f18e77093e 100644 --- a/src/deepsparse/transformers/pipelines_cli.py +++ b/src/deepsparse/transformers/pipelines_cli.py @@ -22,7 +22,6 @@ text-classification,token-classification}] -d DATA [--model-name MODEL_NAME] --model-path MODEL_PATH [--engine-type {deepsparse,onnxruntime}] - [--config CONFIG] [--tokenizer TOKENIZER] [--max-length MAX_LENGTH] [--num-cores NUM_CORES] [-b BATCH_SIZE] [--scheduler {multi,single}] [-o OUTPUT_FILE] @@ -50,12 +49,6 @@ --engine-type {deepsparse,onnxruntime}, --engine_type {deepsparse,onnxruntime} inference engine name to use. Supported options are 'deepsparse'and 'onnxruntime' - --config CONFIG Huggingface model config, if none provided, default - will be usedwhich will be from the model name or - sparsezoo stub if given for model path - --tokenizer TOKENIZER - Huggingface tokenizer, if none provided, default will - be used --max-length MAX_LENGTH, --max_length MAX_LENGTH Maximum sequence length of model inputs. default is 128 @@ -78,7 +71,6 @@ 2) deepsparse.transformers.run_inference --task ner \ --model-path models/bert-ner-test.onnx \ --data input.txt \ - --config ner-config.json \ --output-file out.txt \ --batch_size 2 @@ -91,16 +83,28 @@ """ import argparse -from typing import Optional +import json +from typing import Any, Callable, Optional -from .loaders import SUPPORTED_EXTENSIONS -from .pipelines import SUPPORTED_ENGINES, SUPPORTED_TASKS, pipeline, process_dataset +from pydantic import BaseModel + +from deepsparse.pipeline import SUPPORTED_PIPELINE_ENGINES +from deepsparse.transformers import fix_numpy_types +from deepsparse.transformers.loaders import SUPPORTED_EXTENSIONS, get_batch_loader +from deepsparse.transformers.pipelines import pipeline __all__ = [ "cli", ] +SUPPORTED_TASKS = [ + "question_answering", + "text_classification", + "token_classification", + "ner", +] + def _parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser( @@ -111,8 +115,8 @@ def _parse_args() -> argparse.Namespace: "-t", "--task", help="Name of the task to define which pipeline to create." - f" Currently supported tasks {list(SUPPORTED_TASKS.keys())}", - choices=SUPPORTED_TASKS.keys(), + f" Currently supported tasks {SUPPORTED_TASKS}", + choices=SUPPORTED_TASKS, type=str, default="sentiment-analysis", ) @@ -150,24 +154,8 @@ def _parse_args() -> argparse.Namespace: help="Inference engine name to use. Supported options are 'deepsparse'" "and 'onnxruntime'", type=str, - choices=SUPPORTED_ENGINES, - default=SUPPORTED_ENGINES[0], - ) - - parser.add_argument( - "--config", - help="Huggingface model config, if none provided, default will be used" - "which will be from the model name or sparsezoo stub if given for " - "model path", - type=str, - default=None, - ) - - parser.add_argument( - "--tokenizer", - help="Huggingface tokenizer, if none provided, default will be used", - type=str, - default=None, + choices=SUPPORTED_PIPELINE_ENGINES, + default=SUPPORTED_PIPELINE_ENGINES[0], ) parser.add_argument( @@ -229,8 +217,6 @@ def cli(): model_name=_args.model_name, model_path=_args.model_path, engine_type=_args.engine_type, - config=_args.config, - tokenizer=_args.tokenizer, max_length=_args.max_length, num_cores=_args.num_cores, batch_size=_args.batch_size, @@ -245,5 +231,53 @@ def cli(): ) +def response_to_json(response: Any): + """ + Converts a response to a json string + + :param response: A List[Any] or Dict[Any, Any] or a Pydantic model, + that should be converted to a valid json string + :return: A json string representation of the response + """ + if isinstance(response, list): + return [response_to_json(val) for val in response] + elif isinstance(response, dict): + return {key: response_to_json(val) for key, val in response.items()} + elif hasattr(response, "json") and callable(response.json): + fixed = response.json() + return fixed + return json.dumps(response) + + +def process_dataset( + pipeline_object: Callable, + data_path: str, + batch_size: int, + task: str, + output_path: str, +) -> None: + """ + :param pipeline_object: An instantiated pipeline Callable object + :param data_path: Path to input file, supports csv, json and text files + :param batch_size: batch_size to use for inference + :param task: The task pipeline is instantiated for + :param output_path: Path to a json file to output inference results to + """ + batch_loader = get_batch_loader( + data_file=data_path, + batch_size=batch_size, + task=task, + ) + # Wraps pipeline object to make numpy types serializable + pipeline_object = fix_numpy_types(pipeline_object) + with open(output_path, "a") as output_file: + for batch in batch_loader: + batch_output = pipeline_object(**batch) + json_output = response_to_json(batch_output) + + json.dump(json_output, output_file) + output_file.write("\n") + + if __name__ == "__main__": cli() From fd74f248d232c0f034ee66d8a73ad14ad3a94afc Mon Sep 17 00:00:00 2001 From: Rahul Tuli Date: Tue, 3 May 2022 12:45:05 -0400 Subject: [PATCH 2/3] Style --- src/deepsparse/transformers/pipelines_cli.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/deepsparse/transformers/pipelines_cli.py b/src/deepsparse/transformers/pipelines_cli.py index f18e77093e..8919ec35e9 100644 --- a/src/deepsparse/transformers/pipelines_cli.py +++ b/src/deepsparse/transformers/pipelines_cli.py @@ -86,8 +86,6 @@ import json from typing import Any, Callable, Optional -from pydantic import BaseModel - from deepsparse.pipeline import SUPPORTED_PIPELINE_ENGINES from deepsparse.transformers import fix_numpy_types from deepsparse.transformers.loaders import SUPPORTED_EXTENSIONS, get_batch_loader From 622c4666b4c7a82c31d0f70c34a4e399fcf74ede Mon Sep 17 00:00:00 2001 From: Rahul Tuli Date: Tue, 3 May 2022 12:52:59 -0400 Subject: [PATCH 3/3] nit --- src/deepsparse/transformers/pipelines_cli.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/deepsparse/transformers/pipelines_cli.py b/src/deepsparse/transformers/pipelines_cli.py index 8919ec35e9..12b7e80d1c 100644 --- a/src/deepsparse/transformers/pipelines_cli.py +++ b/src/deepsparse/transformers/pipelines_cli.py @@ -242,8 +242,7 @@ def response_to_json(response: Any): elif isinstance(response, dict): return {key: response_to_json(val) for key, val in response.items()} elif hasattr(response, "json") and callable(response.json): - fixed = response.json() - return fixed + return response.json() return json.dumps(response)