From c6955f3ffda9db20ea8a917a1b7d2db63ab1bba5 Mon Sep 17 00:00:00 2001 From: rahul-tuli Date: Mon, 8 Jan 2024 11:29:42 -0500 Subject: [PATCH 1/2] move evaluator --- src/deepsparse/evaluation/cli.py | 4 +- src/deepsparse/evaluation/evaluator.py | 8 +- .../integrations/lm_evaluation_harness.py | 6 +- src/deepsparse/evaluation/registry.py | 15 +-- src/deepsparse/evaluation/results.py | 107 ------------------ tests/deepsparse/evaluation/test_evaluator.py | 4 +- tests/deepsparse/evaluation/test_registry.py | 51 --------- tests/deepsparse/evaluation/test_results.py | 78 ------------- 8 files changed, 16 insertions(+), 257 deletions(-) delete mode 100644 src/deepsparse/evaluation/results.py delete mode 100644 tests/deepsparse/evaluation/test_registry.py delete mode 100644 tests/deepsparse/evaluation/test_results.py diff --git a/src/deepsparse/evaluation/cli.py b/src/deepsparse/evaluation/cli.py index ed7ea72831..9314d8d0de 100644 --- a/src/deepsparse/evaluation/cli.py +++ b/src/deepsparse/evaluation/cli.py @@ -59,7 +59,7 @@ ########## Example command for evaluating a quantized MPT model from SparseZoo using the Deepsparse Engine. The evaluation will be run using `lm-evaluation-harness` on `hellaswag` and `gsm8k` datasets: -deepsparse.eval zoo:mpt-7b-mpt_pretrain-base_quantized \ +deepsparse.eval --target zoo:mpt-7b-mpt_pretrain-base_quantized \ --dataset hellaswag \ --dataset gsm8k \ --integration lm-evaluation-harness \ @@ -72,13 +72,13 @@ import click from deepsparse.evaluation.evaluator import evaluate -from deepsparse.evaluation.results import Result, save_result from deepsparse.evaluation.utils import args_to_dict, get_save_path from deepsparse.operators.engine_operator import ( DEEPSPARSE_ENGINE, ORT_ENGINE, TORCHSCRIPT_ENGINE, ) +from sparsezoo.evaluation.results import Result, save_result _LOGGER = logging.getLogger(__name__) diff --git a/src/deepsparse/evaluation/evaluator.py b/src/deepsparse/evaluation/evaluator.py index 7bd56adf6e..d99d520cda 100644 --- a/src/deepsparse/evaluation/evaluator.py +++ b/src/deepsparse/evaluation/evaluator.py @@ -14,14 +14,14 @@ import logging from typing import Any, List, Optional, Union -from deepsparse.evaluation.registry import EvaluationRegistry -from deepsparse.evaluation.results import Result +from deepsparse.evaluation.registry import DeepSparseEvaluationRegistry from deepsparse.evaluation.utils import create_model_from_target from deepsparse.operators.engine_operator import ( DEEPSPARSE_ENGINE, ORT_ENGINE, TORCHSCRIPT_ENGINE, ) +from sparsezoo.evaluation.results import Result __all__ = ["evaluate"] @@ -50,7 +50,9 @@ def evaluate( else target ) - eval_integration = EvaluationRegistry.resolve(model, datasets, integration) + eval_integration = DeepSparseEvaluationRegistry.resolve( + model, datasets, integration + ) return eval_integration( model=model, diff --git a/src/deepsparse/evaluation/integrations/lm_evaluation_harness.py b/src/deepsparse/evaluation/integrations/lm_evaluation_harness.py index 2f8c7b8cef..72ddea142e 100644 --- a/src/deepsparse/evaluation/integrations/lm_evaluation_harness.py +++ b/src/deepsparse/evaluation/integrations/lm_evaluation_harness.py @@ -27,9 +27,9 @@ import torch from deepsparse import Pipeline -from deepsparse.evaluation.registry import EvaluationRegistry -from deepsparse.evaluation.results import Dataset, Evaluation, Metric, Result +from deepsparse.evaluation.registry import DeepSparseEvaluationRegistry from lm_eval import base, evaluator, tasks, utils +from sparsezoo.evaluation.results import Dataset, Evaluation, 
Metric, Result _LOGGER = logging.getLogger(__name__) @@ -37,7 +37,7 @@ __all__ = ["integration_eval"] -@EvaluationRegistry.register(name="lm-evaluation-harness") +@DeepSparseEvaluationRegistry.register(name="lm-evaluation-harness") def integration_eval( model: Any, datasets: Union[List[str], str], diff --git a/src/deepsparse/evaluation/registry.py b/src/deepsparse/evaluation/registry.py index 5b6e45bc1c..5cc724e30c 100644 --- a/src/deepsparse/evaluation/registry.py +++ b/src/deepsparse/evaluation/registry.py @@ -17,24 +17,20 @@ import logging from typing import Any, Callable, List, Optional, Union -from sparsezoo.utils.registry import RegistryMixin +from sparsezoo.evaluation import EvaluationRegistry -__all__ = ["EvaluationRegistry"] +__all__ = ["DeepSparseEvaluationRegistry"] _LOGGER = logging.getLogger(__name__) -class EvaluationRegistry(RegistryMixin): +class DeepSparseEvaluationRegistry(EvaluationRegistry): """ Extends the RegistryMixin to enable registering and loading of evaluation functions. """ - @classmethod - def load_from_registry(cls, name: str) -> Callable[..., "Result"]: # noqa: F821 - return cls.get_value_from_registry(name=name) - @classmethod def resolve( cls, @@ -70,7 +66,4 @@ def resolve( potentially_check_dependency_import(integration) - try: - return cls.load_from_registry(name=integration) - except KeyError as err: - raise KeyError(err) + return cls.load_from_registry(name=integration) diff --git a/src/deepsparse/evaluation/results.py b/src/deepsparse/evaluation/results.py deleted file mode 100644 index 00212d0a1e..0000000000 --- a/src/deepsparse/evaluation/results.py +++ /dev/null @@ -1,107 +0,0 @@ -# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from typing import Any, List, Optional - -import yaml -from pydantic import BaseModel, Field - -from deepsparse.utils.data import prep_for_serialization - - -__all__ = [ - "Metric", - "Dataset", - "EvalSample", - "Evaluation", - "Result", - "save_result", -] - - -class Metric(BaseModel): - name: str = Field(description="Name of the metric") - value: float = Field(description="Value of the metric") - - -class Dataset(BaseModel): - type: Optional[str] = Field(description="Type of dataset") - name: str = Field(description="Name of the dataset") - config: Any = Field(description="Configuration for the dataset") - split: Optional[str] = Field(description="Split of the dataset") - - -class EvalSample(BaseModel): - input: Any = Field(description="Sample input to the model") - output: Any = Field(description="Sample output from the model") - - -class Evaluation(BaseModel): - task: str = Field( - description="Name of the evaluation integration " - "that the evaluation was performed on" - ) - dataset: Dataset = Field(description="Dataset that the evaluation was performed on") - metrics: List[Metric] = Field(description="List of metrics for the evaluation") - samples: Optional[List[EvalSample]] = Field( - description="List of samples for the evaluation" - ) - - -class Result(BaseModel): - formatted: List[Evaluation] = Field( - description="Evaluation result represented in the unified, structured format" - ) - raw: Any = Field( - description="Evaluation result represented in the raw format " - "(characteristic for the specific evaluation integration)" - ) - - -def save_result( - result: Result, - save_path: str, - save_format: str = "json", -): - """ - Saves a list of Evaluation objects to a file in the specified format. - - :param result: Result object to save - :param save_path: Path to save the evaluations to. - :param save_format: Format to save the evaluations in. 
- :return: The serialized evaluations - """ - # prepare the Result object for serialization - result: Result = prep_for_serialization(result) - if save_format == "json": - _save_to_json(result, save_path) - elif save_format == "yaml": - _save_to_yaml(result, save_path) - else: - NotImplementedError("Currently only json and yaml formats are supported") - - -def _save_to_json(result: Result, save_path: str): - _save(result.json(), save_path, expected_ext=".json") - - -def _save_to_yaml(result: Result, save_path: str): - _save(yaml.dump(result.dict()), save_path, expected_ext=".yaml") - - -def _save(data: str, save_path: str, expected_ext: str): - if not save_path.endswith(expected_ext): - raise ValueError(f"save_path must end with extension: {expected_ext}") - with open(save_path, "w") as f: - f.write(data) diff --git a/tests/deepsparse/evaluation/test_evaluator.py b/tests/deepsparse/evaluation/test_evaluator.py index dedd63fa36..9bb8fed546 100644 --- a/tests/deepsparse/evaluation/test_evaluator.py +++ b/tests/deepsparse/evaluation/test_evaluator.py @@ -21,7 +21,7 @@ import pytest from deepsparse.evaluation.evaluator import evaluate from deepsparse.evaluation.integrations import try_import_lm_evaluation_harness -from deepsparse.evaluation.registry import EvaluationRegistry +from deepsparse.evaluation.registry import DeepSparseEvaluationRegistry from deepsparse.evaluation.results import ( Dataset, EvalSample, @@ -31,7 +31,7 @@ ) -@EvaluationRegistry.register() +@DeepSparseEvaluationRegistry.register() def dummy_integration(*args, **kwargs): result_formatted = [ Evaluation( diff --git a/tests/deepsparse/evaluation/test_registry.py b/tests/deepsparse/evaluation/test_registry.py deleted file mode 100644 index 6af554e9ee..0000000000 --- a/tests/deepsparse/evaluation/test_registry.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import pytest -from deepsparse.evaluation.registry import EvaluationRegistry - - -@pytest.fixture -def registry_with_foo(): - class Registry(EvaluationRegistry): - pass - - @Registry.register() - def foo(*args, **kwargs): - return "foo" - - return Registry - - -@pytest.fixture -def registry_with_buzz(): - class Registry(EvaluationRegistry): - pass - - @Registry.register(name=["buzz", "buzzer"]) - def buzz(*args, **kwargs): - return "buzz" - - return Registry - - -def test_get_foo_from_registry(registry_with_foo): - eval_function = registry_with_foo.load_from_registry("foo") - assert eval_function() == "foo" - - -def test_get_multiple_buzz_from_registry(registry_with_buzz): - eval_function_1 = registry_with_buzz.load_from_registry("buzz") - eval_function_2 = registry_with_buzz.load_from_registry("buzzer") - assert eval_function_1() == eval_function_2() == "buzz" diff --git a/tests/deepsparse/evaluation/test_results.py b/tests/deepsparse/evaluation/test_results.py deleted file mode 100644 index c7a580c374..0000000000 --- a/tests/deepsparse/evaluation/test_results.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import json - -import numpy as np -import yaml - -import pytest -from deepsparse.evaluation.results import ( - Dataset, - EvalSample, - Evaluation, - Metric, - Result, - save_result, -) - - -@pytest.fixture() -def evaluations(): - return [ - Evaluation( - task="task_1", - dataset=Dataset( - type="type_1", name="name_1", config="config_1", split="split_1" - ), - metrics=[Metric(name="metric_name_1", value=1.0)], - samples=[EvalSample(input=np.array([[5]]), output=5)], - ), - Evaluation( - task="task_2", - dataset=Dataset( - type="type_2", name="name_2", config="config_2", split="split_2" - ), - metrics=[ - Metric(name="metric_name_2", value=2.0), - Metric(name="metric_name_3", value=3.0), - ], - samples=[ - EvalSample(input=np.array([[10.0]]), output=10.0), - EvalSample(input=np.array([[20.0]]), output=20.0), - ], - ), - ] - - -@pytest.fixture() -def result(evaluations): - return Result(formatted=evaluations, raw="dummy_raw_evaluation") - - -def test_serialize_result_json(tmp_path, result): - path_to_file = tmp_path / "result.json" - save_result(result=result, save_format="json", save_path=path_to_file.as_posix()) - - with open(path_to_file.as_posix(), "r") as f: - reloaded_results = json.load(f) - assert reloaded_results == result.dict() - - -def test_serialize_result_yaml(tmp_path, result): - path_to_file = tmp_path / "result.yaml" - save_result(result=result, save_format="yaml", save_path=path_to_file.as_posix()) - with open(path_to_file.as_posix(), "r") as f: - reloaded_results = yaml.safe_load(f) - assert reloaded_results == result.dict() From 34158406f0131135155f34cb026b39ec56961142 Mon Sep 17 00:00:00 2001 From: rahul-tuli Date: Tue, 9 Jan 2024 10:01:05 -0500 Subject: [PATCH 2/2] Update to use `get_value_from_registry` rather than overriding `load_from_registry` --- 
src/deepsparse/evaluation/evaluator.py | 5 +---- src/deepsparse/evaluation/registry.py | 10 +++++++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/deepsparse/evaluation/evaluator.py b/src/deepsparse/evaluation/evaluator.py index d99d520cda..5edb64bc3c 100644 --- a/src/deepsparse/evaluation/evaluator.py +++ b/src/deepsparse/evaluation/evaluator.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import logging from typing import Any, List, Optional, Union from deepsparse.evaluation.registry import DeepSparseEvaluationRegistry @@ -26,8 +25,6 @@ __all__ = ["evaluate"] -_LOGGER = logging.getLogger(__name__) - def evaluate( target: Any, @@ -51,7 +48,7 @@ def evaluate( ) eval_integration = DeepSparseEvaluationRegistry.resolve( - model, datasets, integration + model=model, datasets=datasets, integration=integration ) return eval_integration( diff --git a/src/deepsparse/evaluation/registry.py b/src/deepsparse/evaluation/registry.py index 5cc724e30c..1a35dea5a9 100644 --- a/src/deepsparse/evaluation/registry.py +++ b/src/deepsparse/evaluation/registry.py @@ -28,7 +28,11 @@ class DeepSparseEvaluationRegistry(EvaluationRegistry): """ Extends the RegistryMixin to enable registering - and loading of evaluation functions. + and loading of evaluation functions for DeepSparse. + + Adds a resolve method to automatically infer the integration + from the model and datasets if not specified, and returns + the appropriate evaluation function as a callable. """ @classmethod @@ -39,7 +43,7 @@ def resolve( integration: Optional[str] = None, ) -> Callable[..., "Result"]: # noqa: F821 """ - Chooses an evaluation function from the registry based on the target, + Chooses an evaluation function from the registry based on the model, datasets and integration. If integration is specified, attempts to load the evaluation function @@ -66,4 +70,4 @@ def resolve( potentially_check_dependency_import(integration) - return cls.load_from_registry(name=integration) + return cls.get_value_from_registry(name=integration)
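Reviewer sketch (not part of the patch): how the pieces fit together after both commits. It assumes the `sparsezoo.evaluation.results` models and `save_result` keep the same interface as the `deepsparse.evaluation.results` module deleted above (which is the premise of this move), and that `DeepSparseEvaluationRegistry.register`/`resolve` behave as shown in the diff. The integration name "my-integration" and the metric value are hypothetical placeholders.

# Minimal sketch, assuming sparsezoo.evaluation.results mirrors the interface of
# the deepsparse.evaluation.results module removed in this PR.
from deepsparse.evaluation.registry import DeepSparseEvaluationRegistry
from sparsezoo.evaluation.results import (
    Dataset,
    Evaluation,
    Metric,
    Result,
    save_result,
)


# Register a custom integration under a hypothetical name, analogous to the
# "lm-evaluation-harness" registration and the dummy integration in the tests.
@DeepSparseEvaluationRegistry.register(name="my-integration")
def my_integration(model, datasets, **kwargs):
    evaluation = Evaluation(
        task="my-integration",
        dataset=Dataset(type=None, name=str(datasets), config=None, split=None),
        metrics=[Metric(name="accuracy", value=0.0)],  # placeholder value
        samples=None,
    )
    return Result(formatted=[evaluation], raw=None)


# resolve() looks the integration up by name (via get_value_from_registry after
# the second commit) and returns the registered function as a callable.
eval_fn = DeepSparseEvaluationRegistry.resolve(
    model=None, datasets="hellaswag", integration="my-integration"
)
result = eval_fn(model=None, datasets="hellaswag")

# save_result is now imported from sparsezoo rather than deepsparse.
save_result(result=result, save_path="result.json", save_format="json")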