From c6955f3ffda9db20ea8a917a1b7d2db63ab1bba5 Mon Sep 17 00:00:00 2001 From: rahul-tuli Date: Mon, 8 Jan 2024 11:29:42 -0500 Subject: [PATCH 1/2] move evaluator --- src/deepsparse/evaluation/cli.py | 4 +- src/deepsparse/evaluation/evaluator.py | 8 +- .../integrations/lm_evaluation_harness.py | 6 +- src/deepsparse/evaluation/registry.py | 15 +-- src/deepsparse/evaluation/results.py | 107 ------------------ tests/deepsparse/evaluation/test_evaluator.py | 4 +- tests/deepsparse/evaluation/test_registry.py | 51 --------- tests/deepsparse/evaluation/test_results.py | 78 ------------- 8 files changed, 16 insertions(+), 257 deletions(-) delete mode 100644 src/deepsparse/evaluation/results.py delete mode 100644 tests/deepsparse/evaluation/test_registry.py delete mode 100644 tests/deepsparse/evaluation/test_results.py diff --git a/src/deepsparse/evaluation/cli.py b/src/deepsparse/evaluation/cli.py index ed7ea72831..9314d8d0de 100644 --- a/src/deepsparse/evaluation/cli.py +++ b/src/deepsparse/evaluation/cli.py @@ -59,7 +59,7 @@ ########## Example command for evaluating a quantized MPT model from SparseZoo using the Deepsparse Engine. The evaluation will be run using `lm-evaluation-harness` on `hellaswag` and `gsm8k` datasets: -deepsparse.eval zoo:mpt-7b-mpt_pretrain-base_quantized \ +deepsparse.eval --target zoo:mpt-7b-mpt_pretrain-base_quantized \ --dataset hellaswag \ --dataset gsm8k \ --integration lm-evaluation-harness \ @@ -72,13 +72,13 @@ import click from deepsparse.evaluation.evaluator import evaluate -from deepsparse.evaluation.results import Result, save_result from deepsparse.evaluation.utils import args_to_dict, get_save_path from deepsparse.operators.engine_operator import ( DEEPSPARSE_ENGINE, ORT_ENGINE, TORCHSCRIPT_ENGINE, ) +from sparsezoo.evaluation.results import Result, save_result _LOGGER = logging.getLogger(__name__) diff --git a/src/deepsparse/evaluation/evaluator.py b/src/deepsparse/evaluation/evaluator.py index 7bd56adf6e..d99d520cda 100644 --- a/src/deepsparse/evaluation/evaluator.py +++ b/src/deepsparse/evaluation/evaluator.py @@ -14,14 +14,14 @@ import logging from typing import Any, List, Optional, Union -from deepsparse.evaluation.registry import EvaluationRegistry -from deepsparse.evaluation.results import Result +from deepsparse.evaluation.registry import DeepSparseEvaluationRegistry from deepsparse.evaluation.utils import create_model_from_target from deepsparse.operators.engine_operator import ( DEEPSPARSE_ENGINE, ORT_ENGINE, TORCHSCRIPT_ENGINE, ) +from sparsezoo.evaluation.results import Result __all__ = ["evaluate"] @@ -50,7 +50,9 @@ def evaluate( else target ) - eval_integration = EvaluationRegistry.resolve(model, datasets, integration) + eval_integration = DeepSparseEvaluationRegistry.resolve( + model, datasets, integration + ) return eval_integration( model=model, diff --git a/src/deepsparse/evaluation/integrations/lm_evaluation_harness.py b/src/deepsparse/evaluation/integrations/lm_evaluation_harness.py index 2f8c7b8cef..72ddea142e 100644 --- a/src/deepsparse/evaluation/integrations/lm_evaluation_harness.py +++ b/src/deepsparse/evaluation/integrations/lm_evaluation_harness.py @@ -27,9 +27,9 @@ import torch from deepsparse import Pipeline -from deepsparse.evaluation.registry import EvaluationRegistry -from deepsparse.evaluation.results import Dataset, Evaluation, Metric, Result +from deepsparse.evaluation.registry import DeepSparseEvaluationRegistry from lm_eval import base, evaluator, tasks, utils +from sparsezoo.evaluation.results import Dataset, Evaluation, 
Metric, Result _LOGGER = logging.getLogger(__name__) @@ -37,7 +37,7 @@ __all__ = ["integration_eval"] -@EvaluationRegistry.register(name="lm-evaluation-harness") +@DeepSparseEvaluationRegistry.register(name="lm-evaluation-harness") def integration_eval( model: Any, datasets: Union[List[str], str], diff --git a/src/deepsparse/evaluation/registry.py b/src/deepsparse/evaluation/registry.py index 5b6e45bc1c..5cc724e30c 100644 --- a/src/deepsparse/evaluation/registry.py +++ b/src/deepsparse/evaluation/registry.py @@ -17,24 +17,20 @@ import logging from typing import Any, Callable, List, Optional, Union -from sparsezoo.utils.registry import RegistryMixin +from sparsezoo.evaluation import EvaluationRegistry -__all__ = ["EvaluationRegistry"] +__all__ = ["DeepSparseEvaluationRegistry"] _LOGGER = logging.getLogger(__name__) -class EvaluationRegistry(RegistryMixin): +class DeepSparseEvaluationRegistry(EvaluationRegistry): """ Extends the RegistryMixin to enable registering and loading of evaluation functions. """ - @classmethod - def load_from_registry(cls, name: str) -> Callable[..., "Result"]: # noqa: F821 - return cls.get_value_from_registry(name=name) - @classmethod def resolve( cls, @@ -70,7 +66,4 @@ def resolve( potentially_check_dependency_import(integration) - try: - return cls.load_from_registry(name=integration) - except KeyError as err: - raise KeyError(err) + return cls.load_from_registry(name=integration) diff --git a/src/deepsparse/evaluation/results.py b/src/deepsparse/evaluation/results.py deleted file mode 100644 index 00212d0a1e..0000000000 --- a/src/deepsparse/evaluation/results.py +++ /dev/null @@ -1,107 +0,0 @@ -# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from typing import Any, List, Optional - -import yaml -from pydantic import BaseModel, Field - -from deepsparse.utils.data import prep_for_serialization - - -__all__ = [ - "Metric", - "Dataset", - "EvalSample", - "Evaluation", - "Result", - "save_result", -] - - -class Metric(BaseModel): - name: str = Field(description="Name of the metric") - value: float = Field(description="Value of the metric") - - -class Dataset(BaseModel): - type: Optional[str] = Field(description="Type of dataset") - name: str = Field(description="Name of the dataset") - config: Any = Field(description="Configuration for the dataset") - split: Optional[str] = Field(description="Split of the dataset") - - -class EvalSample(BaseModel): - input: Any = Field(description="Sample input to the model") - output: Any = Field(description="Sample output from the model") - - -class Evaluation(BaseModel): - task: str = Field( - description="Name of the evaluation integration " - "that the evaluation was performed on" - ) - dataset: Dataset = Field(description="Dataset that the evaluation was performed on") - metrics: List[Metric] = Field(description="List of metrics for the evaluation") - samples: Optional[List[EvalSample]] = Field( - description="List of samples for the evaluation" - ) - - -class Result(BaseModel): - formatted: List[Evaluation] = Field( - description="Evaluation result represented in the unified, structured format" - ) - raw: Any = Field( - description="Evaluation result represented in the raw format " - "(characteristic for the specific evaluation integration)" - ) - - -def save_result( - result: Result, - save_path: str, - save_format: str = "json", -): - """ - Saves a list of Evaluation objects to a file in the specified format. - - :param result: Result object to save - :param save_path: Path to save the evaluations to. - :param save_format: Format to save the evaluations in. 
- :return: The serialized evaluations - """ - # prepare the Result object for serialization - result: Result = prep_for_serialization(result) - if save_format == "json": - _save_to_json(result, save_path) - elif save_format == "yaml": - _save_to_yaml(result, save_path) - else: - NotImplementedError("Currently only json and yaml formats are supported") - - -def _save_to_json(result: Result, save_path: str): - _save(result.json(), save_path, expected_ext=".json") - - -def _save_to_yaml(result: Result, save_path: str): - _save(yaml.dump(result.dict()), save_path, expected_ext=".yaml") - - -def _save(data: str, save_path: str, expected_ext: str): - if not save_path.endswith(expected_ext): - raise ValueError(f"save_path must end with extension: {expected_ext}") - with open(save_path, "w") as f: - f.write(data) diff --git a/tests/deepsparse/evaluation/test_evaluator.py b/tests/deepsparse/evaluation/test_evaluator.py index dedd63fa36..9bb8fed546 100644 --- a/tests/deepsparse/evaluation/test_evaluator.py +++ b/tests/deepsparse/evaluation/test_evaluator.py @@ -21,7 +21,7 @@ import pytest from deepsparse.evaluation.evaluator import evaluate from deepsparse.evaluation.integrations import try_import_lm_evaluation_harness -from deepsparse.evaluation.registry import EvaluationRegistry +from deepsparse.evaluation.registry import DeepSparseEvaluationRegistry from deepsparse.evaluation.results import ( Dataset, EvalSample, @@ -31,7 +31,7 @@ ) -@EvaluationRegistry.register() +@DeepSparseEvaluationRegistry.register() def dummy_integration(*args, **kwargs): result_formatted = [ Evaluation( diff --git a/tests/deepsparse/evaluation/test_registry.py b/tests/deepsparse/evaluation/test_registry.py deleted file mode 100644 index 6af554e9ee..0000000000 --- a/tests/deepsparse/evaluation/test_registry.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import pytest -from deepsparse.evaluation.registry import EvaluationRegistry - - -@pytest.fixture -def registry_with_foo(): - class Registry(EvaluationRegistry): - pass - - @Registry.register() - def foo(*args, **kwargs): - return "foo" - - return Registry - - -@pytest.fixture -def registry_with_buzz(): - class Registry(EvaluationRegistry): - pass - - @Registry.register(name=["buzz", "buzzer"]) - def buzz(*args, **kwargs): - return "buzz" - - return Registry - - -def test_get_foo_from_registry(registry_with_foo): - eval_function = registry_with_foo.load_from_registry("foo") - assert eval_function() == "foo" - - -def test_get_multiple_buzz_from_registry(registry_with_buzz): - eval_function_1 = registry_with_buzz.load_from_registry("buzz") - eval_function_2 = registry_with_buzz.load_from_registry("buzzer") - assert eval_function_1() == eval_function_2() == "buzz" diff --git a/tests/deepsparse/evaluation/test_results.py b/tests/deepsparse/evaluation/test_results.py deleted file mode 100644 index c7a580c374..0000000000 --- a/tests/deepsparse/evaluation/test_results.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import json - -import numpy as np -import yaml - -import pytest -from deepsparse.evaluation.results import ( - Dataset, - EvalSample, - Evaluation, - Metric, - Result, - save_result, -) - - -@pytest.fixture() -def evaluations(): - return [ - Evaluation( - task="task_1", - dataset=Dataset( - type="type_1", name="name_1", config="config_1", split="split_1" - ), - metrics=[Metric(name="metric_name_1", value=1.0)], - samples=[EvalSample(input=np.array([[5]]), output=5)], - ), - Evaluation( - task="task_2", - dataset=Dataset( - type="type_2", name="name_2", config="config_2", split="split_2" - ), - metrics=[ - Metric(name="metric_name_2", value=2.0), - Metric(name="metric_name_3", value=3.0), - ], - samples=[ - EvalSample(input=np.array([[10.0]]), output=10.0), - EvalSample(input=np.array([[20.0]]), output=20.0), - ], - ), - ] - - -@pytest.fixture() -def result(evaluations): - return Result(formatted=evaluations, raw="dummy_raw_evaluation") - - -def test_serialize_result_json(tmp_path, result): - path_to_file = tmp_path / "result.json" - save_result(result=result, save_format="json", save_path=path_to_file.as_posix()) - - with open(path_to_file.as_posix(), "r") as f: - reloaded_results = json.load(f) - assert reloaded_results == result.dict() - - -def test_serialize_result_yaml(tmp_path, result): - path_to_file = tmp_path / "result.yaml" - save_result(result=result, save_format="yaml", save_path=path_to_file.as_posix()) - with open(path_to_file.as_posix(), "r") as f: - reloaded_results = yaml.safe_load(f) - assert reloaded_results == result.dict() From 34158406f0131135155f34cb026b39ec56961142 Mon Sep 17 00:00:00 2001 From: rahul-tuli Date: Tue, 9 Jan 2024 10:01:05 -0500 Subject: [PATCH 2/2] Update to use `get_value_from_registry` rather than overriding `load_from_registry` --- 
src/deepsparse/evaluation/evaluator.py | 5 +---- src/deepsparse/evaluation/registry.py | 10 +++++++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/deepsparse/evaluation/evaluator.py b/src/deepsparse/evaluation/evaluator.py index d99d520cda..5edb64bc3c 100644 --- a/src/deepsparse/evaluation/evaluator.py +++ b/src/deepsparse/evaluation/evaluator.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import logging from typing import Any, List, Optional, Union from deepsparse.evaluation.registry import DeepSparseEvaluationRegistry @@ -26,8 +25,6 @@ __all__ = ["evaluate"] -_LOGGER = logging.getLogger(__name__) - def evaluate( target: Any, @@ -51,7 +48,7 @@ def evaluate( ) eval_integration = DeepSparseEvaluationRegistry.resolve( - model, datasets, integration + model=model, datasets=datasets, integration=integration ) return eval_integration( diff --git a/src/deepsparse/evaluation/registry.py b/src/deepsparse/evaluation/registry.py index 5cc724e30c..1a35dea5a9 100644 --- a/src/deepsparse/evaluation/registry.py +++ b/src/deepsparse/evaluation/registry.py @@ -28,7 +28,11 @@ class DeepSparseEvaluationRegistry(EvaluationRegistry): """ Extends the RegistryMixin to enable registering - and loading of evaluation functions. + and loading of evaluation functions for DeepSparse. + + Adds a resolve method to automatically infer the integration + from the model and datasets if not specified, and returns + the appropriate evaluation function as a callable. """ @classmethod @@ -39,7 +43,7 @@ def resolve( integration: Optional[str] = None, ) -> Callable[..., "Result"]: # noqa: F821 """ - Chooses an evaluation function from the registry based on the target, + Chooses an evaluation function from the registry based on the model, datasets and integration. If integration is specified, attempts to load the evaluation function @@ -66,4 +70,4 @@ def resolve( potentially_check_dependency_import(integration) - return cls.load_from_registry(name=integration) + return cls.get_value_from_registry(name=integration)
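Reviewer sketch (not part of the patch): how the pieces fit together after both commits. It assumes the `sparsezoo.evaluation.results` models and `save_result` keep the same interface as the `deepsparse.evaluation.results` module deleted above (which is the premise of this move), and that `DeepSparseEvaluationRegistry.register`/`resolve` behave as shown in the diff. The integration name "my-integration" and the metric value are hypothetical placeholders.

# Minimal sketch, assuming sparsezoo.evaluation.results mirrors the interface of
# the deepsparse.evaluation.results module removed in this PR.
from deepsparse.evaluation.registry import DeepSparseEvaluationRegistry
from sparsezoo.evaluation.results import (
    Dataset,
    Evaluation,
    Metric,
    Result,
    save_result,
)


# Register a custom integration under a hypothetical name, analogous to the
# "lm-evaluation-harness" registration and the dummy integration in the tests.
@DeepSparseEvaluationRegistry.register(name="my-integration")
def my_integration(model, datasets, **kwargs):
    evaluation = Evaluation(
        task="my-integration",
        dataset=Dataset(type=None, name=str(datasets), config=None, split=None),
        metrics=[Metric(name="accuracy", value=0.0)],  # placeholder value
        samples=None,
    )
    return Result(formatted=[evaluation], raw=None)


# resolve() looks the integration up by name (via get_value_from_registry after
# the second commit) and returns the registered function as a callable.
eval_fn = DeepSparseEvaluationRegistry.resolve(
    model=None, datasets="hellaswag", integration="my-integration"
)
result = eval_fn(model=None, datasets="hellaswag")

# save_result is now imported from sparsezoo rather than deepsparse.
save_result(result=result, save_path="result.json", save_format="json")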