Dev (#181)
* Added RelativeDifferenceScore

* fixup! Added RelativeDifferenceScore

* make judging from features more convenient: this path expects users to have pre-existing code that performs something like judge's generate_prediction step, so the prediction is then just treated as a settable object attribute

* Refactored cached prediction logic for tests

* Restored feature_judge for backwards compatibility

Co-authored-by: Russell Jarvis <russelljarvis@protonmail.com>
rgerkin and russelljjarvis committed May 3, 2021
1 parent eeb64f3 commit 21821a3
Showing 3 changed files with 110 additions and 12 deletions.
1 change: 0 additions & 1 deletion sciunit/models/runnable.py
@@ -47,7 +47,6 @@ def set_backend(self, backend: Union[str, tuple, list, None]):
             Exception: The backend was not found.
         """
         if inspect.isclass(backend) and Backend in backend.__bases__:
-            print(backend.__name__)
             name = backend.__name__
             args = []
             kwargs = {}
78 changes: 77 additions & 1 deletion sciunit/scores/complete.py
@@ -7,6 +7,7 @@

 import numpy as np
 import quantities as pq
+from typing import Union
 from sciunit import errors, utils

 from .base import Score
@@ -213,8 +214,83 @@ def norm_score(self) -> float:
         return 1 - 2 * math.fabs(0.5 - cdf)

     def __str__(self):
-        return "Ratio = %.2f" % self.score
+        return 'Ratio = %.2f' % self.score


+class RelativeDifferenceScore(Score):
+    """A relative difference between prediction and observation.
+
+    The absolute value of the difference between the prediction and the
+    observation is divided by a reference value with the same units. This
+    reference scale should be chosen for each test such that normalization
+    produces directly comparable scores across tests. For example, if 5 volts
+    represents a medium size difference for TestA, and 10 seconds represents a
+    medium size difference for TestB, then 5 volts and 10 seconds should be
+    used for this reference scale in TestA and TestB, respectively. The
+    attribute `scale` can be passed to the compute method or set for the whole
+    class in advance. Otherwise, a scale of 1 (in the units of the
+    observation and prediction) will be used.
+    """
+
+    _allowed_types = (float,)
+
+    _description = ('The relative difference between the prediction and the observation')
+
+    _best = 0.0  # A RelativeDifferenceScore of 0.0 is best
+
+    _worst = np.inf
+
+    scale = None
+
+    def _check_score(self, score):
+        if score < 0.0:
+            raise errors.InvalidScoreError(("RelativeDifferenceScore was initialized with "
+                                            "a score of %f, but a RelativeDifferenceScore "
+                                            "must be non-negative.") % score)
+
+    @classmethod
+    def compute(cls, observation: Union[dict, float, int, pq.Quantity],
+                prediction: Union[dict, float, int, pq.Quantity],
+                key=None,
+                scale: Union[float, int, pq.Quantity, None] = None) -> 'RelativeDifferenceScore':
+        """Compute the relative difference between the observation and a prediction.
+
+        Returns:
+            RelativeDifferenceScore: A relative difference between an observation and a prediction.
+        """
+        assert isinstance(observation, (dict, float, int, pq.Quantity))
+        assert isinstance(prediction, (dict, float, int, pq.Quantity))
+
+        obs, pred = cls.extract_means_or_values(observation, prediction,
+                                                key=key)
+
+        scale = scale or cls.scale or (obs/float(obs))
+        assert type(obs) is type(scale)
+        assert type(obs) is type(pred)
+        if isinstance(obs, pq.Quantity):
+            assert obs.units == pred.units, \
+                "Prediction must have the same units as the observation"
+            assert obs.units == scale.units, \
+                "RelativeDifferenceScore.scale must have the same units as the observation"
+        assert scale > 0, \
+            "RelativeDifferenceScore.scale must be positive (not %g)" % scale
+        value = np.abs(pred - obs) / scale
+        value = utils.assert_dimensionless(value)
+        return RelativeDifferenceScore(value)
+
+    @property
+    def norm_score(self) -> float:
+        """Return 1.0 for a ratio of 0.0, falling to 0.0 for extremely large values.
+
+        Returns:
+            float: The value of the norm score.
+        """
+        x = self.score
+        return 1 / (1 + x)
+
+    def __str__(self):
+        return 'Relative Difference = %.2f' % self.score
+
+
 class PercentScore(Score):
     """A percent score.
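For orientation, here is a minimal usage sketch of the new score (not part of the commit). The values and per-test scales are made up, chosen to mirror the docstring's example of 5 volts and 10 seconds as medium size differences:

import quantities as pq
from sciunit.scores.complete import RelativeDifferenceScore

# Hypothetical TestA compares voltages; 5 V is treated as a medium size
# difference for that test.
score_a = RelativeDifferenceScore.compute(52.0 * pq.V, 50.0 * pq.V,
                                          scale=5.0 * pq.V)

# Hypothetical TestB compares durations; 10 s is its reference scale.
score_b = RelativeDifferenceScore.compute(30.0 * pq.s, 26.0 * pq.s,
                                          scale=10.0 * pq.s)

print(score_a)             # Relative Difference = 0.40  (|50 - 52| / 5)
print(score_b)             # Relative Difference = 0.40  (|26 - 30| / 10)
print(score_a.norm_score)  # 1 / (1 + 0.4) ~= 0.71, same for score_b

Because both tests use a scale representing a "medium size" difference, the resulting scores are directly comparable; if scale is omitted, the class-level scale or a default of 1 in the observation's units is used, as described in the docstring above.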
43 changes: 33 additions & 10 deletions sciunit/tests.py
@@ -125,6 +125,8 @@ def validate_observation(self, observation: dict) -> dict:
             schema = {"schema": self.observation_schema, "type": "dict"}
             schema = {"observation": schema}
             v = ObservationValidator(schema, test=self)
+            if float(observation['std']) == 0.0:
+                print('Observation standard deviation is 0')
             if not v.validate({"observation": observation}):
                 raise ObservationError(v.errors)
         return observation
@@ -366,28 +368,33 @@ def check_score_type(self, score: Score) -> None:
             ) % (self.name, self.score_type.__name__, score.__class__.__name__)
             raise InvalidScoreError(msg)

-    def _judge(self, model: Model, skip_incapable: bool = True) -> Score:
+    def _judge(self, model: Model, skip_incapable: bool = True, cached_prediction=False) -> Score:
         """Generate a score for the model (internal API use only).

         Args:
             model (Model): A sciunit model instance.
             skip_incapable (bool, optional): Skip the incapable tests. Defaults to True.
+            cached_prediction (bool, optional): Whether to use a pre-computed prediction instead of generating one. Defaults to False.

         Returns:
             Score: The generated score.
         """
-        # 1.
-        self.check_capabilities(model, skip_incapable=skip_incapable)
+        if not cached_prediction:
+            # 1.
+            self.check_capabilities(model, skip_incapable=skip_incapable)

         # 2.
         validated = self.validate_observation(self.observation)
         if validated is not None:
             self.observation = validated

-        # 3.
-        prediction = self.generate_prediction(model)
-        self.check_prediction(prediction)
-        self.last_model = model
+        if not cached_prediction:
+            # 3.
+            prediction = self.generate_prediction(model)
+            self.check_prediction(prediction)
+            self.last_model = model
+        else:
+            prediction = self.prediction

         # 4.
         score = self.compute_score(self.observation, prediction)
@@ -400,15 +407,29 @@ def _judge(self, model: Model, skip_incapable: bool = True) -> Score:

         # 6.
         self._bind_score(score, model, self.observation, prediction)

         return score

+    def feature_judge(
+        self,
+        model: Model,
+        skip_incapable: bool = False,
+        stop_on_error: bool = True,
+        deep_error: bool = False,
+    ) -> Score:
+        """For backwards compatibility"""
+        return self.judge(model, skip_incapable=skip_incapable, stop_on_error=stop_on_error,
+                          deep_error=deep_error, cached_prediction=True)
+
     def judge(
         self,
         model: Model,
         skip_incapable: bool = False,
         stop_on_error: bool = True,
         deep_error: bool = False,
+        cached_prediction: bool = False
     ) -> Score:
         """Generate a score for the provided model (public method).
@@ -460,10 +481,12 @@ def judge(
             )

         if deep_error:
-            score = self._judge(model, skip_incapable=skip_incapable)
+            score = self._judge(model, skip_incapable=skip_incapable,
+                                cached_prediction=cached_prediction)
         else:
             try:
-                score = self._judge(model, skip_incapable=skip_incapable)
+                score = self._judge(model, skip_incapable=skip_incapable,
+                                    cached_prediction=cached_prediction)
             except CapabilityError as e:
                 score = NAScore(str(e))
                 score.model = model
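To illustrate the cached-prediction path added to judge() and the restored feature_judge(), here is a rough sketch (not part of the commit). The test class, model, observation, and prediction values below are hypothetical placeholders, and the minimal Test subclass glosses over observation-schema details, which may need adjusting for a real test:

import quantities as pq
import sciunit
from sciunit.scores.complete import RelativeDifferenceScore


class ToyDifferenceTest(sciunit.Test):
    """A hypothetical test used only to illustrate the cached-prediction path."""

    score_type = RelativeDifferenceScore

    def generate_prediction(self, model):
        # Never reached when judging from a cached prediction.
        raise NotImplementedError("not needed for this sketch")

    def compute_score(self, observation, prediction):
        return self.score_type.compute(observation, prediction, scale=5.0 * pq.mV)


test = ToyDifferenceTest(observation={'mean': 50.0 * pq.mV, 'std': 5.0 * pq.mV})
model = sciunit.Model(name="placeholder")

# The prediction is treated as a settable attribute, so code that has already
# computed the feature elsewhere can attach it directly...
test.prediction = 48.0 * pq.mV

# ...and judge() then scores it without calling generate_prediction() or
# check_capabilities().
score = test.judge(model, cached_prediction=True)

# feature_judge() is kept as a thin wrapper that always takes the cached path.
score = test.feature_judge(model)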
