Added only_lower_triangle option to TestM2M
rgerkin committed Nov 22, 2019
1 parent af8380d commit 73fc350
Showing 3 changed files with 47 additions and 9 deletions.
11 changes: 11 additions & 0 deletions sciunit/scores/base.py
@@ -31,6 +31,12 @@ def __init__(self, score, related_data=None):
     score = None
     """The score itself."""
 
+    _best = None
+    """The best possible score of this type"""
+
+    _worst = None
+    """The worst possible score of this type"""
+
     _allowed_types = None
     """List of allowed types for the score argument"""
 
@@ -73,6 +79,11 @@ def _check_score(self, score):
         on the score, e.g. the range of the allowed score"""
         pass
 
+    @classmethod
+    def compute(cls, observation, prediction):
+        """Compute whether the observation equals the prediction."""
+        raise NotImplementedError()
+
     @property
     def norm_score(self):
         """A floating point version of the score used for sorting.
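Taken together, `_best`, `_worst`, and the `compute` classmethod define a small contract for `Score` subclasses. Below is a minimal sketch of a subclass honoring that contract; `EqualityScore` and its values are hypothetical illustrations, not part of this commit:

from sciunit.scores.base import Score

class EqualityScore(Score):
    """Hypothetical score: 1.0 if the prediction equals the observation."""

    _best = 1.0   # Exact equality is the best possible outcome
    _worst = 0.0  # Any mismatch is the worst possible outcome

    @classmethod
    def compute(cls, observation, prediction):
        """Compute whether the observation equals the prediction."""
        return cls(1.0 if observation == prediction else 0.0)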
6 changes: 6 additions & 0 deletions sciunit/scores/complete.py
@@ -51,6 +51,10 @@ class ZScore(Score):
                     'prediction divided by the standard deviation of the '
                     'observation')
 
+    _best = 0.0  # A Z-score of 0.0 is best
+
+    _worst = np.inf  # A Z-score of infinity (or negative infinity) is worst
+
     @classmethod
     def compute(cls, observation, prediction):
         """Compute a z-score from an observation and a prediction."""
@@ -139,6 +143,8 @@ class RatioScore(Score):
 
     _description = ('The ratio between the prediction and the observation')
 
+    _best = 1.0  # A RatioScore of 1.0 is best
+
     def _check_score(self, score):
         if score < 0.0:
             raise errors.InvalidScoreError(("RatioScore was initialized with "
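For example, under common sciunit conventions the observation is a dict with 'mean' and 'std' keys and the prediction carries a 'value'; those key names are an assumption here, not shown in this diff:

from sciunit.scores import ZScore

observation = {'mean': 10.0, 'std': 2.0}  # assumed observation format
prediction = {'value': 13.0}              # assumed prediction format

score = ZScore.compute(observation, prediction)
print(score.score)    # 1.5, i.e. (13.0 - 10.0) / 2.0
print(ZScore._best)   # 0.0: a perfect match deviates by zero
print(ZScore._worst)  # inf: deviation is unbounded in the worst case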
39 changes: 30 additions & 9 deletions sciunit/tests.py
Expand Up @@ -214,6 +214,11 @@ def compute_score(self, observation, prediction):
score = self.score_type.compute(observation, prediction)
return score

def ace(self):
"""Generate the best possible score of the associated score type."""
score = self.score_type(self.score_type._best)
return score

def _bind_score(self, score, model, observation, prediction):
"""Bind some useful attributes to the score."""
score.model = model
@@ -403,13 +408,18 @@ def compute_score(self, prediction1, prediction2):
         """
         try:
             # After some processing of the observation and/or the prediction(s)
-            score = self.score_type.compute(prediction1, prediction2)
-            return score
+            f = self.score_type.compute
         except Exception:
-            raise NotImplementedError(("Test %s either implements no "
-                                       "compute_score method or provides no "
-                                       "score_type with a compute method.")
-                                      % self.name)
+            msg = ("Test implements no `compute_score` method, "
+                   "and its score_type %s provides no "
+                   "`compute` method.") % self.score_type
+            raise NotImplementedError(msg)
+        try:
+            score = f(prediction1, prediction2)
+        except Exception as e:
+            msg = "%s.compute failed: %s" % (self.score_type.__name__, str(e))
+            raise Exception(msg)
+        return score
 
     def _bind_score(self, score, prediction1, prediction2, model1, model2):
         """Bind some useful attributes to the score."""
@@ -447,8 +457,12 @@ def _judge(self, prediction1, prediction2, model1, model2=None):
         return score
 
     def judge(self, models, skip_incapable=False, stop_on_error=True,
-              deep_error=False):
+              deep_error=False, only_lower_triangle=False):
         """Generate a score matrix for the provided model(s).
 
+        `only_lower_triangle`: Only compute one triangle of this square
+            ScoreMatrix (excluding the diagonal) and copy values across
+            the diagonal; fill the diagonal with the best possible score
+            (each model matches itself). If False, compute all entries.
         Operates as follows:
         1. Check if models have been specified as a list/tuple/set.
@@ -535,8 +549,15 @@ def judge(self, models, skip_incapable=False, stop_on_error=True,
                 model1 = models[i-1]
                 model2 = models[j-1]
 
-                scores[i][j] = self._judge(predictions[i], predictions[j],
-                                           model1, model2)
+                if i == j and only_lower_triangle:
+                    # Perfect score for self-comparison
+                    scores[i][j] = self.ace()
+                elif i > j and only_lower_triangle:
+                    # Should already be computed earlier in this loop
+                    scores[i][j] = scores[j][i]
+                else:
+                    scores[i][j] = self._judge(predictions[i], predictions[j],
+                                               model1, model2)
                 if isinstance(scores[i][j], ErrorScore) and stop_on_error:
                     raise scores[i][j].score  # An exception.
 
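A usage sketch of the new option; the test subclass and models below are hypothetical, while `judge(..., only_lower_triangle=True)` and `ace()` come from this commit:

import sciunit
from sciunit.scores import RatioScore

class ValueRatioTest(sciunit.TestM2M):
    """Hypothetical M2M test comparing one predicted value per model."""
    score_type = RatioScore

    def generate_prediction(self, model):
        return model.value  # assumed model attribute

models = [model_a, model_b, model_c]  # assumed pre-built sciunit models
test = ValueRatioTest(observation=None, name="pairwise value ratio")

# Off-diagonal pairs are scored once and mirrored across the diagonal;
# diagonal entries are test.ace(), i.e. RatioScore(1.0), the declared _best.
matrix = test.judge(models, only_lower_triangle=True)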
