Merge pull request #56 from appukuttan-shailesh/test_m2m
Test m2m
rgerkin committed Oct 31, 2017
2 parents 5e428ec + 3305065 commit 3646336
Showing 1 changed file with 255 additions and 0 deletions.
sciunit/__init__.py: 255 additions & 0 deletions
@@ -397,6 +397,180 @@ def __str__(self):
return '%s' % self.name


class TestM2M(Test):
"""Abstract class for handling tests involving multiple models.
Enables comparison of model to model predictions, and also against
experimental reference data (optional).
Note: 'TestM2M' would typically be used when handling mutliple (>2)
models, with/without experimental reference data. For single model
tests, you can use the 'Test' class.
"""
def __init__(self, observation=None, name=None, **params):
super(TestM2M,self).__init__(observation, name=name, **params)

def validate_observation(self, observation):
"""(Optional) Implement to validate the observation provided to the constructor.
Note: TestM2M does not require an observation (i.e. None is allowed).
"""
pass

def compute_score(self, prediction1, prediction2):
"""Generates a score given the observations provided in the constructor
and/or the prediction(s) generated by generate_prediction.
Must generate a score of score_type.
No default implementation.
"""
try:
# After some processing of the observation and/or the prediction(s).
score = self.score_type.compute(prediction1,prediction2)
return score
except:
raise NotImplementedError(("Test %s either implements no "
"compute_score method or provides no "
"score_type with a compute method.") \
% self.name)
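# For example (sketch only, assuming numeric predictions and a score_type
# that accepts a numeric value), a subclass could override this as:
#
#   def compute_score(self, prediction1, prediction2):
#       return self.score_type(prediction1 - prediction2)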

def _bind_score(self, score, prediction1, prediction2, model1, model2):
"""
Binds some useful attributes to the score.
"""
score.model1 = model1
score.model2 = model2
score.test = self
score.prediction1 = prediction1
score.prediction2 = prediction2
score.related_data = score.related_data.copy() # Don't let scores
# share related_data.
self.bind_score(score,prediction1,prediction2,model1,model2)

def bind_score(self, score, prediction1, prediction2, model1, model2):
"""
For the user to bind additional features to the score.
"""
pass

def _judge(self, prediction1, prediction2, model1, model2=None):
# TODO: Not sure if below statement is required
# self.last_model = model

# 6.
score = self.compute_score(prediction1, prediction2)
if self.converter:
score = self.converter.convert(score)
# 7.
if not isinstance(score,(self.score_type,NoneScore,ErrorScore)):
raise InvalidScoreError(("Score for test '%s' is not of correct "
"type. The test requires type %s but %s "
"was provided.") \
% (self.name, self.score_type.__name__,
score.__class__.__name__))
# 8.
self._bind_score(score,prediction1,prediction2,model1,model2)

return score

def judge(self, models, skip_incapable=False, stop_on_error=True,
deep_error=False):
"""Generates a score matrix for the provided model(s).
Operates as follows:
1. Check if models have been specified as a list/tuple/set.
If not, raise exception.
2. Create a list of predictions. If a test observation is provided,
add it to predictions.
3. Checks if all models have all the required capabilities. If a model
does not, then a CapabilityError is raised.
4. Calls generate_prediction to generate a prediction for each model,
and appends these to the predictions list.
5. Generate a 2D list as a placeholder for all the scores.
6. Calls compute_score (via _judge) to generate a score for each pairwise comparison.
7. Checks that each score is of score_type, raising an InvalidScoreError otherwise.
8. Equips the score with metadata:
a) Reference(s) to the model(s), in attribute model1 (and model2).
b) A reference to the test, in attribute test.
c) A reference to the predictions, in attributes prediction1 and prediction2.
9. Returns the scores as a ScoreMatrixM2M (a pandas DataFrame).
If stop_on_error is true (default), exceptions propagate upward. If
false, an ErrorScore is generated containing the exception.
If deep_error is true (not default), the traceback will contain the
actual code execution error, instead of the content of an ErrorScore.
"""

# 1.
if not isinstance(models,(list,tuple,set)):
raise TypeError(("Models must be specified as a list, tuple or set."
"For single model tests, use 'Test' class."))
else:
models = list(models)

# 2.
predictions = []
# If observation exists, store it as first element in predictions[]
if self.observation:
predictions.append(self.observation)

for model in models:
if not isinstance(model, Model):
raise TypeError(("TestM2M's judge method received a non-Model."
"Invalid model name: '%s'" % model))
else:
try:
# 3.
self.check_capabilities(model, skip_incapable=skip_incapable)
# 4.
prediction = self.generate_prediction(model)
self.check_prediction(prediction)
predictions.append(prediction)
except CapabilityError as e:
raise CapabilityError(("TestM2M's judge method resulted in error "
"for '%s'. Error: '%s'") % (model, str(e)))
except Exception as e:
raise Exception(("TestM2M's judge method resulted in error "
"for '%s'. Error: '%s'") % (model, str(e)))

# 5. 2D list for scores; num(rows) = num(cols) = num(predictions)
scores = [[NoneScore for x in range(len(predictions))] for y in range(len(predictions))]

for i in range(len(predictions)):
for j in range(len(predictions)):
if not self.observation:
model1 = models[i]
model2 = models[j]
elif i == 0 and j==0:
model1 = None
model2 = None
elif i == 0:
model1 = models[j-1]
model2 = None
elif j == 0:
model1 = models[i-1]
model2 = None
else:
model1 = models[i-1]
model2 = models[j-1]

scores[i][j] = self._judge(predictions[i], predictions[j], model1, model2)
if isinstance(scores[i][j],ErrorScore) and stop_on_error:
raise scores[i][j].score # An exception.

# 9.
sm = ScoreMatrixM2M(self, models, scores=scores)
return sm

"""
# TODO: see if this needs to be updated and provided:
def optimize(self, model):
raise NotImplementedError(("Optimization not implemented "
"for Test '%s'" % self))
"""


class TestSuite(SciUnit):
"""A collection of tests."""
def __init__(self, name, tests, weights=None, include_models=None,
@@ -992,6 +1166,86 @@ def to_html(self, show_mean=None, sortable=None, colorize=True, *args,
# return HTML(html)


class ScoreArrayM2M(pd.Series):
"""
Represents an array of scores derived from TestM2M.
Extends the pandas Series such that items are either
models subject to a test or the test itself.
"""

def __init__(self, test, models, scores):
items = models if not test.observation else [test]+models
super(ScoreArrayM2M,self).__init__(data=scores, index=items)

def __getitem__(self, item):
if isinstance(item,str):
for entry in self.index:
if entry.name == item or "observation" == item.lower():
return self.__getitem__(entry)
raise KeyError("Doesn't match test, 'observation' or any model: '%s'" % item)
else:
return super(ScoreArrayM2M,self).__getitem__(item)

def __getattr__(self, name):
if name in ['score','sort_keys','related_data']:
attr = self.apply(lambda x: getattr(x,name))
else:
attr = super(ScoreArrayM2M,self).__getattribute__(name)
return attr

@property
def sort_keys(self):
return self.map(lambda x: x.sort_key)


class ScoreMatrixM2M(pd.DataFrame):
"""
Represents a matrix of scores derived from TestM2M.
Extends the pandas DataFrame such that models/observation are both
columns and the index.
"""

def __init__(self, test, models, scores):
if not test.observation:
items = models
else:
# better to have header as "observation" than test.name
# only affects pandas.DataFrame; not test.name in individual scores
test.name = "observation"
items = [test]+models
super(ScoreMatrixM2M,self).__init__(data=scores, index=items, columns=items)
self.test = test
self.models = models

def __getitem__(self, item):
if isinstance(item,(Test,Model)):
return ScoreArrayM2M(self.test, self.models, scores=self.loc[item,:])
elif isinstance(item,str):
for model in self.models:
if model.name == item:
return self.__getitem__(model)
if self.test.name == item or "observation" == item.lower():
return self.__getitem__(self.test)
raise KeyError("Doesn't match test, 'observation' or any model: '%s'" % item)
elif isinstance(item,(list,tuple)) and len(item)==2:
if isinstance(item[0],(Test,Model)) and isinstance(item[1],(Test,Model)):
return self.loc[item[0],item[1]]
elif isinstance(item[0],str):
return self.__getitem__(item[0]).__getitem__(item[1])
raise TypeError("Expected test/'observation'; model; test/'observation',model; model,test/'observation'; or model,model")

def __getattr__(self, name):
if name in ['score','sort_key','related_data']:
attr = self.applymap(lambda x: getattr(x,name))
else:
attr = super(ScoreMatrixM2M,self).__getattribute__(name)
return attr

@property
def sort_keys(self):
return self.applymap(lambda x: x.sort_key)


class ScorePanel(pd.Panel):
def __getitem__(self, item):
df = super(ScorePanel,self).__getitem__(item)
@@ -1059,3 +1313,4 @@ def __init__(self, name, value):

super(BadParameterValueError, self).__init__(\
"Parameter %s has unreasonable value of %s" % (name,value))
