Dev (#181)
* Added RelativeDifferenceScore

* fixup! Added RelativeDifferenceScore

* make judging from features more convenient: this path expects users to have pre-existing code that performs something like judge's generate_prediction step, so the prediction is then just treated as a settable object attribute

* Refactored cached prediction logic for tests

* Restored feature_judge for backwards compatibility

Co-authored-by: Russell Jarvis <russelljarvis@protonmail.com>
rgerkin and russelljjarvis committed May 3, 2021
1 parent eeb64f3 commit 21821a3
Showing 3 changed files with 110 additions and 12 deletions.
1 change: 0 additions & 1 deletion sciunit/models/runnable.py
@@ -47,7 +47,6 @@ def set_backend(self, backend: Union[str, tuple, list, None]):
             Exception: The backend was not found.
         """
         if inspect.isclass(backend) and Backend in backend.__bases__:
-            print(backend.__name__)
             name = backend.__name__
             args = []
             kwargs = {}
78 changes: 77 additions & 1 deletion sciunit/scores/complete.py
@@ -7,6 +7,7 @@

 import numpy as np
 import quantities as pq
+from typing import Union
 from sciunit import errors, utils

 from .base import Score
@@ -213,8 +214,83 @@ def norm_score(self) -> float:
         return 1 - 2 * math.fabs(0.5 - cdf)

     def __str__(self):
-        return "Ratio = %.2f" % self.score
+        return 'Ratio = %.2f' % self.score


+class RelativeDifferenceScore(Score):
+    """A relative difference between prediction and observation.
+
+    The absolute value of the difference between the prediction and the
+    observation is divided by a reference value with the same units. This
+    reference scale should be chosen for each test such that normalization
+    produces directly comparable scores across tests. For example, if 5 volts
+    represents a medium size difference for TestA, and 10 seconds represents a
+    medium size difference for TestB, then 5 volts and 10 seconds should be
+    used for this reference scale in TestA and TestB, respectively. The
+    attribute `scale` can be passed to the compute method or set for the whole
+    class in advance. Otherwise, a scale of 1 (in the units of the
+    observation and prediction) will be used.
+    """
+
+    _allowed_types = (float,)
+
+    _description = ('The relative difference between the prediction and the observation')
+
+    _best = 0.0  # A RelativeDifferenceScore of 0.0 is best
+
+    _worst = np.inf
+
+    scale = None
+
+    def _check_score(self, score):
+        if score < 0.0:
+            raise errors.InvalidScoreError(("RelativeDifferenceScore was initialized with "
+                                            "a score of %f, but a RelativeDifferenceScore "
+                                            "must be non-negative.") % score)
+
+    @classmethod
+    def compute(cls, observation: Union[dict, float, int, pq.Quantity],
+                prediction: Union[dict, float, int, pq.Quantity],
+                key=None,
+                scale: Union[float, int, pq.Quantity, None] = None) -> 'RelativeDifferenceScore':
+        """Compute the relative difference between the observation and a prediction.
+
+        Returns:
+            RelativeDifferenceScore: A relative difference between an observation and a prediction.
+        """
+        assert isinstance(observation, (dict, float, int, pq.Quantity))
+        assert isinstance(prediction, (dict, float, int, pq.Quantity))
+
+        obs, pred = cls.extract_means_or_values(observation, prediction,
+                                                key=key)
+
+        scale = scale or cls.scale or (obs/float(obs))
+        assert type(obs) is type(scale)
+        assert type(obs) is type(pred)
+        if isinstance(obs, pq.Quantity):
+            assert obs.units == pred.units, \
+                "Prediction must have the same units as the observation"
+            assert obs.units == scale.units, \
+                "RelativeDifferenceScore.scale must have the same units as the observation"
+        assert scale > 0, \
+            "RelativeDifferenceScore.scale must be positive (not %g)" % scale
+        value = np.abs(pred - obs) / scale
+        value = utils.assert_dimensionless(value)
+        return RelativeDifferenceScore(value)
+
+    @property
+    def norm_score(self) -> float:
+        """Return 1.0 for a ratio of 0.0, falling to 0.0 for extremely large values.
+
+        Returns:
+            float: The value of the norm score.
+        """
+        x = self.score
+        return 1 / (1 + x)
+
+    def __str__(self):
+        return 'Relative Difference = %.2f' % self.score
+
+
 class PercentScore(Score):
     """A percent score.
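For orientation, here is a minimal usage sketch of the new score (not part of the commit). The values and per-test scales are made up, chosen to mirror the docstring's example of 5 volts and 10 seconds as medium size differences:

import quantities as pq
from sciunit.scores.complete import RelativeDifferenceScore

# Hypothetical TestA compares voltages; 5 V is treated as a medium size
# difference for that test.
score_a = RelativeDifferenceScore.compute(52.0 * pq.V, 50.0 * pq.V,
                                          scale=5.0 * pq.V)

# Hypothetical TestB compares durations; 10 s is its reference scale.
score_b = RelativeDifferenceScore.compute(30.0 * pq.s, 26.0 * pq.s,
                                          scale=10.0 * pq.s)

print(score_a)             # Relative Difference = 0.40  (|50 - 52| / 5)
print(score_b)             # Relative Difference = 0.40  (|26 - 30| / 10)
print(score_a.norm_score)  # 1 / (1 + 0.4) ~= 0.71, same for score_b

Because both tests use a scale representing a "medium size" difference, the resulting scores are directly comparable; if scale is omitted, the class-level scale or a default of 1 in the observation's units is used, as described in the docstring above.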
43 changes: 33 additions & 10 deletions sciunit/tests.py
@@ -125,6 +125,8 @@ def validate_observation(self, observation: dict) -> dict:
             schema = {"schema": self.observation_schema, "type": "dict"}
             schema = {"observation": schema}
             v = ObservationValidator(schema, test=self)
+            if float(observation['std']) == 0.0:
+                print('Observation standard deviation is 0')
             if not v.validate({"observation": observation}):
                 raise ObservationError(v.errors)
         return observation
@@ -366,28 +368,33 @@ def check_score_type(self, score: Score) -> None:
             ) % (self.name, self.score_type.__name__, score.__class__.__name__)
             raise InvalidScoreError(msg)

-    def _judge(self, model: Model, skip_incapable: bool = True) -> Score:
+    def _judge(self, model: Model, skip_incapable: bool = True, cached_prediction=False) -> Score:
         """Generate a score for the model (internal API use only).

         Args:
             model (Model): A sciunit model instance.
             skip_incapable (bool, optional): Skip the incapable tests. Defaults to True.
+            cached_prediction (bool, optional): Whether to use a pre-computed prediction instead of generating one. Defaults to False.

         Returns:
             Score: The generated score.
         """
-        # 1.
-        self.check_capabilities(model, skip_incapable=skip_incapable)
+        if not cached_prediction:
+            # 1.
+            self.check_capabilities(model, skip_incapable=skip_incapable)

         # 2.
         validated = self.validate_observation(self.observation)
         if validated is not None:
             self.observation = validated

-        # 3.
-        prediction = self.generate_prediction(model)
-        self.check_prediction(prediction)
-        self.last_model = model
+        if not cached_prediction:
+            # 3.
+            prediction = self.generate_prediction(model)
+            self.check_prediction(prediction)
+            self.last_model = model
+        else:
+            prediction = self.prediction

         # 4.
         score = self.compute_score(self.observation, prediction)
@@ -400,15 +407,29 @@ def _judge(self, model: Model, skip_incapable: bool = True) -> Score:

         # 6.
         self._bind_score(score, model, self.observation, prediction)

         return score

+    def feature_judge(
+        self,
+        model: Model,
+        skip_incapable: bool = False,
+        stop_on_error: bool = True,
+        deep_error: bool = False,
+    ) -> Score:
+        """For backwards compatibility"""
+        return self.judge(model, skip_incapable=skip_incapable, stop_on_error=stop_on_error,
+                          deep_error=deep_error, cached_prediction=True)
+
     def judge(
         self,
         model: Model,
         skip_incapable: bool = False,
         stop_on_error: bool = True,
         deep_error: bool = False,
+        cached_prediction: bool = False
     ) -> Score:
         """Generate a score for the provided model (public method).
@@ -460,10 +481,12 @@ def judge(
             )

         if deep_error:
-            score = self._judge(model, skip_incapable=skip_incapable)
+            score = self._judge(model, skip_incapable=skip_incapable,
+                                cached_prediction=cached_prediction)
         else:
             try:
-                score = self._judge(model, skip_incapable=skip_incapable)
+                score = self._judge(model, skip_incapable=skip_incapable,
+                                    cached_prediction=cached_prediction)
             except CapabilityError as e:
                 score = NAScore(str(e))
                 score.model = model
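To illustrate the cached-prediction path added to judge() and the restored feature_judge(), here is a rough sketch (not part of the commit). The test class, model, observation, and prediction values below are hypothetical placeholders, and the minimal Test subclass glosses over observation-schema details, which may need adjusting for a real test:

import quantities as pq
import sciunit
from sciunit.scores.complete import RelativeDifferenceScore


class ToyDifferenceTest(sciunit.Test):
    """A hypothetical test used only to illustrate the cached-prediction path."""

    score_type = RelativeDifferenceScore

    def generate_prediction(self, model):
        # Never reached when judging from a cached prediction.
        raise NotImplementedError("not needed for this sketch")

    def compute_score(self, observation, prediction):
        return self.score_type.compute(observation, prediction, scale=5.0 * pq.mV)


test = ToyDifferenceTest(observation={'mean': 50.0 * pq.mV, 'std': 5.0 * pq.mV})
model = sciunit.Model(name="placeholder")

# The prediction is treated as a settable attribute, so code that has already
# computed the feature elsewhere can attach it directly...
test.prediction = 48.0 * pq.mV

# ...and judge() then scores it without calling generate_prediction() or
# check_capabilities().
score = test.judge(model, cached_prediction=True)

# feature_judge() is kept as a thin wrapper that always takes the cached path.
score = test.feature_judge(model)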
