diff --git a/pvanalytics/quality/gaps.py b/pvanalytics/quality/gaps.py index a08bf03c..61d78e97 100644 --- a/pvanalytics/quality/gaps.py +++ b/pvanalytics/quality/gaps.py @@ -35,13 +35,29 @@ def _all_close_to_first(x, rtol=1e-5, atol=1e-8): return np.allclose(a=x, b=x[0], rtol=rtol, atol=atol) -def stale_values_diff(x, window=3, rtol=1e-5, atol=1e-8): +def _backfill_window(endpoints, window): + # propagate Trues in `endpoints` back `window - 1` periods. This + # makes Trues fill the entire window, rather than just marking the + # right endpoint of each window. + # + # `endpoints` must be right-aligned Series.rolling output (`center=False`) + flags = endpoints + while window > 0: + window = window - 1 + flags = flags | endpoints.shift(-window).fillna(False) + return flags + + +def stale_values_diff(x, window=3, rtol=1e-5, atol=1e-8, label_all=False): """Identify stale values in the data. For a window of length N, the last value (index N-1) is considered stale if all values in the window are close to the first value (index 0). + Parameters `rtol` and `atol` have the same meaning as in + :py:func:`numpy.allclose`. + Parameters ---------- x : Series @@ -53,9 +69,9 @@ def stale_values_diff(x, window=3, rtol=1e-5, atol=1e-8): relative tolerance for detecting a change in data values atol : float, default 1e-8 absolute tolerance for detecting a change in data values - - Parameters rtol and atol have the same meaning as in - numpy.allclose + label_all : bool, default False + Whether to label all values in the window. If False, then only + the right endpoint of the window is labeled. 
Returns ------- @@ -84,16 +100,21 @@ def stale_values_diff(x, window=3, rtol=1e-5, atol=1e-8): raw=True, kwargs={'rtol': rtol, 'atol': atol} ).fillna(False).astype(bool) + if label_all: + return _backfill_window(flags, window) return flags -def interpolation_diff(x, window=3, rtol=1e-5, atol=1e-8): +def interpolation_diff(x, window=3, rtol=1e-5, atol=1e-8, label_all=False): """Identify sequences which appear to be linear. Sequences are linear if the first difference appears to be constant. For a window of length N, the last value (index N-1) is flagged if all values in the window appear to be a line segment. + Parameters `rtol` and `atol` have the same meaning as in + :py:func:`numpy.allclose`. + Parameters ---------- x : Series @@ -105,6 +126,9 @@ def interpolation_diff(x, window=3, rtol=1e-5, atol=1e-8): tolerance relative to max(abs(x.diff()) for detecting a change atol : float, default 1e-8 absolute tolerance for detecting a change in first difference + label_all : bool, default False + Whether to label all values in the window. If False, then only the + right endpoint of the window is labeled. 
Returns ------- @@ -135,6 +159,8 @@ def interpolation_diff(x, window=3, rtol=1e-5, atol=1e-8): rtol=rtol, atol=atol ) + if label_all: + return _backfill_window(flags, window) return flags diff --git a/pvanalytics/tests/quality/test_gaps.py b/pvanalytics/tests/quality/test_gaps.py index c4ce6452..036225c6 100644 --- a/pvanalytics/tests/quality/test_gaps.py +++ b/pvanalytics/tests/quality/test_gaps.py @@ -118,6 +118,17 @@ def test_stale_values_diff_raises_error(stale_data): gaps.stale_values_diff(stale_data, window=1) +def test_stale_values_diff_label_all(stale_data): + """When label_all is True the full window is marked stale""" + assert_series_equal( + pd.Series([False, True, True, True, True, + True, True, True, False, False]), + gaps.stale_values_diff( + stale_data, window=4, label_all=True + ) + ) + + @pytest.fixture def interpolated_data(): """A series that contains linear interpolation. @@ -136,6 +147,17 @@ def interpolated_data(): return pd.Series(data=data) +def test_interpolation_diff_label_all(interpolated_data): + """When label_all is True the full window is marked interpolated""" + assert_series_equal( + gaps.interpolation_diff(interpolated_data, window=3, label_all=True), + pd.Series([False, False, False, False, False, + True, True, True, False, False, + False, True, True, True, True, True, + False]) + ) + + def test_interpolation_diff(interpolated_data): """Interpolation is detected correclty.