pvlib · wfvining · May 28, 2020 · May 26, 2020 · May 27, 2020 · May 28, 2020
diff --git a/pvanalytics/quality/gaps.py b/pvanalytics/quality/gaps.py
@@ -35,13 +35,29 @@ def _all_close_to_first(x, rtol=1e-5, atol=1e-8):
     return np.allclose(a=x, b=x[0], rtol=rtol, atol=atol)
 
 
-def stale_values_diff(x, window=3, rtol=1e-5, atol=1e-8):
+def _backfill_window(endpoints, window):
+    # Propagate each True in `endpoints` back over the `window - 1`
+    # preceding positions, so every value of a flagged window is True
+    # rather than only its right endpoint.  `endpoints` must mark the
+    # right (last) position of each window.  Values shifted in past
+    # the end of the series are missing and are treated as False.
+    flags = endpoints
+    while window > 0:
+        window = window - 1  # shift amounts run window-1, ..., 1, 0
+        flags = flags | endpoints.shift(-window).fillna(False)
+    return flags
+
+
+def stale_values_diff(x, window=3, rtol=1e-5, atol=1e-8, label_all=False):
     """Identify stale values in the data.
 
     For a window of length N, the last value (index N-1) is considered
     stale if all values in the window are close to the first value
     (index 0).
 
+    Parameters `rtol` and `atol` have the same meaning as in
+    :py:func:`numpy.allclose`.
+
     Parameters
     ----------
     x : Series
@@ -53,9 +69,9 @@ def stale_values_diff(x, window=3, rtol=1e-5, atol=1e-8):
         relative tolerance for detecting a change in data values
     atol : float, default 1e-8
         absolute tolerance for detecting a change in data values
-
-    Parameters rtol and atol have the same meaning as in
-    numpy.allclose
+    label_all : bool, default False
+        Whether to label all values in the window. If False, then only
+        the right endpoint of the window is labeled.
 
     Returns
     -------
@@ -84,16 +100,21 @@ def stale_values_diff(x, window=3, rtol=1e-5, atol=1e-8):
         raw=True,
         kwargs={'rtol': rtol, 'atol': atol}
     ).fillna(False).astype(bool)
+    if label_all:
+        return _backfill_window(flags, window)
     return flags
 
 
-def interpolation_diff(x, window=3, rtol=1e-5, atol=1e-8):
+def interpolation_diff(x, window=3, rtol=1e-5, atol=1e-8, label_all=False):
     """Identify sequences which appear to be linear.
 
     Sequences are linear if the first difference appears to be
     constant.  For a window of length N, the last value (index N-1) is
     flagged if all values in the window appear to be a line segment.
 
+    Parameters `rtol` and `atol` have the same meaning as in
+    :py:func:`numpy.allclose`.
+
     Parameters
     ----------
     x : Series
@@ -105,6 +126,9 @@ def interpolation_diff(x, window=3, rtol=1e-5, atol=1e-8):
         tolerance relative to max(abs(x.diff()) for detecting a change
     atol : float, default 1e-8
         absolute tolerance for detecting a change in first difference
+    label_all : bool, default False
+        Whether to label all values in the window. If False, then only the
+        right endpoint of the window is labeled.
 
     Returns
     -------
@@ -135,6 +159,8 @@ def interpolation_diff(x, window=3, rtol=1e-5, atol=1e-8):
         rtol=rtol,
         atol=atol
     )
+    if label_all:
+        return _backfill_window(flags, window)
     return flags
 
 

diff --git a/pvanalytics/tests/quality/test_gaps.py b/pvanalytics/tests/quality/test_gaps.py
@@ -118,6 +118,17 @@ def test_stale_values_diff_raises_error(stale_data):
         gaps.stale_values_diff(stale_data, window=1)
 
 
+def test_stale_values_diff_label_all(stale_data):
+    """With label_all=True every value in a stale window is flagged."""
+    assert_series_equal(
+        pd.Series([False, True, True, True, True,
+                   True, True, True, False, False]),
+        gaps.stale_values_diff(
+            stale_data, window=4, label_all=True
+        )
+    )
+
+
 @pytest.fixture
 def interpolated_data():
     """A series that contains linear interpolation.
@@ -136,6 +147,17 @@ def interpolated_data():
     return pd.Series(data=data)
 
 
+def test_interpolation_diff_label_all(interpolated_data):
+    """With label_all=True the whole interpolated window is flagged."""
+    assert_series_equal(
+        gaps.interpolation_diff(interpolated_data, window=3, label_all=True),
+        pd.Series([False, False, False, False, False,
+                   True, True, True, False, False,
+                   False, True, True, True, True, True,
+                   False])
+    )
+
+
 def test_interpolation_diff(interpolated_data):
     """Interpolation is detected correclty.