diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index f5ea4d7875931..efe713639fec9 100644 --- a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -53,7 +53,11 @@ def setup(self): self.rng6 = date_range(start='1/1/1', periods=self.N, freq='B') self.rng7 = date_range(start='1/1/1700', freq='D', periods=100000) - self.a = self.rng7[:50000].append(self.rng7[50002:]) + self.no_freq = self.rng7[:50000].append(self.rng7[50002:]) + self.d_freq = self.rng7[:50000].append(self.rng7[50000:]) + + self.rng8 = date_range(start='1/1/1700', freq='B', periods=100000) + self.b_freq = self.rng8[:50000].append(self.rng8[50000:]) def time_add_timedelta(self): (self.rng + dt.timedelta(minutes=2)) @@ -94,8 +98,14 @@ def time_infer_dst(self): def time_timeseries_is_month_start(self): self.rng6.is_month_start - def time_infer_freq(self): - infer_freq(self.a) + def time_infer_freq_none(self): + infer_freq(self.no_freq) + + def time_infer_freq_daily(self): + infer_freq(self.d_freq) + + def time_infer_freq_business(self): + infer_freq(self.b_freq) class TimeDatetimeConverter(object): diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index f21230693686e..a3a44feb55c7c 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -67,7 +67,6 @@ Groupby/Resample/Rolling ^^^^^^^^^^^^^^^^^^^^^^^^ - Sparse ^^^^^^ diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 36ca79e8b8714..11b31a7f4c55c 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -113,7 +113,7 @@ Plotting Groupby/Resample/Rolling ^^^^^^^^^^^^^^^^^^^^^^^^ - +- Bug in ``infer_freq`` causing indices with 2-day gaps during the working week to be wrongly inferred as business daily (:issue:`16624`) Sparse ^^^^^^ diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index 79fe0a864f246..08cf5108ffdb1 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -564,15 +564,23 @@ def test_freq_conversion(self): class TestSlicing(object): + @pytest.mark.parametrize('freq', ['B', 'D']) + def test_timedelta(self, freq): + index = date_range('1/1/2000', periods=50, freq=freq) - def test_timedelta(self): - # this is valid too - index = date_range('1/1/2000', periods=50, freq='B') shifted = index + timedelta(1) back = shifted + timedelta(-1) - assert tm.equalContents(index, back) - assert shifted.freq == index.freq - assert shifted.freq == back.freq + tm.assert_index_equal(index, back) + + if freq == 'D': + expected = pd.tseries.offsets.Day(1) + assert index.freq == expected + assert shifted.freq == expected + assert back.freq == expected + else: # freq == 'B' + assert index.freq == pd.tseries.offsets.BusinessDay(1) + assert shifted.freq is None + assert back.freq == pd.tseries.offsets.BusinessDay(1) result = index - timedelta(1) expected = index + timedelta(-1) diff --git a/pandas/tests/tseries/test_frequencies.py b/pandas/tests/tseries/test_frequencies.py index 2edca1bd4676b..54d12317b0bf8 100644 --- a/pandas/tests/tseries/test_frequencies.py +++ b/pandas/tests/tseries/test_frequencies.py @@ -504,9 +504,14 @@ def test_raise_if_too_few(self): pytest.raises(ValueError, frequencies.infer_freq, index) def test_business_daily(self): - index = _dti(['12/31/1998', '1/3/1999', '1/4/1999']) + index = _dti(['01/01/1999', '1/4/1999', '1/5/1999']) assert frequencies.infer_freq(index) == 'B' + def test_business_daily_look_alike(self): + # GH 16624, do not infer 'B' when 'weekend' (2-day gap) in wrong place + index = _dti(['12/31/1998', '1/3/1999', '1/4/1999']) + assert frequencies.infer_freq(index) is None + def test_day(self): self._check_tick(timedelta(1), 'D') diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index dddf835424f67..8640f106a048a 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -975,8 +975,7 @@ def _infer_daily_rule(self): else: return _maybe_add_count('D', days) - # Business daily. Maybe - if self.day_deltas == [1, 3]: + if self._is_business_daily(): return 'B' wom_rule = self._get_wom_rule() @@ -1012,6 +1011,19 @@ def _get_monthly_rule(self): return {'cs': 'MS', 'bs': 'BMS', 'ce': 'M', 'be': 'BM'}.get(pos_check) + def _is_business_daily(self): + # quick check: cannot be business daily + if self.day_deltas != [1, 3]: + return False + + # probably business daily, but need to confirm + first_weekday = self.index[0].weekday() + shifts = np.diff(self.index.asi8) + shifts = np.floor_divide(shifts, _ONE_DAY) + weekdays = np.mod(first_weekday + np.cumsum(shifts), 7) + return np.all(((weekdays == 0) & (shifts == 3)) | + ((weekdays > 0) & (weekdays <= 4) & (shifts == 1))) + def _get_wom_rule(self): # wdiffs = unique(np.diff(self.index.week)) # We also need -47, -49, -48 to catch index spanning year boundary