diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 58e1b2370c7c8..b23a0f10e9e2b 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -989,6 +989,36 @@ a single date rather than the entire array. os.remove('tmp.csv') + +.. _io.csv.mixed_timezones: + +Parsing a CSV with mixed Timezones +++++++++++++++++++++++++++++++++++ + +Pandas cannot natively represent a column or index with mixed timezones. If your CSV +file contains columns with a mixture of timezones, the default result will be +an object-dtype column with strings, even with ``parse_dates``. + + +.. ipython:: python + + content = """\ + a + 2000-01-01T00:00:00+05:00 + 2000-01-01T00:00:00+06:00""" + df = pd.read_csv(StringIO(content), parse_dates=['a']) + df['a'] + +To parse the mixed-timezone values as a datetime column, pass a partially-applied +:func:`to_datetime` with ``utc=True`` as the ``date_parser``. + +.. ipython:: python + + df = pd.read_csv(StringIO(content), parse_dates=['a'], + date_parser=lambda col: pd.to_datetime(col, utc=True)) + df['a'] + + .. _io.dayfirst: diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 16319a3b83ca4..a49ea2cf493a6 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -648,6 +648,52 @@ that the dates have been converted to UTC pd.to_datetime(["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30"], utc=True) + +.. _whatsnew_0240.api_breaking.read_csv_mixed_tz: + +Parsing mixed-timezones with :func:`read_csv` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`read_csv` no longer silently converts mixed-timezone columns to UTC (:issue:`24987`). + +*Previous Behavior* + +.. code-block:: python + + >>> import io + >>> content = """\ + ... a + ... 2000-01-01T00:00:00+05:00 + ... 2000-01-01T00:00:00+06:00""" + >>> df = pd.read_csv(io.StringIO(content), parse_dates=['a']) + >>> df.a + 0 1999-12-31 19:00:00 + 1 1999-12-31 18:00:00 + Name: a, dtype: datetime64[ns] + +*New Behavior* + +.. ipython:: python + + import io + content = """\ + a + 2000-01-01T00:00:00+05:00 + 2000-01-01T00:00:00+06:00""" + df = pd.read_csv(io.StringIO(content), parse_dates=['a']) + df.a + +As can be seen, the ``dtype`` is object; each value in the column is a string. +To convert the strings to an array of datetimes, the ``date_parser`` argument + +.. ipython:: python + + df = pd.read_csv(io.StringIO(content), parse_dates=['a'], + date_parser=lambda col: pd.to_datetime(col, utc=True)) + df.a + +See :ref:`whatsnew_0240.api.timezone_offset_parsing` for more. + .. _whatsnew_0240.api_breaking.period_end_time: Time values in ``dt.end_time`` and ``to_timestamp(how='end')`` diff --git a/doc/source/whatsnew/v0.24.1.rst b/doc/source/whatsnew/v0.24.1.rst index de33ce64c1597..78eba1fe5d025 100644 --- a/doc/source/whatsnew/v0.24.1.rst +++ b/doc/source/whatsnew/v0.24.1.rst @@ -74,6 +74,11 @@ Bug Fixes - Bug in :func:`merge` when merging by index name would sometimes result in an incorrectly numbered index (:issue:`24212`) +**Visualization** + +- Fixed the warning for implicitly registered matplotlib converters not showing. See :ref:`whatsnew_0211.converters` for more (:issue:`24963`). + + **Other** - diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index d7a8417a71be2..f9aef2401a4d3 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -2058,7 +2058,7 @@ def validate_tz_from_dtype(dtype, tz): # tz-naive dtype (i.e. datetime64[ns]) if tz is not None and not timezones.tz_compare(tz, dtz): raise ValueError("cannot supply both a tz and a " - "timezone-naive dtype (i.e. datetime64[ns]") + "timezone-naive dtype (i.e. datetime64[ns])") return tz diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index b31d3f665f47f..4163a571df800 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -203,9 +203,14 @@ * dict, e.g. {{'foo' : [1, 3]}} -> parse columns 1, 3 as date and call result 'foo' - If a column or index contains an unparseable date, the entire column or - index will be returned unaltered as an object data type. For non-standard - datetime parsing, use ``pd.to_datetime`` after ``pd.read_csv`` + If a column or index cannot be represented as an array of datetimes, + say because of an unparseable value or a mixture of timezones, the column + or index will be returned unaltered as an object data type. For + non-standard datetime parsing, use ``pd.to_datetime`` after + ``pd.read_csv``. To parse an index or column with a mixture of timezones, + specify ``date_parser`` to be a partially-applied + :func:`pandas.to_datetime` with ``utc=True``. See + :ref:`io.csv.mixed_timezones` for more. Note: A fast-path exists for iso8601-formatted dates. infer_datetime_format : bool, default False diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index e543ab88f53b2..85549bafa8dc0 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -39,7 +39,7 @@ else: _HAS_MPL = True if get_option('plotting.matplotlib.register_converters'): - _converter.register(explicit=True) + _converter.register(explicit=False) def _raise_if_no_mpl(): diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 499f01f0e7f7b..d72dccadf0ac0 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -30,7 +30,12 @@ def setup_indices(self): def test_pickle_compat_construction(self): # need an object to create with - pytest.raises(TypeError, self._holder) + msg = (r"Index\(\.\.\.\) must be called with a collection of some" + r" kind, None was passed|" + r"__new__\(\) missing 1 required positional argument: 'data'|" + r"__new__\(\) takes at least 2 arguments \(1 given\)") + with pytest.raises(TypeError, match=msg): + self._holder() def test_to_series(self): # assert that we are creating a copy of the index @@ -84,8 +89,11 @@ def test_shift(self): # GH8083 test the base class for shift idx = self.create_index() - pytest.raises(NotImplementedError, idx.shift, 1) - pytest.raises(NotImplementedError, idx.shift, 1, 2) + msg = "Not supported for type {}".format(type(idx).__name__) + with pytest.raises(NotImplementedError, match=msg): + idx.shift(1) + with pytest.raises(NotImplementedError, match=msg): + idx.shift(1, 2) def test_create_index_existing_name(self): diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index 7ebebbf6dee28..6893f635c82ac 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -135,8 +135,10 @@ def test_construction_with_alt_tz_localize(self, kwargs, tz_aware_fixture): tm.assert_index_equal(i2, expected) # incompat tz/dtype - pytest.raises(ValueError, lambda: DatetimeIndex( - i.tz_localize(None).asi8, dtype=i.dtype, tz='US/Pacific')) + msg = "cannot supply both a tz and a dtype with a tz" + with pytest.raises(ValueError, match=msg): + DatetimeIndex(i.tz_localize(None).asi8, + dtype=i.dtype, tz='US/Pacific') def test_construction_index_with_mixed_timezones(self): # gh-11488: no tz results in DatetimeIndex @@ -439,14 +441,19 @@ def test_constructor_coverage(self): tm.assert_index_equal(from_ints, expected) # non-conforming - pytest.raises(ValueError, DatetimeIndex, - ['2000-01-01', '2000-01-02', '2000-01-04'], freq='D') + msg = ("Inferred frequency None from passed values does not conform" + " to passed frequency D") + with pytest.raises(ValueError, match=msg): + DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-04'], freq='D') - pytest.raises(ValueError, date_range, start='2011-01-01', - freq='b') - pytest.raises(ValueError, date_range, end='2011-01-01', - freq='B') - pytest.raises(ValueError, date_range, periods=10, freq='D') + msg = ("Of the four parameters: start, end, periods, and freq, exactly" + " three must be specified") + with pytest.raises(ValueError, match=msg): + date_range(start='2011-01-01', freq='b') + with pytest.raises(ValueError, match=msg): + date_range(end='2011-01-01', freq='B') + with pytest.raises(ValueError, match=msg): + date_range(periods=10, freq='D') @pytest.mark.parametrize('freq', ['AS', 'W-SUN']) def test_constructor_datetime64_tzformat(self, freq): @@ -511,18 +518,20 @@ def test_constructor_dtype(self): idx = DatetimeIndex(['2013-01-01', '2013-01-02'], dtype='datetime64[ns, US/Eastern]') - pytest.raises(ValueError, - lambda: DatetimeIndex(idx, - dtype='datetime64[ns]')) + msg = ("cannot supply both a tz and a timezone-naive dtype" + r" \(i\.e\. datetime64\[ns\]\)") + with pytest.raises(ValueError, match=msg): + DatetimeIndex(idx, dtype='datetime64[ns]') # this is effectively trying to convert tz's - pytest.raises(TypeError, - lambda: DatetimeIndex(idx, - dtype='datetime64[ns, CET]')) - pytest.raises(ValueError, - lambda: DatetimeIndex( - idx, tz='CET', - dtype='datetime64[ns, US/Eastern]')) + msg = ("data is already tz-aware US/Eastern, unable to set specified" + " tz: CET") + with pytest.raises(TypeError, match=msg): + DatetimeIndex(idx, dtype='datetime64[ns, CET]') + msg = "cannot supply both a tz and a dtype with a tz" + with pytest.raises(ValueError, match=msg): + DatetimeIndex(idx, tz='CET', dtype='datetime64[ns, US/Eastern]') + result = DatetimeIndex(idx, dtype='datetime64[ns, US/Eastern]') tm.assert_index_equal(idx, result) @@ -732,7 +741,9 @@ def test_from_freq_recreate_from_data(self, freq): def test_datetimeindex_constructor_misc(self): arr = ['1/1/2005', '1/2/2005', 'Jn 3, 2005', '2005-01-04'] - pytest.raises(Exception, DatetimeIndex, arr) + msg = r"(\(u?')?Unknown string format(:', 'Jn 3, 2005'\))?" + with pytest.raises(ValueError, match=msg): + DatetimeIndex(arr) arr = ['1/1/2005', '1/2/2005', '1/3/2005', '2005-01-04'] idx1 = DatetimeIndex(arr) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index a9bece248e9d0..a38ee264d362c 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -346,8 +346,10 @@ def test_compat_replace(self, f): def test_catch_infinite_loop(self): offset = offsets.DateOffset(minute=5) # blow up, don't loop forever - pytest.raises(Exception, date_range, datetime(2011, 11, 11), - datetime(2011, 11, 12), freq=offset) + msg = "Offset did not increment date" + with pytest.raises(ValueError, match=msg): + date_range(datetime(2011, 11, 11), datetime(2011, 11, 12), + freq=offset) @pytest.mark.parametrize('periods', (1, 2)) def test_wom_len(self, periods): diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index cec181161fc11..fc6080e68a803 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -190,7 +190,9 @@ def test_datetimeindex_accessors(self): # Ensure is_start/end accessors throw ValueError for CustomBusinessDay, bday_egypt = offsets.CustomBusinessDay(weekmask='Sun Mon Tue Wed Thu') dti = date_range(datetime(2013, 4, 30), periods=5, freq=bday_egypt) - pytest.raises(ValueError, lambda: dti.is_month_start) + msg = "Custom business days is not supported by is_month_start" + with pytest.raises(ValueError, match=msg): + dti.is_month_start dti = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03']) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 2a546af79931e..84085141fcf92 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -37,15 +37,19 @@ def test_ops_properties_basic(self): # sanity check that the behavior didn't change # GH#7206 + msg = "'Series' object has no attribute '{}'" for op in ['year', 'day', 'second', 'weekday']: - pytest.raises(TypeError, lambda x: getattr(self.dt_series, op)) + with pytest.raises(AttributeError, match=msg.format(op)): + getattr(self.dt_series, op) # attribute access should still work! s = Series(dict(year=2000, month=1, day=10)) assert s.year == 2000 assert s.month == 1 assert s.day == 10 - pytest.raises(AttributeError, lambda: s.weekday) + msg = "'Series' object has no attribute 'weekday'" + with pytest.raises(AttributeError, match=msg): + s.weekday def test_repeat_range(self, tz_naive_fixture): tz = tz_naive_fixture diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py index 1b2aab9d370a3..a0c9d9f02385c 100644 --- a/pandas/tests/indexes/datetimes/test_partial_slicing.py +++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py @@ -170,7 +170,8 @@ def test_partial_slice(self): result = s['2005-1-1'] assert result == s.iloc[0] - pytest.raises(Exception, s.__getitem__, '2004-12-31') + with pytest.raises(KeyError, match=r"^'2004-12-31'$"): + s['2004-12-31'] def test_partial_slice_daily(self): rng = date_range(freq='H', start=datetime(2005, 1, 31), periods=500) @@ -179,7 +180,8 @@ def test_partial_slice_daily(self): result = s['2005-1-31'] tm.assert_series_equal(result, s.iloc[:24]) - pytest.raises(Exception, s.__getitem__, '2004-12-31 00') + with pytest.raises(KeyError, match=r"^'2004-12-31 00'$"): + s['2004-12-31 00'] def test_partial_slice_hourly(self): rng = date_range(freq='T', start=datetime(2005, 1, 1, 20, 0, 0), @@ -193,7 +195,8 @@ def test_partial_slice_hourly(self): tm.assert_series_equal(result, s.iloc[:60]) assert s['2005-1-1 20:00'] == s.iloc[0] - pytest.raises(Exception, s.__getitem__, '2004-12-31 00:15') + with pytest.raises(KeyError, match=r"^'2004-12-31 00:15'$"): + s['2004-12-31 00:15'] def test_partial_slice_minutely(self): rng = date_range(freq='S', start=datetime(2005, 1, 1, 23, 59, 0), @@ -207,7 +210,8 @@ def test_partial_slice_minutely(self): tm.assert_series_equal(result, s.iloc[:60]) assert s[Timestamp('2005-1-1 23:59:00')] == s.iloc[0] - pytest.raises(Exception, s.__getitem__, '2004-12-31 00:00:00') + with pytest.raises(KeyError, match=r"^'2004-12-31 00:00:00'$"): + s['2004-12-31 00:00:00'] def test_partial_slice_second_precision(self): rng = date_range(start=datetime(2005, 1, 1, 0, 0, 59, @@ -255,7 +259,9 @@ def test_partial_slicing_dataframe(self): result = df['a'][ts_string] assert isinstance(result, np.int64) assert result == expected - pytest.raises(KeyError, df.__getitem__, ts_string) + msg = r"^'{}'$".format(ts_string) + with pytest.raises(KeyError, match=msg): + df[ts_string] # Timestamp with resolution less precise than index for fmt in formats[:rnum]: @@ -282,15 +288,20 @@ def test_partial_slicing_dataframe(self): result = df['a'][ts_string] assert isinstance(result, np.int64) assert result == 2 - pytest.raises(KeyError, df.__getitem__, ts_string) + msg = r"^'{}'$".format(ts_string) + with pytest.raises(KeyError, match=msg): + df[ts_string] # Not compatible with existing key # Should raise KeyError for fmt, res in list(zip(formats, resolutions))[rnum + 1:]: ts = index[1] + Timedelta("1 " + res) ts_string = ts.strftime(fmt) - pytest.raises(KeyError, df['a'].__getitem__, ts_string) - pytest.raises(KeyError, df.__getitem__, ts_string) + msg = r"^'{}'$".format(ts_string) + with pytest.raises(KeyError, match=msg): + df['a'][ts_string] + with pytest.raises(KeyError, match=msg): + df[ts_string] def test_partial_slicing_with_multiindex(self): @@ -316,11 +327,10 @@ def test_partial_slicing_with_multiindex(self): # this is an IndexingError as we don't do partial string selection on # multi-levels. - def f(): + msg = "Too many indexers" + with pytest.raises(IndexingError, match=msg): df_multi.loc[('2013-06-19', 'ACCT1', 'ABC')] - pytest.raises(IndexingError, f) - # GH 4294 # partial slice on a series mi s = pd.DataFrame(np.random.rand(1000, 1000), index=pd.date_range( diff --git a/pandas/tests/indexes/datetimes/test_scalar_compat.py b/pandas/tests/indexes/datetimes/test_scalar_compat.py index 680eddd27cf9f..42338a751e0fc 100644 --- a/pandas/tests/indexes/datetimes/test_scalar_compat.py +++ b/pandas/tests/indexes/datetimes/test_scalar_compat.py @@ -7,6 +7,8 @@ import numpy as np import pytest +from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime + import pandas as pd from pandas import DatetimeIndex, Timestamp, date_range import pandas.util.testing as tm @@ -27,10 +29,14 @@ def test_dti_date(self): expected = [t.date() for t in rng] assert (result == expected).all() - def test_dti_date_out_of_range(self): + @pytest.mark.parametrize('data', [ + ['1400-01-01'], + [datetime(1400, 1, 1)]]) + def test_dti_date_out_of_range(self, data): # GH#1475 - pytest.raises(ValueError, DatetimeIndex, ['1400-01-01']) - pytest.raises(ValueError, DatetimeIndex, [datetime(1400, 1, 1)]) + msg = "Out of bounds nanosecond timestamp: 1400-01-01 00:00:00" + with pytest.raises(OutOfBoundsDatetime, match=msg): + DatetimeIndex(data) @pytest.mark.parametrize('field', [ 'dayofweek', 'dayofyear', 'week', 'weekofyear', 'quarter', @@ -74,9 +80,15 @@ def test_round_daily(self): result = dti.round('s') tm.assert_index_equal(result, dti) - # invalid - for freq in ['Y', 'M', 'foobar']: - pytest.raises(ValueError, lambda: dti.round(freq)) + @pytest.mark.parametrize('freq, error_msg', [ + ('Y', ' is a non-fixed frequency'), + ('M', ' is a non-fixed frequency'), + ('foobar', 'Invalid frequency: foobar')]) + def test_round_invalid(self, freq, error_msg): + dti = date_range('20130101 09:10:11', periods=5) + dti = dti.tz_localize('UTC').tz_convert('US/Eastern') + with pytest.raises(ValueError, match=error_msg): + dti.round(freq) def test_round(self, tz_naive_fixture): tz = tz_naive_fixture diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index bec2fa66c43cd..38f5eab15041f 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -346,12 +346,16 @@ def test_to_datetime_dt64s(self, cache): for dt in in_bound_dts: assert pd.to_datetime(dt, cache=cache) == Timestamp(dt) - oob_dts = [np.datetime64('1000-01-01'), np.datetime64('5000-01-02'), ] - - for dt in oob_dts: - pytest.raises(ValueError, pd.to_datetime, dt, errors='raise') - pytest.raises(ValueError, Timestamp, dt) - assert pd.to_datetime(dt, errors='coerce', cache=cache) is NaT + @pytest.mark.parametrize('dt', [np.datetime64('1000-01-01'), + np.datetime64('5000-01-02')]) + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_dt64s_out_of_bounds(self, cache, dt): + msg = "Out of bounds nanosecond timestamp: {}".format(dt) + with pytest.raises(OutOfBoundsDatetime, match=msg): + pd.to_datetime(dt, errors='raise') + with pytest.raises(OutOfBoundsDatetime, match=msg): + Timestamp(dt) + assert pd.to_datetime(dt, errors='coerce', cache=cache) is NaT @pytest.mark.parametrize('cache', [True, False]) def test_to_datetime_array_of_dt64s(self, cache): @@ -367,8 +371,9 @@ def test_to_datetime_array_of_dt64s(self, cache): # A list of datetimes where the last one is out of bounds dts_with_oob = dts + [np.datetime64('9999-01-01')] - pytest.raises(ValueError, pd.to_datetime, dts_with_oob, - errors='raise') + msg = "Out of bounds nanosecond timestamp: 9999-01-01 00:00:00" + with pytest.raises(OutOfBoundsDatetime, match=msg): + pd.to_datetime(dts_with_oob, errors='raise') tm.assert_numpy_array_equal( pd.to_datetime(dts_with_oob, box=False, errors='coerce', @@ -410,7 +415,10 @@ def test_to_datetime_tz(self, cache): # mixed tzs will raise arr = [pd.Timestamp('2013-01-01 13:00:00', tz='US/Pacific'), pd.Timestamp('2013-01-02 14:00:00', tz='US/Eastern')] - pytest.raises(ValueError, lambda: pd.to_datetime(arr, cache=cache)) + msg = ("Tz-aware datetime.datetime cannot be converted to datetime64" + " unless utc=True") + with pytest.raises(ValueError, match=msg): + pd.to_datetime(arr, cache=cache) @pytest.mark.parametrize('cache', [True, False]) def test_to_datetime_tz_pytz(self, cache): @@ -1088,9 +1096,9 @@ def test_to_datetime_on_datetime64_series(self, cache): def test_to_datetime_with_space_in_series(self, cache): # GH 6428 s = Series(['10/18/2006', '10/18/2008', ' ']) - pytest.raises(ValueError, lambda: to_datetime(s, - errors='raise', - cache=cache)) + msg = r"(\(u?')?String does not contain a date(:', ' '\))?" + with pytest.raises(ValueError, match=msg): + to_datetime(s, errors='raise', cache=cache) result_coerce = to_datetime(s, errors='coerce', cache=cache) expected_coerce = Series([datetime(2006, 10, 18), datetime(2008, 10, 18), @@ -1111,13 +1119,12 @@ def test_to_datetime_with_apply(self, cache): assert_series_equal(result, expected) td = pd.Series(['May 04', 'Jun 02', ''], index=[1, 2, 3]) - pytest.raises(ValueError, - lambda: pd.to_datetime(td, format='%b %y', - errors='raise', - cache=cache)) - pytest.raises(ValueError, - lambda: td.apply(pd.to_datetime, format='%b %y', - errors='raise', cache=cache)) + msg = r"time data '' does not match format '%b %y' \(match\)" + with pytest.raises(ValueError, match=msg): + pd.to_datetime(td, format='%b %y', errors='raise', cache=cache) + with pytest.raises(ValueError, match=msg): + td.apply(pd.to_datetime, format='%b %y', + errors='raise', cache=cache) expected = pd.to_datetime(td, format='%b %y', errors='coerce', cache=cache) @@ -1168,8 +1175,9 @@ def test_to_datetime_unprocessable_input(self, cache, box, klass): result = to_datetime([1, '1'], errors='ignore', cache=cache, box=box) expected = klass(np.array([1, '1'], dtype='O')) tm.assert_equal(result, expected) - pytest.raises(TypeError, to_datetime, [1, '1'], errors='raise', - cache=cache, box=box) + msg = "invalid string coercion to datetime" + with pytest.raises(TypeError, match=msg): + to_datetime([1, '1'], errors='raise', cache=cache, box=box) def test_to_datetime_other_datetime64_units(self): # 5/25/2012 @@ -1225,17 +1233,18 @@ def test_string_na_nat_conversion(self, cache): malformed = np.array(['1/100/2000', np.nan], dtype=object) # GH 10636, default is now 'raise' - pytest.raises(ValueError, - lambda: to_datetime(malformed, errors='raise', - cache=cache)) + msg = (r"\(u?'Unknown string format:', '1/100/2000'\)|" + "day is out of range for month") + with pytest.raises(ValueError, match=msg): + to_datetime(malformed, errors='raise', cache=cache) result = to_datetime(malformed, errors='ignore', cache=cache) # GH 21864 expected = Index(malformed) tm.assert_index_equal(result, expected) - pytest.raises(ValueError, to_datetime, malformed, errors='raise', - cache=cache) + with pytest.raises(ValueError, match=msg): + to_datetime(malformed, errors='raise', cache=cache) idx = ['a', 'b', 'c', 'd', 'e'] series = Series(['1/1/2000', np.nan, '1/3/2000', np.nan, @@ -1414,14 +1423,24 @@ def test_day_not_in_month_coerce(self, cache): @pytest.mark.parametrize('cache', [True, False]) def test_day_not_in_month_raise(self, cache): - pytest.raises(ValueError, to_datetime, '2015-02-29', - errors='raise', cache=cache) - pytest.raises(ValueError, to_datetime, '2015-02-29', - errors='raise', format="%Y-%m-%d", cache=cache) - pytest.raises(ValueError, to_datetime, '2015-02-32', - errors='raise', format="%Y-%m-%d", cache=cache) - pytest.raises(ValueError, to_datetime, '2015-04-31', - errors='raise', format="%Y-%m-%d", cache=cache) + msg = "day is out of range for month" + with pytest.raises(ValueError, match=msg): + to_datetime('2015-02-29', errors='raise', cache=cache) + + msg = "time data 2015-02-29 doesn't match format specified" + with pytest.raises(ValueError, match=msg): + to_datetime('2015-02-29', errors='raise', format="%Y-%m-%d", + cache=cache) + + msg = "time data 2015-02-32 doesn't match format specified" + with pytest.raises(ValueError, match=msg): + to_datetime('2015-02-32', errors='raise', format="%Y-%m-%d", + cache=cache) + + msg = "time data 2015-04-31 doesn't match format specified" + with pytest.raises(ValueError, match=msg): + to_datetime('2015-04-31', errors='raise', format="%Y-%m-%d", + cache=cache) @pytest.mark.parametrize('cache', [True, False]) def test_day_not_in_month_ignore(self, cache): @@ -1656,7 +1675,9 @@ def test_parsers_time(self): assert tools.to_time(time_string) == expected new_string = "14.15" - pytest.raises(ValueError, tools.to_time, new_string) + msg = r"Cannot convert arg \['14\.15'\] to a time" + with pytest.raises(ValueError, match=msg): + tools.to_time(new_string) assert tools.to_time(new_string, format="%H.%M") == expected arg = ["14:15", "20:20"] diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index 911cd990ab881..48debfa2848e7 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -95,7 +95,10 @@ def test_resample_interpolate_all_ts(frame): def test_raises_on_non_datetimelike_index(): # this is a non datetimelike index xp = DataFrame() - pytest.raises(TypeError, lambda: xp.resample('A').mean()) + msg = ("Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex," + " but got an instance of 'Index'") + with pytest.raises(TypeError, match=msg): + xp.resample('A').mean() @pytest.mark.parametrize('freq', ['M', 'D', 'H']) @@ -189,8 +192,10 @@ def test_resample_loffset_arg_type_all_ts(frame, create_index): # GH 13022, 7687 - TODO: fix resample w/ TimedeltaIndex if isinstance(expected.index, TimedeltaIndex): - with pytest.raises(AssertionError): + msg = "DataFrame are different" + with pytest.raises(AssertionError, match=msg): assert_frame_equal(result_agg, expected) + with pytest.raises(AssertionError, match=msg): assert_frame_equal(result_how, expected) else: assert_frame_equal(result_agg, expected) diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 73995cbe79ecd..74487052f8982 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -113,16 +113,18 @@ def test_resample_basic_grouper(series): @pytest.mark.parametrize( '_index_start,_index_end,_index_name', [('1/1/2000 00:00:00', '1/1/2000 00:13:00', 'index')]) -@pytest.mark.parametrize('kwargs', [ - dict(label='righttt'), - dict(closed='righttt'), - dict(convention='starttt') +@pytest.mark.parametrize('keyword,value', [ + ('label', 'righttt'), + ('closed', 'righttt'), + ('convention', 'starttt') ]) -def test_resample_string_kwargs(series, kwargs): +def test_resample_string_kwargs(series, keyword, value): # see gh-19303 # Check that wrong keyword argument strings raise an error - with pytest.raises(ValueError, match='Unsupported value'): - series.resample('5min', **kwargs) + msg = "Unsupported value {value} for `{keyword}`".format( + value=value, keyword=keyword) + with pytest.raises(ValueError, match=msg): + series.resample('5min', **({keyword: value})) @pytest.mark.parametrize( @@ -676,7 +678,7 @@ def test_asfreq_non_unique(): ts = Series(np.random.randn(len(rng2)), index=rng2) msg = 'cannot reindex from a duplicate axis' - with pytest.raises(Exception, match=msg): + with pytest.raises(ValueError, match=msg): ts.asfreq('B') diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py index c2fbb5bbb088c..8abdf9034527b 100644 --- a/pandas/tests/resample/test_period_index.py +++ b/pandas/tests/resample/test_period_index.py @@ -11,6 +11,7 @@ import pandas as pd from pandas import DataFrame, Series, Timestamp +from pandas.core.indexes.base import InvalidIndexError from pandas.core.indexes.datetimes import date_range from pandas.core.indexes.period import Period, PeriodIndex, period_range from pandas.core.resample import _get_period_range_edges @@ -72,17 +73,19 @@ def test_asfreq_fill_value(self, series): @pytest.mark.parametrize('freq', ['H', '12H', '2D', 'W']) @pytest.mark.parametrize('kind', [None, 'period', 'timestamp']) - def test_selection(self, index, freq, kind): + @pytest.mark.parametrize('kwargs', [dict(on='date'), dict(level='d')]) + def test_selection(self, index, freq, kind, kwargs): # This is a bug, these should be implemented # GH 14008 rng = np.arange(len(index), dtype=np.int64) df = DataFrame({'date': index, 'a': rng}, index=pd.MultiIndex.from_arrays([rng, index], names=['v', 'd'])) - with pytest.raises(NotImplementedError): - df.resample(freq, on='date', kind=kind) - with pytest.raises(NotImplementedError): - df.resample(freq, level='d', kind=kind) + msg = ("Resampling from level= or on= selection with a PeriodIndex is" + r" not currently supported, use \.set_index\(\.\.\.\) to" + " explicitly set index") + with pytest.raises(NotImplementedError, match=msg): + df.resample(freq, kind=kind, **kwargs) @pytest.mark.parametrize('month', MONTHS) @pytest.mark.parametrize('meth', ['ffill', 'bfill']) @@ -110,13 +113,20 @@ def test_basic_downsample(self, simple_period_range_series): assert_series_equal(ts.resample('a-dec').mean(), result) assert_series_equal(ts.resample('a').mean(), result) - def test_not_subperiod(self, simple_period_range_series): + @pytest.mark.parametrize('rule,expected_error_msg', [ + ('a-dec', ''), + ('q-mar', ''), + ('M', ''), + ('w-thu', '') + ]) + def test_not_subperiod( + self, simple_period_range_series, rule, expected_error_msg): # These are incompatible period rules for resampling ts = simple_period_range_series('1/1/1990', '6/30/1995', freq='w-wed') - pytest.raises(ValueError, lambda: ts.resample('a-dec').mean()) - pytest.raises(ValueError, lambda: ts.resample('q-mar').mean()) - pytest.raises(ValueError, lambda: ts.resample('M').mean()) - pytest.raises(ValueError, lambda: ts.resample('w-thu').mean()) + msg = ("Frequency cannot be resampled to {}, as they" + " are not sub or super periods").format(expected_error_msg) + with pytest.raises(IncompatibleFrequency, match=msg): + ts.resample(rule).mean() @pytest.mark.parametrize('freq', ['D', '2D']) def test_basic_upsample(self, freq, simple_period_range_series): @@ -212,8 +222,9 @@ def test_resample_same_freq(self, resample_method): assert_series_equal(result, expected) def test_resample_incompat_freq(self): - - with pytest.raises(IncompatibleFrequency): + msg = ("Frequency cannot be resampled to ," + " as they are not sub or super periods") + with pytest.raises(IncompatibleFrequency, match=msg): Series(range(3), index=pd.period_range( start='2000', periods=3, freq='M')).resample('W').mean() @@ -373,7 +384,9 @@ def test_resample_fill_missing(self): def test_cant_fill_missing_dups(self): rng = PeriodIndex([2000, 2005, 2005, 2007, 2007], freq='A') s = Series(np.random.randn(5), index=rng) - pytest.raises(Exception, lambda: s.resample('A').ffill()) + msg = "Reindexing only valid with uniquely valued Index objects" + with pytest.raises(InvalidIndexError, match=msg): + s.resample('A').ffill() @pytest.mark.parametrize('freq', ['5min']) @pytest.mark.parametrize('kind', ['period', None, 'timestamp']) diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index 69684daf05f3d..a694942cc4c40 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -113,16 +113,14 @@ def test_getitem(): test_frame.columns[[0, 1]]) -def test_select_bad_cols(): - +@pytest.mark.parametrize('key', [['D'], ['A', 'D']]) +def test_select_bad_cols(key): g = test_frame.resample('H') - pytest.raises(KeyError, g.__getitem__, ['D']) - - pytest.raises(KeyError, g.__getitem__, ['A', 'D']) - with pytest.raises(KeyError, match='^[^A]+$'): - # A should not be referenced as a bad column... - # will have to rethink regex if you change message! - g[['A', 'D']] + # 'A' should not be referenced as a bad column... + # will have to rethink regex if you change message! + msg = r"^\"Columns not found: 'D'\"$" + with pytest.raises(KeyError, match=msg): + g[key] def test_attribute_access(): @@ -216,7 +214,9 @@ def test_fillna(): result = r.fillna(method='bfill') assert_series_equal(result, expected) - with pytest.raises(ValueError): + msg = (r"Invalid fill method\. Expecting pad \(ffill\), backfill" + r" \(bfill\) or nearest\. Got 0") + with pytest.raises(ValueError, match=msg): r.fillna(0) @@ -437,12 +437,11 @@ def test_agg_misc(): # errors # invalid names in the agg specification + msg = "\"Column 'B' does not exist!\"" for t in cases: - with pytest.raises(KeyError): - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - t[['A']].agg({'A': ['sum', 'std'], - 'B': ['mean', 'std']}) + with pytest.raises(KeyError, match=msg): + t[['A']].agg({'A': ['sum', 'std'], + 'B': ['mean', 'std']}) def test_agg_nested_dicts(): @@ -464,11 +463,11 @@ def test_agg_nested_dicts(): df.groupby(pd.Grouper(freq='2D')) ] + msg = r"cannot perform renaming for r(1|2) with a nested dictionary" for t in cases: - def f(): + with pytest.raises(pd.core.base.SpecificationError, match=msg): t.aggregate({'r1': {'A': ['mean', 'sum']}, 'r2': {'B': ['mean', 'sum']}}) - pytest.raises(ValueError, f) for t in cases: expected = pd.concat([t['A'].mean(), t['A'].std(), t['B'].mean(), @@ -499,7 +498,8 @@ def test_try_aggregate_non_existing_column(): df = DataFrame(data).set_index('dt') # Error as we don't have 'z' column - with pytest.raises(KeyError): + msg = "\"Column 'z' does not exist!\"" + with pytest.raises(KeyError, match=msg): df.resample('30T').agg({'x': ['mean'], 'y': ['median'], 'z': ['sum']}) @@ -517,23 +517,29 @@ def test_selection_api_validation(): df_exp = DataFrame({'a': rng}, index=index) # non DatetimeIndex - with pytest.raises(TypeError): + msg = ("Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex," + " but got an instance of 'Int64Index'") + with pytest.raises(TypeError, match=msg): df.resample('2D', level='v') - with pytest.raises(ValueError): + msg = "The Grouper cannot specify both a key and a level!" + with pytest.raises(ValueError, match=msg): df.resample('2D', on='date', level='d') - with pytest.raises(TypeError): + msg = "unhashable type: 'list'" + with pytest.raises(TypeError, match=msg): df.resample('2D', on=['a', 'date']) - with pytest.raises(KeyError): + msg = r"\"Level \['a', 'date'\] not found\"" + with pytest.raises(KeyError, match=msg): df.resample('2D', level=['a', 'date']) # upsampling not allowed - with pytest.raises(ValueError): + msg = ("Upsampling from level= or on= selection is not supported, use" + r" \.set_index\(\.\.\.\) to explicitly set index to datetime-like") + with pytest.raises(ValueError, match=msg): df.resample('2D', level='d').asfreq() - - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg): df.resample('2D', on='date').asfreq() exp = df_exp.resample('2D').sum() diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py index ec29b55ac9d67..a4eb7933738c0 100644 --- a/pandas/tests/resample/test_time_grouper.py +++ b/pandas/tests/resample/test_time_grouper.py @@ -112,7 +112,7 @@ def test_fails_on_no_datetime_index(name, func): df = DataFrame({'a': np.random.randn(n)}, index=index) msg = ("Only valid with DatetimeIndex, TimedeltaIndex " - "or PeriodIndex, but got an instance of %r" % name) + "or PeriodIndex, but got an instance of '{}'".format(name)) with pytest.raises(TypeError, match=msg): df.groupby(TimeGrouper('D'))