diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index e42752cca9043..1390e60179aae 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -566,6 +566,7 @@ Other API changes - Removed support of gtk package for clipboards (:issue:`26563`) - Using an unsupported version of Beautiful Soup 4 will now raise an ``ImportError`` instead of a ``ValueError`` (:issue:`27063`) - :meth:`Series.to_excel` and :meth:`DataFrame.to_excel` will now raise a ``ValueError`` when saving timezone aware data. (:issue:`27008`, :issue:`7056`) +- :meth:`DataFrame.to_hdf` and :meth:`Series.to_hdf` will now raise a ``NotImplementedError`` when saving a :class:`MultiIndex` with extension data types for a ``fixed`` format. (:issue:`7775`) .. _whatsnew_0250.deprecations: @@ -719,6 +720,7 @@ Timezones - Bug in :func:`to_datetime` with ``unit='ns'`` would drop timezone information from the parsed argument (:issue:`26168`) - Bug in :func:`DataFrame.join` where joining a timezone aware index with a timezone aware column would result in a column of ``NaN`` (:issue:`26335`) - Bug in :func:`date_range` where ambiguous or nonexistent start or end times were not handled by the ``ambiguous`` or ``nonexistent`` keywords respectively (:issue:`27088`) +- Bug in :meth:`DatetimeIndex.union` when combining a timezone aware and timezone unaware :class:`DatetimeIndex` (:issue:`21671`) Numeric ^^^^^^^ @@ -814,6 +816,7 @@ I/O - :func:`read_excel` now raises a ``ValueError`` when input is of type :class:`pandas.io.excel.ExcelFile` and ``engine`` param is passed since :class:`pandas.io.excel.ExcelFile` has an engine defined (:issue:`26566`) - Bug while selecting from :class:`HDFStore` with ``where=''`` specified (:issue:`26610`). - Fixed bug in :func:`DataFrame.to_excel()` where custom objects (i.e. 
`PeriodIndex`) inside merged cells were not being converted into types safe for the Excel writer (:issue:`27006`) +- Bug in :meth:`read_hdf` where reading a timezone aware :class:`DatetimeIndex` would raise a ``TypeError`` (:issue:`11926`) Plotting ^^^^^^^^ @@ -868,6 +871,7 @@ Reshaping - Bug in :meth:`Series.nlargest` treats ``True`` as smaller than ``False`` (:issue:`26154`) - Bug in :func:`DataFrame.pivot_table` with a :class:`IntervalIndex` as pivot index would raise ``TypeError`` (:issue:`25814`) - Bug in :meth:`DataFrame.transpose` where transposing a DataFrame with a timezone-aware datetime column would incorrectly raise ``ValueError`` (:issue:`26825`) +- Bug in :func:`pivot_table` when pivoting a timezone aware column as the ``values`` would remove timezone information (:issue:`14948`) Sparse ^^^^^^ diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index c8c27f62cef34..f439e365fbcf0 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -23,7 +23,8 @@ from pandas.core.dtypes.common import ( ensure_object, is_categorical_dtype, is_datetime64_dtype, - is_datetime64tz_dtype, is_list_like, is_timedelta64_dtype) + is_datetime64tz_dtype, is_extension_type, is_list_like, + is_timedelta64_dtype) from pandas.core.dtypes.missing import array_equivalent from pandas import ( @@ -2647,6 +2648,9 @@ def write_multi_index(self, key, index): index.codes, index.names)): # write the level + if is_extension_type(lev): + raise NotImplementedError("Saving a MultiIndex with an " + "extension dtype is not supported.") level_key = '{key}_level{idx}'.format(key=key, idx=i) conv_level = _convert_index(lev, self.encoding, self.errors, self.format_type).set_name(level_key) diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index 088007ba6af4b..af0183379790a 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -1094,6 +1094,18 @@ def 
test_dti_union_aware(self): assert result[0].tz.zone == 'US/Central' assert result[-1].tz.zone == 'US/Eastern' + def test_dti_union_mixed(self): + # GH 21671 + rng = DatetimeIndex([pd.Timestamp('2011-01-01'), pd.NaT]) + rng2 = pd.DatetimeIndex(['2012-01-01', '2012-01-02'], tz='Asia/Tokyo') + result = rng.union(rng2) + expected = Index([pd.Timestamp('2011-01-01'), + pd.NaT, + pd.Timestamp('2012-01-01', tz='Asia/Tokyo'), + pd.Timestamp('2012-01-02', tz='Asia/Tokyo')], + dtype=object) + tm.assert_index_equal(result, expected) + @pytest.mark.parametrize('tz', [None, 'UTC', "US/Central", dateutil.tz.tzoffset(None, -28800)]) @pytest.mark.usefixtures("datetime_tz_utc") diff --git a/pandas/tests/indexes/multi/test_format.py b/pandas/tests/indexes/multi/test_format.py index 85d30b8f6de6b..8413fc1318d0b 100644 --- a/pandas/tests/indexes/multi/test_format.py +++ b/pandas/tests/indexes/multi/test_format.py @@ -10,8 +10,8 @@ def test_dtype_str(indices): with tm.assert_produces_warning(FutureWarning): dtype = indices.dtype_str - assert isinstance(dtype, str) - assert dtype == str(indices.dtype) + assert isinstance(dtype, str) + assert dtype == str(indices.dtype) def test_format(idx): diff --git a/pandas/tests/io/pytables/test_pytables.py b/pandas/tests/io/pytables/test_pytables.py index 40cc05c317471..ec34739672718 100644 --- a/pandas/tests/io/pytables/test_pytables.py +++ b/pandas/tests/io/pytables/test_pytables.py @@ -17,9 +17,9 @@ import pandas as pd from pandas import ( - Categorical, DataFrame, DatetimeIndex, Index, Int64Index, MultiIndex, - RangeIndex, Series, Timestamp, bdate_range, concat, date_range, isna, - timedelta_range) + Categorical, CategoricalIndex, DataFrame, DatetimeIndex, Index, Int64Index, + MultiIndex, RangeIndex, Series, Timestamp, bdate_range, concat, date_range, + isna, timedelta_range) import pandas.util.testing as tm from pandas.util.testing import ( assert_frame_equal, assert_series_equal, set_timezone) @@ -4749,6 +4749,19 @@ def 
test_select_empty_where(self, where): result = pd.read_hdf(store, "df", where=where) assert_frame_equal(result, df) + @pytest.mark.parametrize('idx', [ + date_range('2019', freq='D', periods=3, tz='UTC'), + CategoricalIndex(list('abc')) + ]) + def test_to_hdf_multiindex_extension_dtype(self, idx): + # GH 7775 + mi = MultiIndex.from_arrays([idx, idx]) + df = pd.DataFrame(0, index=mi, columns=['a']) + with ensure_clean_path(self.path) as path: + with pytest.raises(NotImplementedError, + match="Saving a MultiIndex"): + df.to_hdf(path, 'df') + class TestHDFComplexValues(Base): # GH10447 @@ -5170,3 +5183,20 @@ def test_dst_transitions(self): store.append('df', df) result = store.select('df') assert_frame_equal(result, df) + + def test_read_with_where_tz_aware_index(self): + # GH 11926 + periods = 10 + dts = pd.date_range('20151201', periods=periods, + freq='D', tz='UTC') + mi = pd.MultiIndex.from_arrays([dts, range(periods)], + names=['DATE', 'NO']) + expected = pd.DataFrame({'MYCOL': 0}, index=mi) + + key = 'mykey' + with ensure_clean_path(self.path) as path: + with pd.HDFStore(path) as store: + store.append(key, expected, format='table', append=True) + result = pd.read_hdf(path, key, + where="DATE > 20151130") + assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 8543d2c2df7d6..7def8e53859c7 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -429,6 +429,46 @@ def test_pivot_with_tz(self, method): pv = pd.pivot(df, index='dt1', columns='dt2', values='data1') tm.assert_frame_equal(pv, expected) + def test_pivot_tz_in_values(self): + # GH 14948 + df = pd.DataFrame([{'uid': u'aa', + 'ts': pd.Timestamp('2016-08-12 13:00:00-0700', + tz='US/Pacific')}, + {'uid': u'aa', + 'ts': pd.Timestamp('2016-08-12 08:00:00-0700', + tz='US/Pacific')}, + {'uid': u'aa', + 'ts': pd.Timestamp('2016-08-12 14:00:00-0700', + tz='US/Pacific')}, + {'uid': u'aa', + 'ts': 
pd.Timestamp('2016-08-25 11:00:00-0700', + tz='US/Pacific')}, + {'uid': u'aa', + 'ts': pd.Timestamp('2016-08-25 13:00:00-0700', + tz='US/Pacific')}]) + + df = df.set_index('ts').reset_index() + mins = df.ts.map(lambda x: x.replace(hour=0, minute=0, + second=0, microsecond=0)) + + result = pd.pivot_table(df.set_index('ts').reset_index(), + values='ts', index=['uid'], columns=[mins], + aggfunc=np.min) + expected = pd.DataFrame( + [ + [pd.Timestamp('2016-08-12 08:00:00-0700', tz='US/Pacific'), + pd.Timestamp('2016-08-25 11:00:00-0700', tz='US/Pacific')] + ], + index=pd.Index(['aa'], name='uid'), + columns=pd.DatetimeIndex( + [ + pd.Timestamp('2016-08-12 00:00:00', tz='US/Pacific'), + pd.Timestamp('2016-08-25 00:00:00', tz='US/Pacific') + ], + name='ts') + ) + tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize('method', [True, False]) def test_pivot_periods(self, method): df = DataFrame({'p1': [pd.Period('2013-01-01', 'D'),