diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 6407a33c442d0d..a137f6cdae6645 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -145,6 +145,7 @@ Other Enhancements - ``Resampler`` objects now have a functioning :attr:`~pandas.core.resample.Resampler.pipe` method. Previously, calls to ``pipe`` were diverted to the ``mean`` method (:issue:`17905`). - :func:`~pandas.api.types.is_scalar` now returns ``True`` for ``DateOffset`` objects (:issue:`18943`). +- :meth:`DataFrame.append` now preserves the type of the calling DataFrame's columns, when possible (:issue:`18359`). .. _whatsnew_0230.api_breaking: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9acc82b50aabfe..16caa71a38d208 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5039,8 +5039,12 @@ def append(self, other, ignore_index=False, verify_integrity=False): # index name will be reset index = Index([other.name], name=self.index.name) - combined_columns = self.columns.tolist() + self.columns.union( - other.index).difference(self.columns).tolist() + idx_diff = other.index.difference(self.columns) + try: + combined_columns = self.columns.append(idx_diff) + except TypeError: + lst = self.columns.tolist() + combined_columns = Index(lst).append(idx_diff) other = other.reindex(combined_columns, copy=False) other = DataFrame(other.values.reshape((1, len(other))), index=index, diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index f66cb12b112109..814d9c7fcc2864 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -1,5 +1,6 @@ from warnings import catch_warnings +import datetime as dt import dateutil import numpy as np from numpy.random import randn @@ -820,11 +821,59 @@ def test_append_preserve_index_name(self): result = df1.append(df2) assert result.index.name == 'A' + @pytest.mark.parametrize("df_columns", [ + pd.RangeIndex(3), 
pd.CategoricalIndex('A B C'.split()), + pd.MultiIndex.from_arrays(['A B C'.split(), 'D E F'.split()]), + pd.IntervalIndex.from_breaks([0, 1, 2, 3]), + pd.DatetimeIndex([dt.datetime(2013, 1, 3, 0, 0), + dt.datetime(2013, 1, 3, 6, 10), + dt.datetime(2013, 1, 3, 7, 12)]), + pd.Index([1, 2, 3]), + ]) + def test_append_same_columns_type(self, df_columns): + # GH18359 + + # ser.index is a normal pd.Index, result from df.append(ser) should be + # pd.Index (but this is not possible for IntervalIndex and MultiIndex) + if not isinstance(df_columns, (pd.IntervalIndex, pd.MultiIndex)): + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=df_columns) + ser = pd.Series([7], index=['a'], name=2) + result = df.append(ser) + idx_diff = ser.index.difference(df_columns) + combined_columns = Index(df_columns.tolist()).append(idx_diff) + expected = pd.DataFrame([[1., 2., 3., np.nan], + [4, 5, 6, np.nan], + [np.nan, np.nan, np.nan, 7]], + index=[0, 1, 2], + columns=combined_columns) + assert_frame_equal(result, expected) + + # df wider than ser + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=df_columns) + ser_index = df_columns[:2] + ser = pd.Series([7, 8], index=ser_index, name=2) + result = df.append(ser) + expected = pd.DataFrame([[1., 2., 3.], [4, 5, 6], [7, 8, np.nan]], + index=[0, 1, 2], + columns=df_columns) + assert_frame_equal(result, expected) + + # ser wider than df + ser_index = df_columns + df_columns = df_columns[:2] + df = pd.DataFrame([[1, 2], [4, 5]], columns=df_columns) + ser = pd.Series([7, 8, 9], index=ser_index, name=2) + result = df.append(ser) + expected = pd.DataFrame([[1, 2, np.nan], [4, 5, np.nan], [7, 8, 9]], + index=[0, 1, 2], + columns=ser_index) + assert_frame_equal(result, expected) + def test_append_dtype_coerce(self): # GH 4993 # appending with datetime will incorrectly convert datetime64 - import datetime as dt from pandas import NaT df1 = DataFrame(index=[1, 2], data=[dt.datetime(2013, 1, 1, 0, 0), diff --git a/pandas/tests/reshape/test_pivot.py 
b/pandas/tests/reshape/test_pivot.py index 786c57a4a82df2..2a6b927b9c16d1 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1457,12 +1457,14 @@ def test_crosstab_normalize(self): index=pd.Index([1, 2, 'All'], name='a', dtype='object'), - columns=pd.Index([3, 4], name='b')) + columns=pd.Index([3, 4], name='b', + dtype='object')) col_normal_margins = pd.DataFrame([[0.5, 0, 0.2], [0.5, 1.0, 0.8]], index=pd.Index([1, 2], name='a', dtype='object'), columns=pd.Index([3, 4, 'All'], - name='b')) + name='b', + dtype='object')) all_normal_margins = pd.DataFrame([[0.2, 0, 0.2], [0.2, 0.6, 0.8], @@ -1471,7 +1473,8 @@ def test_crosstab_normalize(self): name='a', dtype='object'), columns=pd.Index([3, 4, 'All'], - name='b')) + name='b', + dtype='object')) tm.assert_frame_equal(pd.crosstab(df.a, df.b, normalize='index', margins=True), row_normal_margins) tm.assert_frame_equal(pd.crosstab(df.a, df.b, normalize='columns',