Skip to content

Commit

Permalink
DataFrame.append preserves columns dtype if possible
Browse files Browse the repository at this point in the history
  • Loading branch information
tp committed Jan 1, 2018
1 parent 0e3c797 commit 6bb6860
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 6 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Expand Up @@ -145,6 +145,7 @@ Other Enhancements
- ``Resampler`` objects now have a functioning :attr:`~pandas.core.resample.Resampler.pipe` method.
Previously, calls to ``pipe`` were diverted to the ``mean`` method (:issue:`17905`).
- :func:`~pandas.api.types.is_scalar` now returns ``True`` for ``DateOffset`` objects (:issue:`18943`).
- :meth:`DataFrame.append` now preserves the type of the calling ``DataFrame``'s columns, when possible (:issue:`18359`).

.. _whatsnew_0230.api_breaking:

Expand Down
8 changes: 6 additions & 2 deletions pandas/core/frame.py
Expand Up @@ -5039,8 +5039,12 @@ def append(self, other, ignore_index=False, verify_integrity=False):
# index name will be reset
index = Index([other.name], name=self.index.name)

combined_columns = self.columns.tolist() + self.columns.union(
other.index).difference(self.columns).tolist()
idx_diff = other.index.difference(self.columns)
try:
combined_columns = self.columns.append(idx_diff)
except TypeError:
lst = self.columns.tolist()
combined_columns = Index(lst).append(idx_diff)
other = other.reindex(combined_columns, copy=False)
other = DataFrame(other.values.reshape((1, len(other))),
index=index,
Expand Down
51 changes: 50 additions & 1 deletion pandas/tests/reshape/test_concat.py
@@ -1,5 +1,6 @@
from warnings import catch_warnings

import datetime as dt
import dateutil
import numpy as np
from numpy.random import randn
Expand Down Expand Up @@ -820,11 +821,59 @@ def test_append_preserve_index_name(self):
result = df1.append(df2)
assert result.index.name == 'A'

@pytest.mark.parametrize(
    "df_columns",
    [
        pd.RangeIndex(3),
        pd.CategoricalIndex('A B C'.split()),
        pd.MultiIndex.from_arrays(['A B C'.split(), 'D E F'.split()]),
        pd.IntervalIndex.from_breaks([0, 1, 2, 3]),
        pd.DatetimeIndex([
            dt.datetime(2013, 1, 3, 0, 0),
            dt.datetime(2013, 1, 3, 6, 10),
            dt.datetime(2013, 1, 3, 7, 12),
        ]),
        pd.Index([1, 2, 3]),
    ],
)
def test_append_same_columns_type(self, df_columns):
    """Check the columns of ``df.append(ser)`` for several index types.

    Regression test for GH18359. Three situations are exercised for each
    parametrized ``df_columns``:

    * the Series carries a label the frame does not have (skipped for
      ``IntervalIndex`` and ``MultiIndex`` columns);
    * the Series is indexed by the first two frame columns;
    * the frame only has the first two of the Series' labels.
    """
    row_labels = [0, 1, 2]

    # The appended Series has an ordinary pd.Index, so the combined columns
    # are expected to be an ordinary pd.Index as well. That expectation cannot
    # be built for IntervalIndex and MultiIndex columns, hence the guard.
    plain_index_possible = not isinstance(
        df_columns, (pd.IntervalIndex, pd.MultiIndex))
    if plain_index_possible:
        frame = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=df_columns)
        extra_row = pd.Series([7], index=['a'], name=2)
        appended = frame.append(extra_row)

        new_labels = extra_row.index.difference(df_columns)
        flat_columns = Index(df_columns.tolist())
        expected_columns = flat_columns.append(new_labels)
        expected_values = [
            [1., 2., 3., np.nan],
            [4, 5, 6, np.nan],
            [np.nan, np.nan, np.nan, 7],
        ]
        expected = pd.DataFrame(expected_values,
                                index=row_labels,
                                columns=expected_columns)
        assert_frame_equal(appended, expected)

    # Frame has more columns than the Series has labels.
    frame = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=df_columns)
    short_row = pd.Series([7, 8], index=df_columns[:2], name=2)
    appended = frame.append(short_row)
    expected = pd.DataFrame(
        [[1., 2., 3.], [4, 5, 6], [7, 8, np.nan]],
        index=row_labels,
        columns=df_columns,
    )
    assert_frame_equal(appended, expected)

    # Series has more labels than the frame has columns.
    full_labels = df_columns
    narrow_columns = df_columns[:2]
    frame = pd.DataFrame([[1, 2], [4, 5]], columns=narrow_columns)
    long_row = pd.Series([7, 8, 9], index=full_labels, name=2)
    appended = frame.append(long_row)
    expected = pd.DataFrame(
        [[1, 2, np.nan], [4, 5, np.nan], [7, 8, 9]],
        index=row_labels,
        columns=full_labels,
    )
    assert_frame_equal(appended, expected)

def test_append_dtype_coerce(self):

# GH 4993
# appending with datetime will incorrectly convert datetime64
import datetime as dt
from pandas import NaT

df1 = DataFrame(index=[1, 2], data=[dt.datetime(2013, 1, 1, 0, 0),
Expand Down
9 changes: 6 additions & 3 deletions pandas/tests/reshape/test_pivot.py
Expand Up @@ -1457,12 +1457,14 @@ def test_crosstab_normalize(self):
index=pd.Index([1, 2, 'All'],
name='a',
dtype='object'),
columns=pd.Index([3, 4], name='b'))
columns=pd.Index([3, 4], name='b',
dtype='object'))
col_normal_margins = pd.DataFrame([[0.5, 0, 0.2], [0.5, 1.0, 0.8]],
index=pd.Index([1, 2], name='a',
dtype='object'),
columns=pd.Index([3, 4, 'All'],
name='b'))
name='b',
dtype='object'))

all_normal_margins = pd.DataFrame([[0.2, 0, 0.2],
[0.2, 0.6, 0.8],
Expand All @@ -1471,7 +1473,8 @@ def test_crosstab_normalize(self):
name='a',
dtype='object'),
columns=pd.Index([3, 4, 'All'],
name='b'))
name='b',
dtype='object'))
tm.assert_frame_equal(pd.crosstab(df.a, df.b, normalize='index',
margins=True), row_normal_margins)
tm.assert_frame_equal(pd.crosstab(df.a, df.b, normalize='columns',
Expand Down

0 comments on commit 6bb6860

Please sign in to comment.