Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: subclassed .align returns normal DataFrame #13037

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
34 changes: 20 additions & 14 deletions pandas/core/generic.py
Expand Up @@ -4144,18 +4144,19 @@ def align(self, other, join='outer', axis=None, level=None, copy=True,
if isinstance(self, Series):
# this means other is a DataFrame, and we need to broadcast
# self
df = DataFrame(
dict((c, self) for c in other.columns),
**other._construct_axes_dict())
cons = self._constructor_expanddim
df = cons(dict((c, self) for c in other.columns),
**other._construct_axes_dict())
return df._align_frame(other, join=join, axis=axis,
level=level, copy=copy,
fill_value=fill_value, method=method,
limit=limit, fill_axis=fill_axis)
elif isinstance(other, Series):
# this means self is a DataFrame, and we need to broadcast
# other
df = DataFrame(dict((c, other) for c in self.columns),
**self._construct_axes_dict())
cons = other._constructor_expanddim
df = cons(dict((c, other) for c in self.columns),
**self._construct_axes_dict())
return self._align_frame(df, join=join, axis=axis, level=level,
copy=copy, fill_value=fill_value,
method=method, limit=limit,
Expand Down Expand Up @@ -4184,20 +4185,27 @@ def _align_frame(self, other, join='outer', axis=None, level=None,
ilidx, iridx = None, None
clidx, cridx = None, None

is_series = isinstance(self, ABCSeries)

if axis is None or axis == 0:
if not self.index.equals(other.index):
join_index, ilidx, iridx = self.index.join(
other.index, how=join, level=level, return_indexers=True)

if axis is None or axis == 1:
if not self.columns.equals(other.columns):
if not is_series and not self.columns.equals(other.columns):
join_columns, clidx, cridx = self.columns.join(
other.columns, how=join, level=level, return_indexers=True)

left = self._reindex_with_indexers({0: [join_index, ilidx],
1: [join_columns, clidx]},
copy=copy, fill_value=fill_value,
if is_series:
reindexers = {0: [join_index, ilidx]}
else:
reindexers = {0: [join_index, ilidx], 1: [join_columns, clidx]}

left = self._reindex_with_indexers(reindexers, copy=copy,
fill_value=fill_value,
allow_dups=True)
# other must be always DataFrame
right = other._reindex_with_indexers({0: [join_index, iridx],
1: [join_columns, cridx]},
copy=copy, fill_value=fill_value,
Expand All @@ -4212,10 +4220,8 @@ def _align_frame(self, other, join='outer', axis=None, level=None,
def _align_series(self, other, join='outer', axis=None, level=None,
copy=True, fill_value=None, method=None, limit=None,
fill_axis=0):
from pandas import DataFrame

# series/series compat
if isinstance(self, ABCSeries) and isinstance(other, ABCSeries):
# series/series compat, other must always be a Series
if isinstance(self, ABCSeries):
if axis:
raise ValueError('cannot align series to a series other than '
'axis 0')
Expand Down Expand Up @@ -4261,7 +4267,7 @@ def _align_series(self, other, join='outer', axis=None, level=None,
if copy and fdata is self._data:
fdata = fdata.copy()

left = DataFrame(fdata)
left = self._constructor(fdata)

if ridx is None:
right = other
Expand Down
21 changes: 21 additions & 0 deletions pandas/tests/frame/test_axis_select_reindex.py
Expand Up @@ -597,6 +597,27 @@ def test_align_multiindex(self):
assert_frame_equal(expr, res1r)
assert_frame_equal(expr, res2l)

def test_align_series_combinations(self):
df = pd.DataFrame({'a': [1, 3, 5],
'b': [1, 3, 5]}, index=list('ACE'))
s = pd.Series([1, 2, 4], index=list('ABD'), name='x')

# frame + series
res1, res2 = df.align(s, axis=0)
exp1 = pd.DataFrame({'a': [1, np.nan, 3, np.nan, 5],
'b': [1, np.nan, 3, np.nan, 5]},
index=list('ABCDE'))
exp2 = pd.Series([1, 2, np.nan, 4, np.nan],
index=list('ABCDE'), name='x')

tm.assert_frame_equal(res1, exp1)
tm.assert_series_equal(res2, exp2)

# series + frame
res1, res2 = s.align(df)
tm.assert_series_equal(res1, exp2)
tm.assert_frame_equal(res2, exp1)

def test_filter(self):
# items
filtered = self.frame.filter(['A', 'B', 'E'])
Expand Down
54 changes: 54 additions & 0 deletions pandas/tests/frame/test_subclass.py
Expand Up @@ -2,6 +2,8 @@

from __future__ import print_function

import numpy as np

from pandas import DataFrame, Series, MultiIndex, Panel
import pandas as pd
import pandas.util.testing as tm
Expand Down Expand Up @@ -156,3 +158,55 @@ def bar(self):
return self.i_dont_exist
with tm.assertRaisesRegexp(AttributeError, '.*i_dont_exist.*'):
A().bar

def test_subclass_align(self):
    """Check that ``align`` returns subclass instances for like operands.

    Covers frame/frame alignment along axis 0 and series/series
    alignment of columns taken from the same two frames.
    """
    # GH 12983
    left = tm.SubclassedDataFrame({'a': [1, 3, 5], 'b': [1, 3, 5]},
                                  index=list('ACE'))
    right = tm.SubclassedDataFrame({'c': [1, 2, 4], 'd': [1, 2, 4]},
                                   index=list('ABD'))

    # both operands are expected on the union of the two indexes
    union = list('ABCDE')
    want_left = tm.SubclassedDataFrame({'a': [1, np.nan, 3, np.nan, 5],
                                        'b': [1, np.nan, 3, np.nan, 5]},
                                       index=union)
    want_right = tm.SubclassedDataFrame({'c': [1, 2, np.nan, 4, np.nan],
                                         'd': [1, 2, np.nan, 4, np.nan]},
                                        index=union)

    # frame + frame
    got_left, got_right = left.align(right, axis=0)
    for got, want in ((got_left, want_left), (got_right, want_right)):
        tm.assertIsInstance(got, tm.SubclassedDataFrame)
        tm.assert_frame_equal(got, want)

    # series + series
    got_left, got_right = left.a.align(right.c)
    for got, want in ((got_left, want_left.a), (got_right, want_right.c)):
        tm.assertIsInstance(got, tm.SubclassedSeries)
        tm.assert_series_equal(got, want)

def test_subclass_align_combinations(self):
    """Check subclass preservation when aligning a frame with a series.

    Both call orders are exercised. Expected values are built as plain
    pandas objects; the subclass type is asserted separately.
    """
    # GH 12983
    frame = tm.SubclassedDataFrame({'a': [1, 3, 5], 'b': [1, 3, 5]},
                                   index=list('ACE'))
    ser = tm.SubclassedSeries([1, 2, 4], index=list('ABD'), name='x')

    union = list('ABCDE')
    padded = [1, np.nan, 3, np.nan, 5]
    want_frame = pd.DataFrame({'a': padded, 'b': padded}, index=union)
    # the aligned series is expected to keep its name ('x')
    want_ser = pd.Series([1, 2, np.nan, 4, np.nan], index=union, name='x')

    # frame + series
    got_frame, got_ser = frame.align(ser, axis=0)
    tm.assertIsInstance(got_frame, tm.SubclassedDataFrame)
    tm.assert_frame_equal(got_frame, want_frame)
    tm.assertIsInstance(got_ser, tm.SubclassedSeries)
    tm.assert_series_equal(got_ser, want_ser)

    # series + frame
    got_ser, got_frame = ser.align(frame)
    tm.assertIsInstance(got_ser, tm.SubclassedSeries)
    tm.assert_series_equal(got_ser, want_ser)
    tm.assertIsInstance(got_frame, tm.SubclassedDataFrame)
    tm.assert_frame_equal(got_frame, want_frame)
2 changes: 1 addition & 1 deletion pandas/util/testing.py
Expand Up @@ -2420,7 +2420,7 @@ def inner(*args, **kwargs):


class SubclassedSeries(Series):
_metadata = ['testattr']
_metadata = ['testattr', 'name']

@property
def _constructor(self):
Expand Down