Skip to content

Commit

Permalink
type diversity breaks alignment
Browse files Browse the repository at this point in the history
  • Loading branch information
behzadnouri committed Oct 4, 2014
1 parent 977034a commit 30246a7
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 18 deletions.
1 change: 1 addition & 0 deletions doc/source/v0.15.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1007,3 +1007,4 @@ Bug Fixes
- Bug in Index.intersection on non-monotonic non-unique indexes (:issue:`8362`).
- Bug in masked series assignment where mismatching types would break alignment (:issue:`8387`)
- Bug in NDFrame.equals giving false negatives with dtype=object (:issue:`8437`)
- Bug in assignment with indexer where type diversity would break alignment (:issue:`8258`)
24 changes: 10 additions & 14 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -439,16 +439,10 @@ def can_do_equal_len():
if isinstance(value, ABCDataFrame) and value.ndim > 1:

for item in labels:

# align to
if item in value:
v = value[item]
i = self.obj[item].index
v = v.reindex(i & v.index)

setter(item, v.values)
else:
setter(item, np.nan)
v = np.nan if item not in value else \
self._align_series(indexer[0], value[item])
setter(item, v)

# we have an equal len ndarray/convertible to our labels
elif np.array(value).ndim == 2:
Expand Down Expand Up @@ -511,6 +505,10 @@ def _align_series(self, indexer, ser):

if isinstance(indexer, tuple):

# flatten np.ndarray indexers
ravel = lambda i: i.ravel() if isinstance(i, np.ndarray) else i
indexer = tuple(map(ravel, indexer))

aligners = [not _is_null_slice(idx) for idx in indexer]
sum_aligners = sum(aligners)
single_aligner = sum_aligners == 1
Expand All @@ -536,12 +534,11 @@ def _align_series(self, indexer, ser):
# series, so need to broadcast (see GH5206)
if (sum_aligners == self.ndim and
all([com._is_sequence(_) for _ in indexer])):
ser = ser.reindex(obj.axes[0][indexer[0].ravel()],
copy=True).values
ser = ser.reindex(obj.axes[0][indexer[0]], copy=True).values

# single indexer
if len(indexer) > 1:
l = len(indexer[1].ravel())
l = len(indexer[1])
ser = np.tile(ser, l).reshape(l, -1).T

return ser
Expand All @@ -557,7 +554,7 @@ def _align_series(self, indexer, ser):
if not is_list_like(new_ix):
new_ix = Index([new_ix])
else:
new_ix = Index(new_ix.ravel())
new_ix = Index(new_ix)
if ser.index.equals(new_ix) or not len(new_ix):
return ser.values.copy()

Expand Down Expand Up @@ -1765,4 +1762,3 @@ def _maybe_droplevels(index, key):
pass

return index

1 change: 1 addition & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1405,6 +1405,7 @@ def test_setitem_frame(self):
# key is unaligned with values
f = self.mixed_frame.copy()
piece = f.ix[:2, ['A']]
piece.index = f.index[-2:]
key = (slice(-2, None), ['A', 'B'])
f.ix[key] = piece
piece['B'] = np.nan
Expand Down
58 changes: 54 additions & 4 deletions pandas/tests/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1031,11 +1031,12 @@ def test_loc_setitem_frame(self):


def test_loc_setitem_frame_multiples(self):

# multiple setting
df = DataFrame({ 'A' : ['foo','bar','baz'],
'B' : Series(range(3),dtype=np.int64) })
df.loc[0:1] = df.loc[1:2]
rhs = df.loc[1:2]
rhs.index = df.index[0:2]
df.loc[0:1] = rhs
expected = DataFrame({ 'A' : ['bar','baz','baz'],
'B' : Series([1,2,2],dtype=np.int64) })
assert_frame_equal(df, expected)
Expand All @@ -1047,8 +1048,9 @@ def test_loc_setitem_frame_multiples(self):
expected = DataFrame({ 'date' : [Timestamp('20000101'),Timestamp('20000102'),Timestamp('20000101'),
Timestamp('20000102'),Timestamp('20000103')],
'val' : Series([0,1,0,1,2],dtype=np.int64) })

df.loc[2:4] = df.loc[0:2]
rhs = df.loc[0:2]
rhs.index = df.index[2:5]
df.loc[2:4] = rhs
assert_frame_equal(df, expected)

def test_iloc_getitem_frame(self):
Expand Down Expand Up @@ -3987,6 +3989,54 @@ def test_float_index_at_iat(self):
for i in range(len(s)):
self.assertEqual(s.iat[i], i + 1)

def test_rhs_alignment(self):
    """GH8258: a frame assigned through an indexer is aligned on both axes.

    The right-hand side is built from the target selection with its rows
    and columns in reversed order and its values multiplied by -2, so the
    assignment only yields the expected frame when the value is aligned
    to the rows and columns being assigned to. The checks run once with a
    uniform dtype and once more after the frames are made multi-type.
    """
    def check_all_indexers(frame, value, expected):
        # the same 3x2 selection expressed as labels, positions and slices
        row_labels, row_positions, row_slice = \
            list('bcd'), [1, 2, 3], slice(1, 4)
        col_labels, col_positions, col_slice = \
            ['joe', 'jolie'], [1, 2], slice(1, 3)

        cases = (
            ('loc', row_labels, col_labels),
            ('iloc', row_positions, col_positions),
            ('ix', row_slice, col_slice),
            ('ix', row_positions, col_positions),
            ('ix', row_labels, col_labels),
        )

        for accessor, rows, columns in cases:
            # always start from a fresh copy so the cases stay independent
            result = frame.copy()
            getattr(result, accessor)[rows, columns] = value
            assert_frame_equal(result, expected)

    base = pd.DataFrame(np.arange(20).reshape(5, 4),
                        columns=['jim', 'joe', 'jolie', 'joline'],
                        index=list('abcde'))

    # value to assign: reverse the selected rows/columns, multiply by -2
    value = - 2 * base.iloc[3:0:-1, 2:0:-1]

    # expected outcome: the selected region multiplied by -2, in place
    expected = base.copy()
    expected.iloc[1:4, 1:3] *= -2

    # first pass: every column shares one dtype
    check_all_indexers(base, value, expected)

    # second pass: turn 'joe' into floats and 'jolie' into strings
    for frame in [base, value, expected]:
        frame['joe'] = frame['joe'].astype('float64')
        frame['jolie'] = frame['jolie'].map('@{0}'.format)

    check_all_indexers(base, value, expected)

class TestSeriesNoneCoercion(tm.TestCase):
EXPECTED_RESULTS = [
Expand Down

0 comments on commit 30246a7

Please sign in to comment.