Skip to content

Commit

Permalink
Generic solution to categorical problem
Browse files: browse the repository at this point in the history
  • Loading branch information
dwillmer committed Jul 17, 2017
1 parent 48e7163 commit 218da66
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 11 deletions.
12 changes: 4 additions & 8 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -896,12 +896,6 @@ def _maybe_coerce_merge_keys(self):
if lk.is_dtype_equal(rk):
continue

# if we are dates with differing categories
# then allow them to proceed because
# coercing to object below results in integers.
if is_datetimelike(lk.categories) and is_datetimelike(rk.categories):
continue

elif is_categorical_dtype(lk) or is_categorical_dtype(rk):
pass

Expand All @@ -923,11 +917,13 @@ def _maybe_coerce_merge_keys(self):
# Houston, we have a problem!
# let's coerce to object (or, for a categorical key, to the dtype of its categories)
if name in self.left.columns:
typ = lk.categories.dtype if is_categorical_dtype(lk) else object
self.left = self.left.assign(
**{name: self.left[name].astype(object)})
**{name: self.left[name].astype(typ)})
if name in self.right.columns:
typ = rk.categories.dtype if is_categorical_dtype(rk) else object
self.right = self.right.assign(
**{name: self.right[name].astype(object)})
**{name: self.right[name].astype(typ)})

def _validate_specification(self):
# Hm, any way to make this logic less complicated??
Expand Down
9 changes: 6 additions & 3 deletions pandas/tests/reshape/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -1515,7 +1515,7 @@ def test_self_join_multiple_categories(self):

assert_frame_equal(result, df)

def test_dtype_on_categorical_dates(self):
def test_categorical_dates(self):
# GH 16900
# dates should not be coerced to ints

Expand All @@ -1534,10 +1534,13 @@ def test_dtype_on_categorical_dates(self):
df2['date'] = df2['date'].astype('category')

result = pd.merge(df, df2, how='outer', on=['date'])
assert result['date'].dtype == 'category'
assert result.shape == (3, 3)
assert result['date'].iloc[0] == pd.Timestamp('2001-01-01')
assert result['date'].iloc[-1] == pd.Timestamp('2001-01-03')

result_inner = pd.merge(df, df2, how='inner', on=['date'])
assert result_inner['date'].dtype == 'category'
assert result_inner.shape == (1, 3)
assert result_inner['date'].iloc[-1] == pd.Timestamp('2001-01-01')


@pytest.fixture
Expand Down

0 comments on commit 218da66

Please sign in to comment.