Skip to content

Commit

Permalink
BUG: Load data from a CategoricalIndex for dtype comparison, closes #… (
Browse files Browse the repository at this point in the history
#16738)

* BUG: Load data from a CategoricalIndex for dtype comparison, closes #16627

* Enable is_dtype_equal on CategoricalIndex, fixed some doc typos, added ordered CategoricalIndex test

* Flake8 windows suggestion

* Fixed some documentation/formatting issues, clarified the purpose of the test case.

(cherry picked from commit 5b88d2f)
  • Loading branch information
thequackdaddy authored and TomAugspurger committed Jul 7, 2017
1 parent c61978e commit 841decf
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 3 deletions.
2 changes: 1 addition & 1 deletion .github/PULL_REQUEST_TEMPLATE.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
- [ ] closes #xxxx
- [ ] tests added / passed
- [ ] passes ``git diff upstream/master --name-only -- '*.py' | flake8 --diff``
- [ ] passes ``git diff upstream/master --name-only -- '*.py' | flake8 --diff`` (On Windows, ``git diff upstream/master -u -- "*.py" | flake8 --diff`` might work as an alternative.)
- [ ] whatsnew entry
6 changes: 6 additions & 0 deletions doc/source/contributing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -525,6 +525,12 @@ run this slightly modified command::

git diff master --name-only -- '*.py' | grep 'pandas/' | xargs flake8

Note that on Windows, ``grep``, ``xargs``, and other tools are likely
unavailable. However, the following command has been shown to work on smaller
commits in the standard Windows command line::

git diff master -u -- "*.py" | flake8 --diff

Backwards Compatibility
~~~~~~~~~~~~~~~~~~~~~~~

Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.20.3.txt
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ Sparse
Reshaping
^^^^^^^^^

- Bug in joining on a ``MultiIndex`` with a ``category`` dtype for a level (:issue:`16627`).


Numeric
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/indexes/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -560,6 +560,9 @@ def take(self, indices, axis=0, allow_fill=True,
na_value=-1)
return self._create_from_codes(taken)

def is_dtype_equal(self, other):
    """
    Report whether ``other`` has the same dtype as this index's data.

    The check is delegated unchanged to the ``is_dtype_equal`` method of
    the object stored in ``self._data``.

    Parameters
    ----------
    other : object
        Forwarded as-is to ``self._data.is_dtype_equal``.

    Returns
    -------
    Whatever ``self._data.is_dtype_equal(other)`` returns.
    """
    underlying = self._data
    return underlying.is_dtype_equal(other)

# Make ``take`` reachable under the name ``take_nd`` as well.
take_nd = take

def map(self, mapper):
Expand Down
44 changes: 42 additions & 2 deletions pandas/tests/test_join.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# -*- coding: utf-8 -*-

import numpy as np
from pandas import Index
from pandas import Index, DataFrame, Categorical, merge

from pandas._libs import join as _join
import pandas.util.testing as tm
from pandas.util.testing import assert_almost_equal
from pandas.util.testing import assert_almost_equal, assert_frame_equal


class TestIndexer(object):
Expand Down Expand Up @@ -192,3 +192,43 @@ def test_inner_join_indexer2():

exp_ridx = np.array([0, 1, 2, 3], dtype=np.int64)
assert_almost_equal(ridx, exp_ridx)


def test_merge_join_categorical_multiindex():
    """
    ``DataFrame.join`` on a categorical MultiIndex level must match ``merge``.

    A frame with a categorical key column and an integer key column is
    joined onto a Series indexed by a (categorical, integer) MultiIndex.
    The result is compared with the equivalent left ``merge`` after the
    right-hand key columns are dropped. The check runs twice: once with an
    unordered categorical and once with an ordered categorical whose
    categories are listed in a different order.
    """
    # From issue 16627
    left_labels = ['a', 'b', 'a', 'c', 'a', 'b']
    right_labels = ['a', 'b', 'c', 'a', 'b', 'c']

    # (categories, extra Categorical keyword arguments) for each scenario.
    # The second entry is the same test, but with ordered categorical.
    scenarios = [
        (['a', 'b', 'c'], {}),
        (['b', 'a', 'c'], {'ordered': True}),
    ]

    for categories, cat_kwargs in scenarios:
        left = DataFrame({
            'Cat1': Categorical(left_labels, categories, **cat_kwargs),
            'Int1': [0, 1, 0, 1, 0, 0],
        })

        right_frame = DataFrame({
            'Cat': Categorical(right_labels, categories, **cat_kwargs),
            'Int': [0, 0, 0, 1, 1, 1],
            'Factor': [1.1, 1.2, 1.3, 1.4, 1.5, 1.6],
        })
        right = right_frame.set_index(['Cat', 'Int'])['Factor']

        merged = merge(left, right.reset_index(),
                       left_on=['Cat1', 'Int1'],
                       right_on=['Cat', 'Int'],
                       how='left')
        result = left.join(right, on=['Cat1', 'Int1'])

        # merge keeps the right-hand key columns; join does not.
        expected = merged.drop(['Cat', 'Int'], axis=1)
        assert_frame_equal(expected, result)

0 comments on commit 841decf

Please sign in to comment.