Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: GH #12223, GH #15262. Allow ints for names in MultiIndex #15478

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.20.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ Other enhancements
- HTML table output skips ``colspan`` or ``rowspan`` attribute if equal to 1. (:issue:`15403`)
- ``pd.TimedeltaIndex`` now has a custom datetick formatter specifically designed for nanosecond level precision (:issue:`8711`)
- ``pd.types.concat.union_categoricals`` gained the ``ignore_ordered`` argument to allow ignoring the ordered attribute of unioned categoricals (:issue:`13410`). See the :ref:`categorical union docs <categorical.union>` for more information.
- Fixed an issue with ``pd.concat`` that affected ``MultiIndex`` output formatting when the index level names were integers (:issue:`12223`, :issue:`15262`)

.. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations

Expand Down
6 changes: 3 additions & 3 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2876,7 +2876,7 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
names = [x for x in self.index.names]
if isinstance(self.index, MultiIndex):
for i in range(self.index.nlevels):
arrays.append(self.index.get_level_values(i))
arrays.append(self.index._get_level_values(i))
else:
arrays.append(self.index)

Expand All @@ -2886,9 +2886,9 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
# append all but the last column so we don't have to modify
# the end of this loop
for n in range(col.nlevels - 1):
arrays.append(col.get_level_values(n))
arrays.append(col._get_level_values(n))

level = col.get_level_values(col.nlevels - 1)
level = col._get_level_values(col.nlevels - 1)
names.extend(col.names)
elif isinstance(col, Series):
level = col._values
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,8 +291,8 @@ def _set_grouper(self, obj, sort=False):
# equivalent to the axis name
if isinstance(ax, MultiIndex):
level = ax._get_level_number(level)
ax = Index(ax.get_level_values(
level), name=ax.names[level])
ax = Index(ax._get_level_values(level),
name=ax.names[level])

else:
if level not in (0, ax.name):
Expand Down Expand Up @@ -761,7 +761,7 @@ def _index_with_as_index(self, b):
gp = self.grouper
levels = chain((gp.levels[i][gp.labels[i][b]]
for i in range(len(gp.groupings))),
(original.get_level_values(i)[b]
(original._get_level_values(i)[b]
for i in range(original.nlevels)))
new = MultiIndex.from_arrays(list(levels))
new.names = gp.names + original.names
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -811,7 +811,8 @@ def melt(frame, id_vars=None, value_vars=None, var_name=None,
mdata[value_name] = frame.values.ravel('F')
for i, col in enumerate(var_name):
# asanyarray will keep the columns as an Index
mdata[col] = np.asanyarray(frame.columns.get_level_values(i)).repeat(N)
mdata[col] = np.asanyarray(frame.columns
._get_level_values(i)).repeat(N)

return DataFrame(mdata, columns=mcolumns)

Expand Down
2 changes: 1 addition & 1 deletion pandas/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -1566,7 +1566,7 @@ def _save_header(self):
if isinstance(index_label, list) and len(index_label) > 1:
col_line.extend([''] * (len(index_label) - 1))

col_line.extend(columns.get_level_values(i))
col_line.extend(columns._get_level_values(i))

writer.writerow(col_line)

Expand Down
18 changes: 18 additions & 0 deletions pandas/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2352,6 +2352,24 @@ def get_level_values(self, level):
self._validate_index_level(level)
return self

def _get_level_values(self, num):
    """
    Return vector of label values for requested level, equal to the length
    of the index

    **this is an internal method**

    Parameters
    ----------
    num : int
        Level identifier; forwarded unchanged to ``get_level_values``.

    Returns
    -------
    values : same object ``get_level_values(num)`` returns
        NOTE(review): the visible tail of ``get_level_values`` in this
        class validates the level and returns ``self`` -- confirm that
        this is the full behaviour before relying on the type.
    """
    # Needed to address discussion in GH #10461
    return self.get_level_values(num)

_index_shared_docs['get_indexer'] = """
Compute indexer and mask for new index given the current index. The
indexer should be then used as an input to ndarray.take to align the
Expand Down
14 changes: 8 additions & 6 deletions pandas/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -684,7 +684,7 @@ def is_monotonic_increasing(self):
"""

# reversed() because lexsort() wants the most significant key last.
values = [self._get_level_values(i)
values = [self._get_level_values(i).values
for i in reversed(range(len(self.levels)))]
try:
sort_order = np.lexsort(values)
Expand Down Expand Up @@ -866,7 +866,8 @@ def _get_level_values(self, level):
labels = self.labels[level]
filled = algos.take_1d(unique._values, labels,
fill_value=unique._na_value)
return filled
values = unique._shallow_copy(filled)
return values

def get_level_values(self, level):
"""
Expand All @@ -883,7 +884,7 @@ def get_level_values(self, level):
"""
level = self._get_level_number(level)
values = self._get_level_values(level)
return self.levels[level]._shallow_copy(values)
return values

def format(self, space=2, sparsify=None, adjoin=True, names=False,
na_rep=None, formatter=None):
Expand Down Expand Up @@ -966,7 +967,8 @@ def to_frame(self, index=True):
"""

from pandas import DataFrame
result = DataFrame({(name or level): self.get_level_values(level)
result = DataFrame({(name or level):
self._get_level_values(level)
for name, level in
zip(self.names, range(len(self.levels)))},
copy=False)
Expand Down Expand Up @@ -1301,8 +1303,8 @@ def append(self, other):
for o in other):
arrays = []
for i in range(self.nlevels):
label = self.get_level_values(i)
appended = [o.get_level_values(i) for o in other]
label = self._get_level_values(i)
appended = [o._get_level_values(i) for o in other]
arrays.append(label.append(appended))
return MultiIndex.from_arrays(arrays, names=self.names)

Expand Down
2 changes: 1 addition & 1 deletion pandas/io/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -749,7 +749,7 @@ def _get_column_names_and_types(self, dtype_mapper):
if self.index is not None:
for i, idx_label in enumerate(self.index):
idx_type = dtype_mapper(
self.frame.index.get_level_values(i))
self.frame.index._get_level_values(i))
column_names_and_types.append((text_type(idx_label),
idx_type, True))

Expand Down
18 changes: 18 additions & 0 deletions pandas/tests/frame/test_combine_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,24 @@ def test_concat_axis_parameter(self):
with assertRaisesRegexp(ValueError, 'No axis named'):
pd.concat([series1, series2], axis='something')

def test_concat_numerical_names(self):
    """
    Concatenating slices of a frame whose MultiIndex level names are
    integers must equal the frame built directly with those names.

    Regression test for GH 15262 and GH 12223.
    """
    # Source frame: 3 x 3 product index whose level names are the ints 1, 2.
    product_index = pd.MultiIndex.from_product(
        [['A0', 'A1', 'A2'], ['B0', 'B1', 'B2']],
        names=[1, 2])
    df = pd.DataFrame({'col': range(9)},
                      dtype='int32',
                      index=product_index)

    # Glue the first two and the last two rows back together.
    first_rows = df.iloc[:2, :]
    last_rows = df.iloc[-2:, :]
    result = pd.concat((first_rows, last_rows))

    # Same four rows, constructed from scratch with the same int names.
    kept_keys = [('A0', 'B0'),
                 ('A0', 'B1'),
                 ('A2', 'B1'),
                 ('A2', 'B2')]
    expected_index = pd.MultiIndex.from_tuples(kept_keys, names=[1, 2])
    expected = pd.DataFrame({'col': [0, 1, 7, 8]},
                            dtype='int32',
                            index=expected_index)

    tm.assert_frame_equal(result, expected)


class TestDataFrameCombineFirst(tm.TestCase, TestData):

Expand Down
6 changes: 3 additions & 3 deletions pandas/util/doctools.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,12 +113,12 @@ def _insert_index(self, data):
else:
for i in range(idx_nlevels):
data.insert(i, 'Index{0}'.format(i),
data.index.get_level_values(i))
data.index._get_level_values(i))

col_nlevels = data.columns.nlevels
if col_nlevels > 1:
col = data.columns.get_level_values(0)
values = [data.columns.get_level_values(i).values
col = data.columns._get_level_values(0)
values = [data.columns._get_level_values(i).values
for i in range(1, col_nlevels)]
col_df = pd.DataFrame(values)
data.columns = col_df.columns
Expand Down