Skip to content

Commit

Permalink
BUG: GH #12223, GH #15262. Allow ints for names in MultiIndex
Browse files Browse the repository at this point in the history
closes #12223
closes #15262

Author: Dr-Irv <irv@princeton.com>

Closes #15478 from Dr-Irv/Issue15262 and squashes the following commits:

15d8433 [Dr-Irv] Address jreback comments
10667a3 [Dr-Irv] Fix types for test
8935068 [Dr-Irv] resolve conflicts
385ca3e [Dr-Irv] BUG: GH #12223, GH #15262. Allow ints for names in MultiIndex
  • Loading branch information
Dr-Irv authored and jreback committed Feb 24, 2017
1 parent 7e0a71b commit 5955804
Show file tree
Hide file tree
Showing 10 changed files with 46 additions and 22 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.20.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -550,6 +550,7 @@ Bug Fixes

- Bug in ``Series.where()`` and ``DataFrame.where()`` where array-like conditionals were being rejected (:issue:`15414`)
- Bug in ``Series`` construction with a datetimetz (:issue:`14928`)
- Bug in output formatting of a ``MultiIndex`` when names are integers (:issue:`12223`, :issue:`15262`)

- Bug in compat for passing long integers to ``Timestamp.replace`` (:issue:`15030`)
- Bug in ``.loc`` that would not return the correct dtype for scalar access for a DataFrame (:issue:`11617`)
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2876,7 +2876,7 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
names = [x for x in self.index.names]
if isinstance(self.index, MultiIndex):
for i in range(self.index.nlevels):
arrays.append(self.index.get_level_values(i))
arrays.append(self.index._get_level_values(i))
else:
arrays.append(self.index)

Expand All @@ -2886,9 +2886,9 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
# append all but the last column so we don't have to modify
# the end of this loop
for n in range(col.nlevels - 1):
arrays.append(col.get_level_values(n))
arrays.append(col._get_level_values(n))

level = col.get_level_values(col.nlevels - 1)
level = col._get_level_values(col.nlevels - 1)
names.extend(col.names)
elif isinstance(col, Series):
level = col._values
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,8 +291,8 @@ def _set_grouper(self, obj, sort=False):
# equivalent to the axis name
if isinstance(ax, MultiIndex):
level = ax._get_level_number(level)
ax = Index(ax.get_level_values(
level), name=ax.names[level])
ax = Index(ax._get_level_values(level),
name=ax.names[level])

else:
if level not in (0, ax.name):
Expand Down Expand Up @@ -761,7 +761,7 @@ def _index_with_as_index(self, b):
gp = self.grouper
levels = chain((gp.levels[i][gp.labels[i][b]]
for i in range(len(gp.groupings))),
(original.get_level_values(i)[b]
(original._get_level_values(i)[b]
for i in range(original.nlevels)))
new = MultiIndex.from_arrays(list(levels))
new.names = gp.names + original.names
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -811,7 +811,8 @@ def melt(frame, id_vars=None, value_vars=None, var_name=None,
mdata[value_name] = frame.values.ravel('F')
for i, col in enumerate(var_name):
# asanyarray will keep the columns as an Index
mdata[col] = np.asanyarray(frame.columns.get_level_values(i)).repeat(N)
mdata[col] = np.asanyarray(frame.columns
._get_level_values(i)).repeat(N)

return DataFrame(mdata, columns=mcolumns)

Expand Down
2 changes: 1 addition & 1 deletion pandas/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -1566,7 +1566,7 @@ def _save_header(self):
if isinstance(index_label, list) and len(index_label) > 1:
col_line.extend([''] * (len(index_label) - 1))

col_line.extend(columns.get_level_values(i))
col_line.extend(columns._get_level_values(i))

writer.writerow(col_line)

Expand Down
10 changes: 6 additions & 4 deletions pandas/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2334,9 +2334,9 @@ def set_value(self, arr, key, value):
self._engine.set_value(_values_from_object(arr),
_values_from_object(key), value)

def get_level_values(self, level):
def _get_level_values(self, level):
"""
Return vector of label values for requested level, equal to the length
Return an Index of values for requested level, equal to the length
of the index
Parameters
Expand All @@ -2345,12 +2345,14 @@ def get_level_values(self, level):
Returns
-------
values : ndarray
values : Index
"""
# a flat Index has only one level, so just validate that `level` refers to it

self._validate_index_level(level)
return self

get_level_values = _get_level_values

_index_shared_docs['get_indexer'] = """
Compute indexer and mask for new index given the current index. The
indexer should be then used as an input to ndarray.take to align the
Expand Down
14 changes: 8 additions & 6 deletions pandas/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -684,7 +684,7 @@ def is_monotonic_increasing(self):
"""

# reversed() because lexsort() wants the most significant key last.
values = [self._get_level_values(i)
values = [self._get_level_values(i).values
for i in reversed(range(len(self.levels)))]
try:
sort_order = np.lexsort(values)
Expand Down Expand Up @@ -866,7 +866,8 @@ def _get_level_values(self, level):
labels = self.labels[level]
filled = algos.take_1d(unique._values, labels,
fill_value=unique._na_value)
return filled
values = unique._shallow_copy(filled)
return values

def get_level_values(self, level):
"""
Expand All @@ -883,7 +884,7 @@ def get_level_values(self, level):
"""
level = self._get_level_number(level)
values = self._get_level_values(level)
return self.levels[level]._shallow_copy(values)
return values

def format(self, space=2, sparsify=None, adjoin=True, names=False,
na_rep=None, formatter=None):
Expand Down Expand Up @@ -966,7 +967,8 @@ def to_frame(self, index=True):
"""

from pandas import DataFrame
result = DataFrame({(name or level): self.get_level_values(level)
result = DataFrame({(name or level):
self._get_level_values(level)
for name, level in
zip(self.names, range(len(self.levels)))},
copy=False)
Expand Down Expand Up @@ -1301,8 +1303,8 @@ def append(self, other):
for o in other):
arrays = []
for i in range(self.nlevels):
label = self.get_level_values(i)
appended = [o.get_level_values(i) for o in other]
label = self._get_level_values(i)
appended = [o._get_level_values(i) for o in other]
arrays.append(label.append(appended))
return MultiIndex.from_arrays(arrays, names=self.names)

Expand Down
2 changes: 1 addition & 1 deletion pandas/io/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -749,7 +749,7 @@ def _get_column_names_and_types(self, dtype_mapper):
if self.index is not None:
for i, idx_label in enumerate(self.index):
idx_type = dtype_mapper(
self.frame.index.get_level_values(i))
self.frame.index._get_level_values(i))
column_names_and_types.append((text_type(idx_label),
idx_type, True))

Expand Down
18 changes: 18 additions & 0 deletions pandas/tests/frame/test_combine_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,24 @@ def test_concat_axis_parameter(self):
with assertRaisesRegexp(ValueError, 'No axis named'):
pd.concat([series1, series2], axis='something')

def test_concat_numerical_names(self):
# #15262 # #12223
df = pd.DataFrame({'col': range(9)},
dtype='int32',
index=(pd.MultiIndex
.from_product([['A0', 'A1', 'A2'],
['B0', 'B1', 'B2']],
names=[1, 2])))
result = pd.concat((df.iloc[:2, :], df.iloc[-2:, :]))
expected = pd.DataFrame({'col': [0, 1, 7, 8]},
dtype='int32',
index=pd.MultiIndex.from_tuples([('A0', 'B0'),
('A0', 'B1'),
('A2', 'B1'),
('A2', 'B2')],
names=[1, 2]))
tm.assert_frame_equal(result, expected)


class TestDataFrameCombineFirst(tm.TestCase, TestData):

Expand Down
6 changes: 3 additions & 3 deletions pandas/util/doctools.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,12 +113,12 @@ def _insert_index(self, data):
else:
for i in range(idx_nlevels):
data.insert(i, 'Index{0}'.format(i),
data.index.get_level_values(i))
data.index._get_level_values(i))

col_nlevels = data.columns.nlevels
if col_nlevels > 1:
col = data.columns.get_level_values(0)
values = [data.columns.get_level_values(i).values
col = data.columns._get_level_values(0)
values = [data.columns._get_level_values(i).values
for i in range(1, col_nlevels)]
col_df = pd.DataFrame(values)
data.columns = col_df.columns
Expand Down

0 comments on commit 5955804

Please sign in to comment.