Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add 'name' as argument for index 'to_frame' method #22580

Merged
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ Other Enhancements
- :class:`DatetimeIndex` gained :attr:`DatetimeIndex.timetz` attribute. Returns local time with timezone information. (:issue:`21358`)
- :class:`Resampler` now is iterable like :class:`GroupBy` (:issue:`15314`).
- :ref:`Series.resample` and :ref:`DataFrame.resample` have gained the :meth:`Resampler.quantile` (:issue:`15023`).
- :meth:`Index.to_frame` and :meth:`MultiIndex.to_frame` now accept a ``name`` argument to override the column name(s) of the resulting DataFrame (:issue:`22580`).
henriqueribeiro marked this conversation as resolved.
Show resolved Hide resolved

.. _whatsnew_0240.api_breaking:

Expand Down
19 changes: 16 additions & 3 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1115,17 +1115,21 @@ def to_series(self, index=None, name=None):

return Series(self._to_embed(), index=index, name=name)

def to_frame(self, index=True):
def to_frame(self, index=True, name=None):
"""
Create a DataFrame with a column containing the Index.

.. versionadded:: 0.21.0
.. versionadded:: 0.24.0

Parameters
----------
index : boolean, default True
Set the index of the returned DataFrame as the original Index.

name : object, default None
The passed name should substitute for the index name (if it has
one).

henriqueribeiro marked this conversation as resolved.
Show resolved Hide resolved
Returns
-------
DataFrame
Expand Down Expand Up @@ -1153,10 +1157,19 @@ def to_frame(self, index=True):
0 Ant
1 Bear
2 Cow

henriqueribeiro marked this conversation as resolved.
Show resolved Hide resolved
To override the name of the resulting column, specify `name`:

>>> idx.to_frame(index=False, name='zoo')
zoo
0 Ant
1 Bear
2 Cow
"""

from pandas import DataFrame
name = self.name or 0
if name is None:
name = self.name or 0
result = DataFrame({name: self.values.copy()})

if index:
Expand Down
21 changes: 18 additions & 3 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -1126,27 +1126,42 @@ def _to_safe_for_reshape(self):
""" convert to object if we are a categorical """
return self.set_levels([i._to_safe_for_reshape() for i in self.levels])

def to_frame(self, index=True):
def to_frame(self, index=True, name=None):
"""
henriqueribeiro marked this conversation as resolved.
Show resolved Hide resolved
Create a DataFrame with the levels of the MultiIndex as columns.

.. versionadded:: 0.20.0
.. versionadded:: 0.24.0

Parameters
----------
index : boolean, default True
Set the index of the returned DataFrame as the original MultiIndex.

name : list / sequence of strings, optional
The passed names should substitute index level names.

Returns
-------
DataFrame : a DataFrame containing the original MultiIndex data.
"""

from pandas import DataFrame
if name is not None:
if not is_list_like(name):
raise TypeError("'name' must be a list / sequence "
"of array-likes.")
henriqueribeiro marked this conversation as resolved.
Show resolved Hide resolved

if len(name) != len(self.levels):
henriqueribeiro marked this conversation as resolved.
Show resolved Hide resolved
raise ValueError("'name' should have same length as "
"number of levels on index")
idx_names = name
else:
idx_names = self.names

result = DataFrame({(name or level):
self._get_level_values(level)
for name, level in
zip(self.names, range(len(self.levels)))},
zip(idx_names, range(len(self.levels)))},
copy=False)
if index:
result.index = self
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/indexes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,18 @@ def test_to_frame(self):
df = idx.to_frame(index=False)
assert df.index is not idx

henriqueribeiro marked this conversation as resolved.
Show resolved Hide resolved
# See GH-22580
henriqueribeiro marked this conversation as resolved.
Show resolved Hide resolved
new_idx_name = 'new_name'
df = idx.to_frame(name=new_idx_name)

assert df.index is idx
assert len(df.columns) == 1
assert df.columns[0] == new_idx_name
assert df[new_idx_name].values is not idx.values

df = idx.to_frame(index=False, name=new_idx_name)
assert df.index is not idx

def test_shift(self):

# GH8083 test the base class for shift
Expand Down
34 changes: 32 additions & 2 deletions pandas/tests/indexes/multi/test_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,27 @@ def test_to_frame():
expected.index = index
tm.assert_frame_equal(result, expected)

# See GH-22580
index = MultiIndex.from_tuples(tuples)
henriqueribeiro marked this conversation as resolved.
Show resolved Hide resolved
result = index.to_frame(index=False, name=['first', 'second'])
expected = DataFrame(tuples)
expected.columns = ['first', 'second']
tm.assert_frame_equal(result, expected)

result = index.to_frame(name=['first', 'second'])
expected.index = index
expected.columns = ['first', 'second']
tm.assert_frame_equal(result, expected)

msg = "'name' must be a list / sequence of array-likes."
with tm.assert_raises_regex(TypeError, msg):
index.to_frame(name='first')

msg = "'name' should have same length as number of levels on index"
with tm.assert_raises_regex(ValueError, msg):
index.to_frame(name=['first'])

# Tests for datetime index
index = MultiIndex.from_product([range(5),
pd.date_range('20130101', periods=3)])
result = index.to_frame(index=False)
Expand All @@ -45,12 +66,21 @@ def test_to_frame():
1: np.tile(pd.date_range('20130101', periods=3), 5)})
tm.assert_frame_equal(result, expected)

index = MultiIndex.from_product([range(5),
pd.date_range('20130101', periods=3)])
result = index.to_frame()
expected.index = index
tm.assert_frame_equal(result, expected)

# See GH-22580
result = index.to_frame(index=False, name=['first', 'second'])
expected = DataFrame(
{'first': np.repeat(np.arange(5, dtype='int64'), 3),
'second': np.tile(pd.date_range('20130101', periods=3), 5)})
tm.assert_frame_equal(result, expected)

result = index.to_frame(name=['first', 'second'])
expected.index = index
tm.assert_frame_equal(result, expected)


def test_to_hierarchical():
index = MultiIndex.from_tuples([(1, 'one'), (1, 'two'), (2, 'one'), (
Expand Down