Skip to content

Commit

Permalink
API: Add 'name' as argument for index 'to_frame' method (#22580)
Browse files Browse the repository at this point in the history
  • Loading branch information
henriqueribeiro authored and gfyoung committed Sep 14, 2018
1 parent b7e5704 commit 08bd3e3
Show file tree
Hide file tree
Showing 5 changed files with 79 additions and 15 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ Other Enhancements
- :class:`DatetimeIndex` gained :attr:`DatetimeIndex.timetz` attribute. Returns local time with timezone information. (:issue:`21358`)
- :class:`Resampler` now is iterable like :class:`GroupBy` (:issue:`15314`).
- :meth:`Series.resample` and :meth:`DataFrame.resample` have gained the :meth:`Resampler.quantile` (:issue:`15023`).
- :meth:`Index.to_frame` now accepts a ``name`` argument to override the column name(s) of the resulting DataFrame (:issue:`22580`).

.. _whatsnew_0240.api_breaking:

Expand Down
19 changes: 16 additions & 3 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1115,17 +1115,21 @@ def to_series(self, index=None, name=None):

return Series(self._to_embed(), index=index, name=name)

def to_frame(self, index=True):
def to_frame(self, index=True, name=None):
"""
Create a DataFrame with a column containing the Index.
.. versionadded:: 0.21.0
.. versionadded:: 0.24.0
Parameters
----------
index : boolean, default True
Set the index of the returned DataFrame as the original Index.
name : object, default None
The name to use for the resulting column in place of the index name.
If None, the index name is used, falling back to 0 when the index
name is None or otherwise falsy.
Returns
-------
DataFrame
Expand Down Expand Up @@ -1153,10 +1157,19 @@ def to_frame(self, index=True):
0 Ant
1 Bear
2 Cow
To override the name of the resulting column, specify `name`:
>>> idx.to_frame(index=False, name='zoo')
zoo
0 Ant
1 Bear
2 Cow
"""

from pandas import DataFrame
name = self.name or 0
if name is None:
name = self.name or 0
result = DataFrame({name: self.values.copy()})

if index:
Expand Down
21 changes: 18 additions & 3 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -1126,20 +1126,23 @@ def _to_safe_for_reshape(self):
""" convert to object if we are a categorical """
return self.set_levels([i._to_safe_for_reshape() for i in self.levels])

def to_frame(self, index=True):
def to_frame(self, index=True, name=None):
"""
Create a DataFrame with the levels of the MultiIndex as columns.
Column ordering is determined by the DataFrame constructor with data as
a dict.
.. versionadded:: 0.20.0
.. versionadded:: 0.24.0
Parameters
----------
index : boolean, default True
Set the index of the returned DataFrame as the original MultiIndex.
name : list / sequence of strings, optional
Column names to use in place of the index level names. Must be
list-like and contain exactly one name per level of the MultiIndex.
Returns
-------
DataFrame : a DataFrame containing the original MultiIndex data.
Expand All @@ -1150,10 +1153,22 @@ def to_frame(self, index=True):
"""

from pandas import DataFrame
if name is not None:
if not is_list_like(name):
raise TypeError("'name' must be a list / sequence "
"of column names.")

if len(name) != len(self.levels):
raise ValueError("'name' should have same length as "
"number of levels on index.")
idx_names = name
else:
idx_names = self.names

result = DataFrame({(name or level):
self._get_level_values(level)
for name, level in
zip(self.names, range(len(self.levels)))},
zip(idx_names, range(len(self.levels)))},
copy=False)
if index:
result.index = self
Expand Down
19 changes: 12 additions & 7 deletions pandas/tests/indexes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,19 +66,24 @@ def test_to_series_with_arguments(self):
assert s.index is not idx
assert s.name != idx.name

def test_to_frame(self):
# see gh-15230
@pytest.mark.parametrize("name", [None, "new_name"])
def test_to_frame(self, name):
# see GH-15230, GH-22580
idx = self.create_index()
name = idx.name or 0

df = idx.to_frame()
if name:
idx_name = name
else:
idx_name = idx.name or 0

df = idx.to_frame(name=idx_name)

assert df.index is idx
assert len(df.columns) == 1
assert df.columns[0] == name
assert df[name].values is not idx.values
assert df.columns[0] == idx_name
assert df[idx_name].values is not idx.values

df = idx.to_frame(index=False)
df = idx.to_frame(index=False, name=idx_name)
assert df.index is not idx

def test_shift(self):
Expand Down
34 changes: 32 additions & 2 deletions pandas/tests/indexes/multi/test_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,27 @@ def test_to_frame():
expected.index = index
tm.assert_frame_equal(result, expected)

# See GH-22580
index = MultiIndex.from_tuples(tuples)
result = index.to_frame(index=False, name=['first', 'second'])
expected = DataFrame(tuples)
expected.columns = ['first', 'second']
tm.assert_frame_equal(result, expected)

result = index.to_frame(name=['first', 'second'])
expected.index = index
expected.columns = ['first', 'second']
tm.assert_frame_equal(result, expected)

msg = "'name' must be a list / sequence of column names."
with tm.assert_raises_regex(TypeError, msg):
index.to_frame(name='first')

msg = "'name' should have same length as number of levels on index."
with tm.assert_raises_regex(ValueError, msg):
index.to_frame(name=['first'])

# Tests for datetime index
index = MultiIndex.from_product([range(5),
pd.date_range('20130101', periods=3)])
result = index.to_frame(index=False)
Expand All @@ -45,12 +66,21 @@ def test_to_frame():
1: np.tile(pd.date_range('20130101', periods=3), 5)})
tm.assert_frame_equal(result, expected)

index = MultiIndex.from_product([range(5),
pd.date_range('20130101', periods=3)])
result = index.to_frame()
expected.index = index
tm.assert_frame_equal(result, expected)

# See GH-22580
result = index.to_frame(index=False, name=['first', 'second'])
expected = DataFrame(
{'first': np.repeat(np.arange(5, dtype='int64'), 3),
'second': np.tile(pd.date_range('20130101', periods=3), 5)})
tm.assert_frame_equal(result, expected)

result = index.to_frame(name=['first', 'second'])
expected.index = index
tm.assert_frame_equal(result, expected)


def test_to_hierarchical():
index = MultiIndex.from_tuples([(1, 'one'), (1, 'two'), (2, 'one'), (
Expand Down

0 comments on commit 08bd3e3

Please sign in to comment.