diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index 9c3412d35d286..6e6bb1aa012ee 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -1539,8 +1539,9 @@ The ``MultiIndex`` object is the hierarchical analogue of the standard ``Index`` object which typically stores the axis labels in pandas objects. You can think of ``MultiIndex`` an array of tuples where each tuple is unique. A ``MultiIndex`` can be created from a list of arrays (using -``MultiIndex.from_arrays``) or an array of tuples (using -``MultiIndex.from_tuples``). +``MultiIndex.from_arrays``), an array of tuples (using +``MultiIndex.from_tuples``), or a crossed set of iterables (using +``MultiIndex.from_product``). .. ipython:: python @@ -1552,6 +1553,14 @@ can think of ``MultiIndex`` an array of tuples where each tuple is unique. A s = Series(randn(8), index=index) s +When you want every pairing of the elements in two iterables, it can be easier +to use the ``MultiIndex.from_product`` function: + +.. ipython:: python + + iterables = [['bar', 'baz', 'foo', 'qux'], ['one', 'two']] + MultiIndex.from_product(iterables, names=['first', 'second']) + As a convenience, you can pass a list of arrays directly into Series or DataFrame to construct a MultiIndex automatically: diff --git a/doc/source/v0.13.1.txt b/doc/source/v0.13.1.txt index ded0e3b495be2..0a827c0ceab22 100644 --- a/doc/source/v0.13.1.txt +++ b/doc/source/v0.13.1.txt @@ -73,6 +73,16 @@ Enhancements improves parsing perf in many cases. Thanks to @lexual for suggesting and @danbirken for rapidly implementing. (:issue:`5490`, :issue:`6021`) +- ``MultiIndex.from_product`` convenience function for creating a MultiIndex from + the cartesian product of a set of iterables (:issue:`6055`): + + .. 
ipython:: python + + shades = ['light', 'dark'] + colors = ['red', 'green', 'blue'] + + MultiIndex.from_product([shades, colors], names=['shade', 'color']) + - The ``ArrayFormatter`` for ``datetime`` and ``timedelta64`` now intelligently limit precision based on the values in the array (:issue:`3401`) diff --git a/pandas/core/index.py b/pandas/core/index.py index 86344b1cc2161..c42d7a29bb8f6 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -2491,6 +2491,8 @@ def from_arrays(cls, arrays, sortorder=None, names=None): See Also -------- MultiIndex.from_tuples : Convert list of tuples to MultiIndex + MultiIndex.from_product : Make a MultiIndex from cartesian product + of iterables """ from pandas.core.categorical import Categorical @@ -2534,6 +2536,8 @@ def from_tuples(cls, tuples, sortorder=None, names=None): See Also -------- MultiIndex.from_arrays : Convert list of arrays to MultiIndex + MultiIndex.from_product : Make a MultiIndex from cartesian product + of iterables """ if len(tuples) == 0: # I think this is right? Not quite sure... @@ -2552,6 +2556,45 @@ def from_tuples(cls, tuples, sortorder=None, names=None): return MultiIndex.from_arrays(arrays, sortorder=sortorder, names=names) + @classmethod + def from_product(cls, iterables, sortorder=None, names=None): + """ + Make a MultiIndex from the cartesian product of multiple iterables + + Parameters + ---------- + iterables : list / sequence of iterables + Each iterable has unique labels for each level of the index. + sortorder : int or None + Level of sortedness (must be lexicographically sorted by that + level). + names : list / sequence of strings or None + Names for the levels in the index. 
+ + Returns + ------- + index : MultiIndex + + Examples + -------- + >>> numbers = [0, 1, 2] + >>> colors = [u'green', u'purple'] + >>> MultiIndex.from_product([numbers, colors], + ... names=['number', 'color']) + MultiIndex(levels=[[0, 1, 2], [u'green', u'purple']], + labels=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], + names=[u'number', u'color']) + + See Also + -------- + MultiIndex.from_arrays : Convert list of arrays to MultiIndex + MultiIndex.from_tuples : Convert list of tuples to MultiIndex + """ + from pandas.tools.util import cartesian_product + product = cartesian_product(iterables) + return MultiIndex.from_arrays(product, sortorder=sortorder, + names=names) + @property def nlevels(self): return len(self.levels) diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index de6092a65f507..fc94a9da00dae 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -1561,6 +1561,20 @@ def test_from_arrays(self): result = MultiIndex.from_arrays(arrays) self.assertEquals(list(result), list(self.index)) + def test_from_product(self): + first = ['foo', 'bar', 'buz'] + second = ['a', 'b', 'c'] + names = ['first', 'second'] + result = MultiIndex.from_product([first, second], names=names) + + tuples = [('foo', 'a'), ('foo', 'b'), ('foo', 'c'), + ('bar', 'a'), ('bar', 'b'), ('bar', 'c'), + ('buz', 'a'), ('buz', 'b'), ('buz', 'c')] + expected = MultiIndex.from_tuples(tuples, names=names) + + assert_array_equal(result, expected) + self.assertEquals(result.names, names) + def test_append(self): result = self.index[:3].append(self.index[3:]) self.assert_(result.equals(self.index))