From 358da5663a68d9b03ac86ff2f50d57de309a8f65 Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Wed, 10 Feb 2016 12:42:53 -0500
Subject: [PATCH] ENH: add to_xarray conversion method

supersedes #11950
xref #10000

Author: Jeff Reback <jeff@reback.net>

Closes #11972 from jreback/xarray and squashes the following commits:

85de0b7 [Jeff Reback] ENH: add to_xarray conversion method
---
 ci/requirements-2.7.run         |   1 +
 ci/requirements-3.5.run         |   1 +
 doc/source/api.rst              |  10 +++
 doc/source/install.rst          |   1 +
 doc/source/whatsnew/v0.18.0.txt |  30 ++++++-
 pandas/core/generic.py          |  26 ++++++
 pandas/tests/test_generic.py    | 142 +++++++++++++++++++++++++++++++-
 pandas/util/print_versions.py   |   1 +
 pandas/util/testing.py          |  12 +++
 9 files changed, 222 insertions(+), 2 deletions(-)

diff --git a/ci/requirements-2.7.run b/ci/requirements-2.7.run
index 8fc074b96e0e4..6768a75f5c285 100644
--- a/ci/requirements-2.7.run
+++ b/ci/requirements-2.7.run
@@ -20,3 +20,4 @@ html5lib=1.0b2
 beautiful-soup=4.2.1
 statsmodels
 jinja2=2.8
+xarray
diff --git a/ci/requirements-3.5.run b/ci/requirements-3.5.run
index 2401a0fc11673..4ba3b473b3edd 100644
--- a/ci/requirements-3.5.run
+++ b/ci/requirements-3.5.run
@@ -17,6 +17,7 @@ bottleneck
 sqlalchemy
 pymysql
 psycopg2
+xarray
 
 # incompat with conda ATM
 # beautiful-soup
diff --git a/doc/source/api.rst b/doc/source/api.rst
index 52fd8f5838b1c..c572aa9ae2e03 100644
--- a/doc/source/api.rst
+++ b/doc/source/api.rst
@@ -684,6 +684,7 @@ Serialization / IO / Conversion
    Series.to_csv
    Series.to_dict
    Series.to_frame
+   Series.to_xarray
    Series.to_hdf
    Series.to_sql
    Series.to_msgpack
@@ -918,6 +919,7 @@ Reshaping, sorting, transposing
    DataFrame.unstack
    DataFrame.T
    DataFrame.to_panel
+   DataFrame.to_xarray
    DataFrame.transpose
 
 Combining / joining / merging
@@ -1216,6 +1218,7 @@ Serialization / IO / Conversion
    Panel.to_json
    Panel.to_sparse
    Panel.to_frame
+   Panel.to_xarray
    Panel.to_clipboard
 
 .. _api.panel4d:
@@ -1230,6 +1233,13 @@ Constructor
 
    Panel4D
 
+Serialization / IO / Conversion
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. autosummary::
+   :toctree: generated/
+
+   Panel4D.to_xarray
+
 Attributes and underlying data
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 **Axes**
diff --git a/doc/source/install.rst b/doc/source/install.rst
index 3df38cdc092e3..3836180af520f 100644
--- a/doc/source/install.rst
+++ b/doc/source/install.rst
@@ -244,6 +244,7 @@ Optional Dependencies
 * `Cython <http://www.cython.org>`__: Only necessary to build development
   version. Version 0.19.1 or higher.
 * `SciPy <http://www.scipy.org>`__: miscellaneous statistical functions
+* `xarray <http://xarray.readthedocs.org>`__: pandas-like handling for > 2 dims, needed for converting Panels to xarray objects. Version 0.7.0 or higher is recommended.
 * `PyTables <http://www.pytables.org>`__: necessary for HDF5-based storage. Version 3.0.0 or higher required, Version 3.2.1 or higher highly recommended.
 * `SQLAlchemy <http://www.sqlalchemy.org>`__: for SQL database support. Version 0.8.1 or higher recommended. Besides SQLAlchemy, you also need a database specific driver. You can find an overview of supported drivers for each SQL dialect in the `SQLAlchemy docs <http://docs.sqlalchemy.org/en/latest/dialects/index.html>`__. Some common drivers are:
 
diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt
index cf1a13d33e17f..10b1bfd7ce085 100644
--- a/doc/source/whatsnew/v0.18.0.txt
+++ b/doc/source/whatsnew/v0.18.0.txt
@@ -274,7 +274,6 @@ In addition, ``.round()``, ``.floor()`` and ``.ceil()`` will be available thru t
    s
    s.dt.round('D')
 
-
 Formatting of integer in FloatIndex
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -315,6 +314,35 @@ New Behavior:
    s.index
    print(s.to_csv(path=None))
 
+.. _whatsnew_0180.enhancements.xarray:
+
+to_xarray
+^^^^^^^^^
+
+In a future version of pandas, we will be deprecating ``Panel`` and other > 2 ndim objects. In order to provide for continuity,
+all ``NDFrame`` objects have gained the ``.to_xarray()`` method in order to convert to ``xarray`` objects, which have
+a pandas-like interface for > 2 ndim.
+
+See the `full xarray documentation <http://xarray.pydata.org/en/stable/>`__.
+
+.. code-block:: python
+
+   In [1]: p = Panel(np.arange(2*3*4).reshape(2,3,4))
+
+   In [2]: p.to_xarray()
+   Out[2]:
+   <xarray.DataArray (items: 2, major_axis: 3, minor_axis: 4)>
+   array([[[ 0,  1,  2,  3],
+           [ 4,  5,  6,  7],
+           [ 8,  9, 10, 11]],
+
+          [[12, 13, 14, 15],
+           [16, 17, 18, 19],
+           [20, 21, 22, 23]]])
+   Coordinates:
+     * items       (items) int64 0 1
+     * major_axis  (major_axis) int64 0 1 2
+     * minor_axis  (minor_axis) int64 0 1 2 3
 
 .. _whatsnew_0180.enhancements.other:
 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index a746a93c3dc16..1cc46d0e4ffff 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -1040,6 +1040,32 @@ def to_clipboard(self, excel=None, sep=None, **kwargs):
         from pandas.io import clipboard
         clipboard.to_clipboard(self, excel=excel, sep=sep, **kwargs)
 
+    def to_xarray(self):
+        """
+        Return an xarray object from the pandas object.
+
+        Returns
+        -------
+        a DataArray for a Series
+        a Dataset for a DataFrame
+        a DataArray for higher dims
+
+        Notes
+        -----
+        See the `xarray docs <http://xarray.pydata.org/en/stable/>`__
+        """
+        import xarray  # local import: xarray is an optional dependency
+        if self.ndim == 1:
+            return xarray.DataArray.from_series(self)
+        elif self.ndim == 2:
+            return xarray.Dataset.from_dataframe(self)
+
+        # > 2 dims: build (axis name, axis) pairs in _AXIS_ORDERS order
+        coords = [(a, self._get_axis(a)) for a in self._AXIS_ORDERS]
+        return xarray.DataArray(self,
+                                coords=coords,
+                                )
+
     # ----------------------------------------------------------------------
     # Fancy Indexing
 
diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py
index 7cb0dd249effd..51bcf23cfa17b 100644
--- a/pandas/tests/test_generic.py
+++ b/pandas/tests/test_generic.py
@@ -8,7 +8,7 @@
 
 from distutils.version import LooseVersion
 from pandas import (Index, Series, DataFrame, Panel, isnull,
-                    date_range, period_range)
+                    date_range, period_range, Panel4D)
 from pandas.core.index import MultiIndex
 
 import pandas.core.common as com
@@ -18,6 +18,8 @@
 from pandas.util.testing import (assert_series_equal,
                                  assert_frame_equal,
                                  assert_panel_equal,
+                                 assert_panel4d_equal,
+                                 assert_almost_equal,
                                  assert_equal)
 import pandas.util.testing as tm
 
@@ -1057,6 +1059,52 @@ def test_describe_none(self):
         expected = Series([0, 0], index=['count', 'unique'], name='None')
         assert_series_equal(noneSeries.describe(), expected)
 
+    def test_to_xarray(self):
+        # Series.to_xarray() should give a DataArray for each index type
+        tm._skip_if_no_xarray()
+        from xarray import DataArray
+
+        s = Series([])
+        s.index.name = 'foo'
+        result = s.to_xarray()
+        self.assertEqual(len(result), 0)
+        self.assertEqual(len(result.coords), 1)
+        assert_almost_equal(list(result.coords.keys()), ['foo'])
+        self.assertIsInstance(result, DataArray)
+
+        def testit(index, check_index_type=True):
+            s = Series(range(6), index=index(6))
+            s.index.name = 'foo'
+            result = s.to_xarray()
+            repr(result)  # smoke test: repr must not raise
+            self.assertEqual(len(result), 6)
+            self.assertEqual(len(result.coords), 1)
+            assert_almost_equal(list(result.coords.keys()), ['foo'])
+            self.assertIsInstance(result, DataArray)
+
+            # round trip: converting back must reproduce the input Series
+            assert_series_equal(result.to_series(), s,
+                                check_index_type=check_index_type)
+
+        for index in [tm.makeFloatIndex, tm.makeIntIndex,
+                      tm.makeStringIndex, tm.makeUnicodeIndex,
+                      tm.makeDateIndex, tm.makePeriodIndex,
+                      tm.makeTimedeltaIndex]:
+            testit(index)
+
+        # categorical index does not round-trip exactly: skip the type check
+        testit(tm.makeCategoricalIndex, check_index_type=False)
+        # MultiIndex: each level is expected to become its own coordinate
+        s = Series(range(6))
+        s.index.name = 'foo'  # NOTE: overwritten by the MultiIndex below
+        s.index = pd.MultiIndex.from_product([['a', 'b'], range(3)],
+                                             names=['one', 'two'])
+        result = s.to_xarray()
+        self.assertEqual(len(result), 2)
+        assert_almost_equal(list(result.coords.keys()), ['one', 'two'])
+        self.assertIsInstance(result, DataArray)
+        assert_series_equal(result.to_series(), s)
+
 
 class TestDataFrame(tm.TestCase, Generic):
     _typ = DataFrame
@@ -1777,11 +1825,103 @@ def test_pct_change(self):
 
             self.assert_frame_equal(result, expected)
 
+    def test_to_xarray(self):
+        # a DataFrame converts to a Dataset with one data variable per column
+        tm._skip_if_no_xarray()
+        from xarray import Dataset
+
+        df = DataFrame({'a': list('abc'),
+                        'b': list(range(1, 4)),
+                        'c': np.arange(3, 6).astype('u1'),
+                        'd': np.arange(4.0, 7.0, dtype='float64'),
+                        'e': [True, False, True],
+                        'f': pd.Categorical(list('abc')),
+                        'g': pd.date_range('20130101', periods=3),
+                        'h': pd.date_range('20130101',
+                                           periods=3,
+                                           tz='US/Eastern')}
+                       )
+        # an empty slice must still expose a zero-length 'foo' dimension
+        df.index.name = 'foo'
+        result = df[0:0].to_xarray()
+        self.assertEqual(result.dims['foo'], 0)
+        self.assertIsInstance(result, Dataset)
+
+        for index in [tm.makeFloatIndex, tm.makeIntIndex,
+                      tm.makeStringIndex, tm.makeUnicodeIndex,
+                      tm.makeDateIndex, tm.makePeriodIndex,
+                      tm.makeCategoricalIndex, tm.makeTimedeltaIndex]:
+            df.index = index(3)
+            df.index.name = 'foo'
+            df.columns.name = 'bar'
+            result = df.to_xarray()
+            self.assertEqual(result.dims['foo'], 3)
+            self.assertEqual(len(result.coords), 1)
+            self.assertEqual(len(result.data_vars), 8)
+            assert_almost_equal(list(result.coords.keys()), ['foo'])
+            self.assertIsInstance(result, Dataset)
+
+            # round trip through to_dataframe(), with these known losses:
+            # categoricals are not preserved
+            # datetimes w/tz are not preserved
+            # the name of the columns index is lost
+            expected = df.copy()
+            expected['f'] = expected['f'].astype(object)
+            expected['h'] = expected['h'].astype('datetime64[ns]')
+            expected.columns.name = None
+            assert_frame_equal(result.to_dataframe(),
+                               expected,
+                               check_index_type=False)
+
+        # a MultiIndex is not supported here: expect a ValueError
+        df.index = pd.MultiIndex.from_product([['a'], range(3)],
+                                              names=['one', 'two'])
+        self.assertRaises(ValueError, lambda: df.to_xarray())
+
 
 class TestPanel(tm.TestCase, Generic):
     _typ = Panel
     _comparator = lambda self, x, y: assert_panel_equal(x, y)
 
+    def test_to_xarray(self):
+        # a Panel converts to a 3-d DataArray, one coordinate per axis
+        tm._skip_if_no_xarray()
+        from xarray import DataArray
+
+        p = tm.makePanel()
+
+        result = p.to_xarray()
+        self.assertIsInstance(result, DataArray)
+        self.assertEqual(len(result.coords), 3)
+        assert_almost_equal(list(result.coords.keys()),
+                            ['items', 'major_axis', 'minor_axis'])
+        self.assertEqual(len(result.dims), 3)
+
+        # round trip: to_pandas() must give back an equal Panel
+        assert_panel_equal(result.to_pandas(), p)
+
+
+class TestPanel4D(tm.TestCase, Generic):
+    _typ = Panel4D
+    _comparator = lambda self, x, y: assert_panel4d_equal(x, y)
+
+    def test_to_xarray(self):
+        # a Panel4D converts to a 4-d DataArray, one coordinate per axis
+        tm._skip_if_no_xarray()
+        from xarray import DataArray
+
+        p = tm.makePanel4D()
+
+        result = p.to_xarray()
+        self.assertIsInstance(result, DataArray)
+        self.assertEqual(len(result.coords), 4)
+        assert_almost_equal(list(result.coords.keys()),
+                            ['labels', 'items', 'major_axis', 'minor_axis'])
+        self.assertEqual(len(result.dims), 4)
+
+        # no round trip: to_pandas() on the 4-d result must raise ValueError
+        self.assertRaises(ValueError, lambda: result.to_pandas())
+
 
 class TestNDFrame(tm.TestCase):
     # tests that don't fit elsewhere
diff --git a/pandas/util/print_versions.py b/pandas/util/print_versions.py
index 5c09f877d863b..80c10b53d37b5 100644
--- a/pandas/util/print_versions.py
+++ b/pandas/util/print_versions.py
@@ -68,6 +68,7 @@ def show_versions(as_json=False):
         ("numpy", lambda mod: mod.version.version),
         ("scipy", lambda mod: mod.version.version),
         ("statsmodels", lambda mod: mod.__version__),
+        ("xarray", lambda mod: mod.__version__),
         ("IPython", lambda mod: mod.__version__),
         ("sphinx", lambda mod: mod.__version__),
         ("patsy", lambda mod: mod.__version__),
diff --git a/pandas/util/testing.py b/pandas/util/testing.py
index 0a1249c246ae6..915fd08e2c0c6 100644
--- a/pandas/util/testing.py
+++ b/pandas/util/testing.py
@@ -224,6 +224,18 @@ def _skip_if_scipy_0_17():
         import nose
         raise nose.SkipTest("scipy 0.17")
 
+def _skip_if_no_xarray():
+    """Raise nose.SkipTest unless xarray >= 0.7.0 can be imported."""
+    try:
+        import xarray
+    except ImportError:
+        import nose
+        raise nose.SkipTest("xarray not installed")
+    v = xarray.__version__
+    if LooseVersion(v) < LooseVersion('0.7.0'):  # parse both sides
+        import nose
+        raise nose.SkipTest("xarray version is too low: {0}".format(v))
+
 def _skip_if_no_pytz():
     try:
         import pytz