From 358da5663a68d9b03ac86ff2f50d57de309a8f65 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 10 Feb 2016 12:42:53 -0500 Subject: [PATCH] ENH: add to_xarray conversion method supersedes #11950 xref #10000 Author: Jeff Reback Closes #11972 from jreback/xarray and squashes the following commits: 85de0b7 [Jeff Reback] ENH: add to_xarray conversion method --- ci/requirements-2.7.run | 1 + ci/requirements-3.5.run | 1 + doc/source/api.rst | 10 +++ doc/source/install.rst | 1 + doc/source/whatsnew/v0.18.0.txt | 30 ++++++- pandas/core/generic.py | 26 ++++++ pandas/tests/test_generic.py | 142 +++++++++++++++++++++++++++++++- pandas/util/print_versions.py | 1 + pandas/util/testing.py | 12 +++ 9 files changed, 222 insertions(+), 2 deletions(-) diff --git a/ci/requirements-2.7.run b/ci/requirements-2.7.run index 8fc074b96e0e4..6768a75f5c285 100644 --- a/ci/requirements-2.7.run +++ b/ci/requirements-2.7.run @@ -20,3 +20,4 @@ html5lib=1.0b2 beautiful-soup=4.2.1 statsmodels jinja2=2.8 +xarray diff --git a/ci/requirements-3.5.run b/ci/requirements-3.5.run index 2401a0fc11673..4ba3b473b3edd 100644 --- a/ci/requirements-3.5.run +++ b/ci/requirements-3.5.run @@ -17,6 +17,7 @@ bottleneck sqlalchemy pymysql psycopg2 +xarray # incompat with conda ATM # beautiful-soup diff --git a/doc/source/api.rst b/doc/source/api.rst index 52fd8f5838b1c..c572aa9ae2e03 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -684,6 +684,7 @@ Serialization / IO / Conversion Series.to_csv Series.to_dict Series.to_frame + Series.to_xarray Series.to_hdf Series.to_sql Series.to_msgpack @@ -918,6 +919,7 @@ Reshaping, sorting, transposing DataFrame.unstack DataFrame.T DataFrame.to_panel + DataFrame.to_xarray DataFrame.transpose Combining / joining / merging @@ -1216,6 +1218,7 @@ Serialization / IO / Conversion Panel.to_json Panel.to_sparse Panel.to_frame + Panel.to_xarray Panel.to_clipboard .. 
_api.panel4d: @@ -1230,6 +1233,13 @@ Constructor Panel4D +Serialization / IO / Conversion +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: generated/ + + Panel4D.to_xarray + Attributes and underlying data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ **Axes** diff --git a/doc/source/install.rst b/doc/source/install.rst index 3df38cdc092e3..3836180af520f 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -244,6 +244,7 @@ Optional Dependencies * `Cython `__: Only necessary to build development version. Version 0.19.1 or higher. * `SciPy `__: miscellaneous statistical functions +* `xarray `__: pandas-like handling for > 2 dims, needed for converting Panels to xarray objects. Version 0.7.0 or higher is recommended. * `PyTables `__: necessary for HDF5-based storage. Version 3.0.0 or higher required, Version 3.2.1 or higher highly recommended. * `SQLAlchemy `__: for SQL database support. Version 0.8.1 or higher recommended. Besides SQLAlchemy, you also need a database specific driver. You can find an overview of supported drivers for each SQL dialect in the `SQLAlchemy docs `__. Some common drivers are: diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt index cf1a13d33e17f..10b1bfd7ce085 100644 --- a/doc/source/whatsnew/v0.18.0.txt +++ b/doc/source/whatsnew/v0.18.0.txt @@ -274,7 +274,6 @@ In addition, ``.round()``, ``.floor()`` and ``.ceil()`` will be available thru t s s.dt.round('D') - Formatting of integer in FloatIndex ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -315,6 +314,35 @@ New Behavior: s.index print(s.to_csv(path=None)) +.. _whatsnew_0180.enhancements.xarray: + +to_xarray +^^^^^^^^^ + +In a future version of pandas, we will be deprecating ``Panel`` and other > 2 ndim objects. To provide continuity, +all ``NDFrame`` objects have gained the ``.to_xarray()`` method, which converts them to ``xarray`` objects; these have +a pandas-like interface for > 2 ndim. + +See the `full xarray documentation here `__. + +.. 
code-block:: python + + In [1]: p = Panel(np.arange(2*3*4).reshape(2,3,4)) + + In [2]: p.to_xarray() + Out[2]: + + array([[[ 0, 1, 2, 3], + [ 4, 5, 6, 7], + [ 8, 9, 10, 11]], + + [[12, 13, 14, 15], + [16, 17, 18, 19], + [20, 21, 22, 23]]]) + Coordinates: + * items (items) int64 0 1 + * major_axis (major_axis) int64 0 1 2 + * minor_axis (minor_axis) int64 0 1 2 3 .. _whatsnew_0180.enhancements.other: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a746a93c3dc16..1cc46d0e4ffff 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1040,6 +1040,32 @@ def to_clipboard(self, excel=None, sep=None, **kwargs): from pandas.io import clipboard clipboard.to_clipboard(self, excel=excel, sep=sep, **kwargs) + def to_xarray(self): + """ + Return an xarray object from the pandas object. + + Returns + ------- + a DataArray for a Series + a Dataset for a DataFrame + a DataArray for higher dims + + See Also + -------- + `xarray docs `__ + """ + import xarray + if self.ndim == 1: + return xarray.DataArray.from_series(self) + elif self.ndim == 2: + return xarray.Dataset.from_dataframe(self) + + # > 2 dims + coords = [(a, self._get_axis(a)) for a in self._AXIS_ORDERS] + return xarray.DataArray(self, + coords=coords, + ) + # ---------------------------------------------------------------------- # Fancy Indexing diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index 7cb0dd249effd..51bcf23cfa17b 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -8,7 +8,7 @@ from distutils.version import LooseVersion from pandas import (Index, Series, DataFrame, Panel, isnull, - date_range, period_range) + date_range, period_range, Panel4D) from pandas.core.index import MultiIndex import pandas.core.common as com @@ -18,6 +18,8 @@ from pandas.util.testing import (assert_series_equal, assert_frame_equal, assert_panel_equal, + assert_panel4d_equal, + assert_almost_equal, assert_equal) import pandas.util.testing as tm @@ 
-1057,6 +1059,52 @@ def test_describe_none(self): expected = Series([0, 0], index=['count', 'unique'], name='None') assert_series_equal(noneSeries.describe(), expected) + def test_to_xarray(self): + + tm._skip_if_no_xarray() + from xarray import DataArray + + s = Series([]) + s.index.name = 'foo' + result = s.to_xarray() + self.assertEqual(len(result), 0) + self.assertEqual(len(result.coords), 1) + assert_almost_equal(list(result.coords.keys()), ['foo']) + self.assertIsInstance(result, DataArray) + + def testit(index, check_index_type=True): + s = Series(range(6), index=index(6)) + s.index.name = 'foo' + result = s.to_xarray() + repr(result) + self.assertEqual(len(result), 6) + self.assertEqual(len(result.coords), 1) + assert_almost_equal(list(result.coords.keys()), ['foo']) + self.assertIsInstance(result, DataArray) + + # idempotency + assert_series_equal(result.to_series(), s, + check_index_type=check_index_type) + + for index in [tm.makeFloatIndex, tm.makeIntIndex, + tm.makeStringIndex, tm.makeUnicodeIndex, + tm.makeDateIndex, tm.makePeriodIndex, + tm.makeTimedeltaIndex]: + testit(index) + + # not idempotent + testit(tm.makeCategoricalIndex, check_index_type=False) + + s = Series(range(6)) + s.index.name = 'foo' + s.index = pd.MultiIndex.from_product([['a', 'b'], range(3)], + names=['one', 'two']) + result = s.to_xarray() + self.assertEqual(len(result), 2) + assert_almost_equal(list(result.coords.keys()), ['one', 'two']) + self.assertIsInstance(result, DataArray) + assert_series_equal(result.to_series(), s) + class TestDataFrame(tm.TestCase, Generic): _typ = DataFrame @@ -1777,11 +1825,103 @@ def test_pct_change(self): self.assert_frame_equal(result, expected) + def test_to_xarray(self): + + tm._skip_if_no_xarray() + from xarray import Dataset + + df = DataFrame({'a': list('abc'), + 'b': list(range(1, 4)), + 'c': np.arange(3, 6).astype('u1'), + 'd': np.arange(4.0, 7.0, dtype='float64'), + 'e': [True, False, True], + 'f': pd.Categorical(list('abc')), + 'g': 
pd.date_range('20130101', periods=3), + 'h': pd.date_range('20130101', + periods=3, + tz='US/Eastern')} + ) + + df.index.name = 'foo' + result = df[0:0].to_xarray() + self.assertEqual(result.dims['foo'], 0) + self.assertIsInstance(result, Dataset) + + for index in [tm.makeFloatIndex, tm.makeIntIndex, + tm.makeStringIndex, tm.makeUnicodeIndex, + tm.makeDateIndex, tm.makePeriodIndex, + tm.makeCategoricalIndex, tm.makeTimedeltaIndex]: + df.index = index(3) + df.index.name = 'foo' + df.columns.name = 'bar' + result = df.to_xarray() + self.assertEqual(result.dims['foo'], 3) + self.assertEqual(len(result.coords), 1) + self.assertEqual(len(result.data_vars), 8) + assert_almost_equal(list(result.coords.keys()), ['foo']) + self.assertIsInstance(result, Dataset) + + # idempotency + # categoricals are not preserved + # datetimes w/tz are not preserved + # column names are lost + expected = df.copy() + expected['f'] = expected['f'].astype(object) + expected['h'] = expected['h'].astype('datetime64[ns]') + expected.columns.name = None + assert_frame_equal(result.to_dataframe(), + expected, + check_index_type=False) + + # not implemented + df.index = pd.MultiIndex.from_product([['a'], range(3)], + names=['one', 'two']) + self.assertRaises(ValueError, lambda: df.to_xarray()) + class TestPanel(tm.TestCase, Generic): _typ = Panel _comparator = lambda self, x, y: assert_panel_equal(x, y) + def test_to_xarray(self): + + tm._skip_if_no_xarray() + from xarray import DataArray + + p = tm.makePanel() + + result = p.to_xarray() + self.assertIsInstance(result, DataArray) + self.assertEqual(len(result.coords), 3) + assert_almost_equal(list(result.coords.keys()), + ['items', 'major_axis', 'minor_axis']) + self.assertEqual(len(result.dims), 3) + + # idempotency + assert_panel_equal(result.to_pandas(), p) + + +class TestPanel4D(tm.TestCase, Generic): + _typ = Panel4D + _comparator = lambda self, x, y: assert_panel4d_equal(x, y) + + def test_to_xarray(self): + + tm._skip_if_no_xarray() + from 
xarray import DataArray + + p = tm.makePanel4D() + + result = p.to_xarray() + self.assertIsInstance(result, DataArray) + self.assertEqual(len(result.coords), 4) + assert_almost_equal(list(result.coords.keys()), + ['labels', 'items', 'major_axis', 'minor_axis']) + self.assertEqual(len(result.dims), 4) + + # non-convertible + self.assertRaises(ValueError, lambda: result.to_pandas()) + class TestNDFrame(tm.TestCase): # tests that don't fit elsewhere diff --git a/pandas/util/print_versions.py b/pandas/util/print_versions.py index 5c09f877d863b..80c10b53d37b5 100644 --- a/pandas/util/print_versions.py +++ b/pandas/util/print_versions.py @@ -68,6 +68,7 @@ def show_versions(as_json=False): ("numpy", lambda mod: mod.version.version), ("scipy", lambda mod: mod.version.version), ("statsmodels", lambda mod: mod.__version__), + ("xarray", lambda mod: mod.__version__), ("IPython", lambda mod: mod.__version__), ("sphinx", lambda mod: mod.__version__), ("patsy", lambda mod: mod.__version__), diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 0a1249c246ae6..915fd08e2c0c6 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -224,6 +224,18 @@ def _skip_if_scipy_0_17(): import nose raise nose.SkipTest("scipy 0.17") +def _skip_if_no_xarray(): + try: + import xarray + except ImportError: + import nose + raise nose.SkipTest("xarray not installed") + + v = xarray.__version__ + if v < LooseVersion('0.7.0'): + import nose + raise nose.SkipTest("xarray not version is too low: {0}".format(v)) + def _skip_if_no_pytz(): try: import pytz