Skip to content

Commit

Permalink
ENH: add to_xarray conversion method
Browse files Browse the repository at this point in the history
supersedes #11950
xref #10000

Author: Jeff Reback <jeff@reback.net>

Closes #11972 from jreback/xarray and squashes the following commits:

85de0b7 [Jeff Reback] ENH: add to_xarray conversion method
  • Loading branch information
jreback committed Feb 10, 2016
1 parent ab29f93 commit 358da56
Show file tree
Hide file tree
Showing 9 changed files with 222 additions and 2 deletions.
1 change: 1 addition & 0 deletions ci/requirements-2.7.run
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,4 @@ html5lib=1.0b2
beautiful-soup=4.2.1
statsmodels
jinja2=2.8
xarray
1 change: 1 addition & 0 deletions ci/requirements-3.5.run
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ bottleneck
sqlalchemy
pymysql
psycopg2
xarray

# incompat with conda ATM
# beautiful-soup
10 changes: 10 additions & 0 deletions doc/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -684,6 +684,7 @@ Serialization / IO / Conversion
Series.to_csv
Series.to_dict
Series.to_frame
Series.to_xarray
Series.to_hdf
Series.to_sql
Series.to_msgpack
Expand Down Expand Up @@ -918,6 +919,7 @@ Reshaping, sorting, transposing
DataFrame.unstack
DataFrame.T
DataFrame.to_panel
DataFrame.to_xarray
DataFrame.transpose

Combining / joining / merging
Expand Down Expand Up @@ -1216,6 +1218,7 @@ Serialization / IO / Conversion
Panel.to_json
Panel.to_sparse
Panel.to_frame
Panel.to_xarray
Panel.to_clipboard

.. _api.panel4d:
Expand All @@ -1230,6 +1233,13 @@ Constructor

Panel4D

Serialization / IO / Conversion
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autosummary::
:toctree: generated/

Panel4D.to_xarray

Attributes and underlying data
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
**Axes**
Expand Down
1 change: 1 addition & 0 deletions doc/source/install.rst
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,7 @@ Optional Dependencies
* `Cython <http://www.cython.org>`__: Only necessary to build development
version. Version 0.19.1 or higher.
* `SciPy <http://www.scipy.org>`__: miscellaneous statistical functions
* `xarray <http://xarray.readthedocs.org>`__: pandas-like handling for > 2 dims, needed for converting Panels to xarray objects. Version 0.7.0 or higher is recommended.
* `PyTables <http://www.pytables.org>`__: necessary for HDF5-based storage. Version 3.0.0 or higher required, Version 3.2.1 or higher highly recommended.
* `SQLAlchemy <http://www.sqlalchemy.org>`__: for SQL database support. Version 0.8.1 or higher recommended. Besides SQLAlchemy, you also need a database specific driver. You can find an overview of supported drivers for each SQL dialect in the `SQLAlchemy docs <http://docs.sqlalchemy.org/en/latest/dialects/index.html>`__. Some common drivers are:

Expand Down
30 changes: 29 additions & 1 deletion doc/source/whatsnew/v0.18.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,6 @@ In addition, ``.round()``, ``.floor()`` and ``.ceil()`` will be available thru t
s
s.dt.round('D')


Formatting of integer in FloatIndex
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Expand Down Expand Up @@ -315,6 +314,35 @@ New Behavior:
s.index
print(s.to_csv(path=None))

.. _whatsnew_0180.enhancements.xarray:

to_xarray
^^^^^^^^^

In a future version of pandas, we will be deprecating ``Panel`` and other > 2 ndim objects. In order to provide for continuity,
all ``NDFrame`` objects have gained the ``.to_xarray()`` method in order to convert to ``xarray`` objects, which have
a pandas-like interface for > 2 ndim.

See the `full xarray documentation here <http://xarray.pydata.org/en/stable/>`__.

.. code-block:: python

In [1]: p = Panel(np.arange(2*3*4).reshape(2,3,4))

In [2]: p.to_xarray()
Out[2]:
<xarray.DataArray (items: 2, major_axis: 3, minor_axis: 4)>
array([[[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]],

[[12, 13, 14, 15],
[16, 17, 18, 19],
[20, 21, 22, 23]]])
Coordinates:
* items (items) int64 0 1
* major_axis (major_axis) int64 0 1 2
* minor_axis (minor_axis) int64 0 1 2 3

.. _whatsnew_0180.enhancements.other:

Expand Down
26 changes: 26 additions & 0 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1040,6 +1040,32 @@ def to_clipboard(self, excel=None, sep=None, **kwargs):
from pandas.io import clipboard
clipboard.to_clipboard(self, excel=excel, sep=sep, **kwargs)

def to_xarray(self):
    """
    Return an xarray object from the pandas object.

    The conversion is chosen from ``self.ndim``: 1-dimensional objects go
    through ``xarray.DataArray.from_series``, 2-dimensional objects go
    through ``xarray.Dataset.from_dataframe``, and anything with more
    dimensions is wrapped in an ``xarray.DataArray`` whose coordinates
    are the object's axes, taken in ``_AXIS_ORDERS`` order.

    Returns
    -------
    a DataArray for a Series
    a Dataset for a DataFrame
    a DataArray for higher dims

    See Also
    --------
    `xarray docs <http://xarray.pydata.org/en/stable/>`__
    """
    # imported lazily: xarray is an optional dependency
    import xarray

    ndim = self.ndim
    if ndim == 1:
        return xarray.DataArray.from_series(self)
    if ndim == 2:
        return xarray.Dataset.from_dataframe(self)

    # > 2 dims: one (axis name, axis labels) pair per axis
    axis_coords = []
    for axis_name in self._AXIS_ORDERS:
        axis_coords.append((axis_name, self._get_axis(axis_name)))
    return xarray.DataArray(self, coords=axis_coords)

# ----------------------------------------------------------------------
# Fancy Indexing

Expand Down
142 changes: 141 additions & 1 deletion pandas/tests/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from distutils.version import LooseVersion
from pandas import (Index, Series, DataFrame, Panel, isnull,
date_range, period_range)
date_range, period_range, Panel4D)
from pandas.core.index import MultiIndex

import pandas.core.common as com
Expand All @@ -18,6 +18,8 @@
from pandas.util.testing import (assert_series_equal,
assert_frame_equal,
assert_panel_equal,
assert_panel4d_equal,
assert_almost_equal,
assert_equal)
import pandas.util.testing as tm

Expand Down Expand Up @@ -1057,6 +1059,52 @@ def test_describe_none(self):
expected = Series([0, 0], index=['count', 'unique'], name='None')
assert_series_equal(noneSeries.describe(), expected)

def test_to_xarray(self):
    # Series.to_xarray() must give a DataArray with one coordinate per
    # index level, and converting back with to_series() must round-trip.

    tm._skip_if_no_xarray()
    from xarray import DataArray

    # empty Series: still a DataArray carrying the (empty) named index
    empty = Series([])
    empty.index.name = 'foo'
    converted = empty.to_xarray()
    self.assertEqual(len(converted), 0)
    self.assertEqual(len(converted.coords), 1)
    assert_almost_equal(list(converted.coords.keys()), ['foo'])
    self.assertIsInstance(converted, DataArray)

    def check_roundtrip(make_index, check_index_type=True):
        ser = Series(range(6), index=make_index(6))
        ser.index.name = 'foo'
        xr_obj = ser.to_xarray()
        # the repr must be computable without raising
        repr(xr_obj)
        self.assertEqual(len(xr_obj), 6)
        self.assertEqual(len(xr_obj.coords), 1)
        assert_almost_equal(list(xr_obj.coords.keys()), ['foo'])
        self.assertIsInstance(xr_obj, DataArray)

        # round-trip back to a Series
        assert_series_equal(xr_obj.to_series(), ser,
                            check_index_type=check_index_type)

    # (index factory, whether the index type survives the round-trip);
    # a CategoricalIndex does not come back as the same index type
    cases = [(tm.makeFloatIndex, True),
             (tm.makeIntIndex, True),
             (tm.makeStringIndex, True),
             (tm.makeUnicodeIndex, True),
             (tm.makeDateIndex, True),
             (tm.makePeriodIndex, True),
             (tm.makeTimedeltaIndex, True),
             (tm.makeCategoricalIndex, False)]
    for make_index, strict_index_type in cases:
        check_roundtrip(make_index, check_index_type=strict_index_type)

    # MultiIndex: one coordinate (and one dimension) per level
    multi = Series(range(6))
    multi.index.name = 'foo'
    levels = [['a', 'b'], range(3)]
    multi.index = pd.MultiIndex.from_product(levels,
                                             names=['one', 'two'])
    converted = multi.to_xarray()
    self.assertEqual(len(converted), 2)
    assert_almost_equal(list(converted.coords.keys()), ['one', 'two'])
    self.assertIsInstance(converted, DataArray)
    assert_series_equal(converted.to_series(), multi)


class TestDataFrame(tm.TestCase, Generic):
_typ = DataFrame
Expand Down Expand Up @@ -1777,11 +1825,103 @@ def test_pct_change(self):

self.assert_frame_equal(result, expected)

def test_to_xarray(self):
    # DataFrame.to_xarray() must give a Dataset with the index as its
    # only coordinate and one data variable per column.

    tm._skip_if_no_xarray()
    from xarray import Dataset

    # one column per dtype family we want to push through the conversion
    columns = {'a': list('abc'),
               'b': list(range(1, 4)),
               'c': np.arange(3, 6).astype('u1'),
               'd': np.arange(4.0, 7.0, dtype='float64'),
               'e': [True, False, True],
               'f': pd.Categorical(list('abc')),
               'g': pd.date_range('20130101', periods=3),
               'h': pd.date_range('20130101',
                                  periods=3,
                                  tz='US/Eastern')}
    frame = DataFrame(columns)

    # empty frame
    frame.index.name = 'foo'
    converted = frame[0:0].to_xarray()
    self.assertEqual(converted.dims['foo'], 0)
    self.assertIsInstance(converted, Dataset)

    index_factories = [tm.makeFloatIndex, tm.makeIntIndex,
                       tm.makeStringIndex, tm.makeUnicodeIndex,
                       tm.makeDateIndex, tm.makePeriodIndex,
                       tm.makeCategoricalIndex, tm.makeTimedeltaIndex]
    for make_index in index_factories:
        frame.index = make_index(3)
        frame.index.name = 'foo'
        frame.columns.name = 'bar'
        converted = frame.to_xarray()
        self.assertEqual(converted.dims['foo'], 3)
        self.assertEqual(len(converted.coords), 1)
        self.assertEqual(len(converted.data_vars), 8)
        assert_almost_equal(list(converted.coords.keys()), ['foo'])
        self.assertIsInstance(converted, Dataset)

        # round-trip back to a DataFrame, allowing for what is lost:
        # categoricals are not preserved
        # datetimes w/tz are not preserved
        # column names are lost
        roundtrip_expected = frame.copy()
        roundtrip_expected['f'] = roundtrip_expected['f'].astype(object)
        roundtrip_expected['h'] = (
            roundtrip_expected['h'].astype('datetime64[ns]'))
        roundtrip_expected.columns.name = None
        assert_frame_equal(converted.to_dataframe(),
                           roundtrip_expected,
                           check_index_type=False)

    # not implemented: a MultiIndex-ed frame is expected to raise
    frame.index = pd.MultiIndex.from_product([['a'], range(3)],
                                             names=['one', 'two'])
    self.assertRaises(ValueError, frame.to_xarray)


class TestPanel(tm.TestCase, Generic):
    # concrete type under test and the matching equality helper
    _typ = Panel
    _comparator = lambda self, x, y: assert_panel_equal(x, y)

    def test_to_xarray(self):
        # Panel.to_xarray() must give a 3-d DataArray whose coordinates
        # are the three panel axes, and to_pandas() must round-trip.

        tm._skip_if_no_xarray()
        from xarray import DataArray

        panel = tm.makePanel()
        converted = panel.to_xarray()

        self.assertIsInstance(converted, DataArray)
        self.assertEqual(len(converted.coords), 3)
        coord_names = list(converted.coords.keys())
        assert_almost_equal(coord_names,
                            ['items', 'major_axis', 'minor_axis'])
        self.assertEqual(len(converted.dims), 3)

        # round-trip back to a Panel
        assert_panel_equal(converted.to_pandas(), panel)


class TestPanel4D(tm.TestCase, Generic):
    # concrete type under test and the matching equality helper
    _typ = Panel4D
    _comparator = lambda self, x, y: assert_panel4d_equal(x, y)

    def test_to_xarray(self):
        # Panel4D.to_xarray() must give a 4-d DataArray whose coordinates
        # are the four panel axes.

        tm._skip_if_no_xarray()
        from xarray import DataArray

        panel4d = tm.makePanel4D()
        converted = panel4d.to_xarray()

        self.assertIsInstance(converted, DataArray)
        self.assertEqual(len(converted.coords), 4)
        coord_names = list(converted.coords.keys())
        assert_almost_equal(coord_names,
                            ['labels', 'items', 'major_axis', 'minor_axis'])
        self.assertEqual(len(converted.dims), 4)

        # non-convertible: going back via to_pandas() is expected to raise
        self.assertRaises(ValueError, converted.to_pandas)


class TestNDFrame(tm.TestCase):
# tests that don't fit elsewhere
Expand Down
1 change: 1 addition & 0 deletions pandas/util/print_versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ def show_versions(as_json=False):
("numpy", lambda mod: mod.version.version),
("scipy", lambda mod: mod.version.version),
("statsmodels", lambda mod: mod.__version__),
("xarray", lambda mod: mod.__version__),
("IPython", lambda mod: mod.__version__),
("sphinx", lambda mod: mod.__version__),
("patsy", lambda mod: mod.__version__),
Expand Down
12 changes: 12 additions & 0 deletions pandas/util/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,18 @@ def _skip_if_scipy_0_17():
import nose
raise nose.SkipTest("scipy 0.17")

def _skip_if_no_xarray():
    """
    Skip the calling test unless a recent enough xarray is importable.

    Raises
    ------
    nose.SkipTest
        If xarray cannot be imported, or if its ``__version__`` compares
        lower than 0.7.0.
    """
    try:
        import xarray
    except ImportError:
        import nose
        raise nose.SkipTest("xarray not installed")

    v = xarray.__version__
    # compare parsed versions on both sides rather than a raw string
    # against a LooseVersion
    if LooseVersion(v) < LooseVersion('0.7.0'):
        import nose
        raise nose.SkipTest("xarray version is too low: {0}".format(v))

def _skip_if_no_pytz():
try:
import pytz
Expand Down

0 comments on commit 358da56

Please sign in to comment.