In [25]:
import pandas as pd
import numpy as np
import xarray as xr

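This notebook checks which serialization formats (CSV, xarray/netCDF, JSON, HDF, pickle) round-trip a DataFrame with multi-level columns. The example DataFrame is taken from the pandas documentation: http://pandas.pydata.org/pandas-docs/stable/reshaping.html#multiple-levels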

In [22]:
# Column MultiIndex with three named levels, given as one list per level:
# position i across the three lists forms the label tuple of column i.
columns = pd.MultiIndex.from_arrays(
    [
        ['A', 'B', 'A', 'B'],
        ['cat', 'cat', 'dog', 'dog'],
        ['long', 'long', 'short', 'short'],
    ],
    names=['exp', 'animal', 'hair_length'],
)

In [3]:
# Draw the sample values from a seeded generator so the frame -- and every
# serialization result derived from it -- is identical on each
# Restart & Run All instead of changing with every execution.
rng = np.random.RandomState(0)
df = pd.DataFrame(rng.randn(4, 4), columns=columns)

In [21]:
# Initial multi-level dataframe: the columns carry three named levels
# (exp, animal, hair_length); the rows use the default integer index.
df

exp,A,B,A,B
animal,cat,cat,dog,dog
hair_length,long,long,short,short
0,-1.145181,0.363892,2.579573,1.402121
1,0.530969,0.927138,0.157234,-1.06209
2,-0.987123,-0.255092,0.7364,-0.323554
3,1.81135,-1.379535,-2.155606,1.740007


In [23]:
# Round-trip through CSV using read_csv's default settings.
# With the defaults only the first line is treated as the header, so the
# remaining column levels come back as ordinary data rows and the column
# MultiIndex is not recovered.
# NOTE(review): presumably passing header=[0, 1, 2] and index_col=0 to
# read_csv would rebuild the multi-level columns -- confirm against the
# pandas read_csv documentation.
df.to_csv('test_multi-level.csv')
pd.read_csv('test_multi-level.csv')

Unnamed: 0,exp,A,B,A.1,B.1
0,animal,cat,cat,dog,dog
1,hair_length,long,long,short,short
2,,,,,
3,0,-1.14518144534,0.363891588014,2.57957252125,1.40212060318
4,1,0.530968580207,0.927138456086,0.157234471535,-1.06209042971
5,2,-0.987123012949,-0.255091737606,0.736400241118,-0.323553882654
6,3,1.81134997095,-1.37953499014,-2.15560594924,1.74000728995


In [36]:
# Convert to an xarray Dataset: each column becomes a data variable keyed by
# its (exp, animal, hair_length) tuple, laid out along the row index.
df.to_xarray()

<xarray.Dataset>
Dimensions:                (index: 4)
Coordinates:
  * index                  (index) int64 0 1 2 3
Data variables:
    ('A', 'cat', 'long')   (index) float64 -1.145 0.531 -0.9871 1.811
    ('B', 'cat', 'long')   (index) float64 0.3639 0.9271 -0.2551 -1.38
    ('A', 'dog', 'short')  (index) float64 2.58 0.1572 0.7364 -2.156
    ('B', 'dog', 'short')  (index) float64 1.402 -1.062 -0.3236 1.74

In [37]:
# Round-trip through xarray and back to a DataFrame. The column tuples
# survive, but the level names (e.g. hair_length) are not round-tripped --
# inconvenient, but not a deal breaker, since integer values can be used to
# refer to the levels instead.
df.to_xarray().to_dataframe()

Unnamed: 0_level_0,A,B,A,B
Unnamed: 0_level_1,cat,cat,dog,dog
Unnamed: 0_level_2,long,long,short,short
index,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3
0,-1.145181,0.363892,2.579573,1.402121
1,0.530969,0.927138,0.157234,-1.06209
2,-0.987123,-0.255092,0.7364,-0.323554
3,1.81135,-1.379535,-2.155606,1.740007


In [40]:
# But serialization fails for a multi-index: the Dataset's variables are keyed
# by column tuples, and netCDF serialization requires string (or None) names.
# Known open github issue: https://github.com/pydata/xarray/issues/719
# The TypeError is what this cell demonstrates, so catch and print it rather
# than letting it abort a Restart & Run All.
try:
    df.to_xarray().to_netcdf('test_multi-level.nc')
except TypeError as err:
    print('TypeError: %s' % err)

TypeError: DataArray.name or Dataset key must be either a string or None for serialization to netCDF files

In [43]:
# Write works
df.to_json('test_multi-level.json')

# Read in fails
# Open github issue: https://github.com/pydata/pandas/issues/9146
# The ValueError is what this cell demonstrates, so catch and print it rather
# than letting it abort a Restart & Run All. If the read succeeds (e.g. on a
# pandas version where the issue is resolved), the frame is displayed instead.
try:
    df_from_json = pd.read_json('test_multi-level.json')
except ValueError as err:
    df_from_json = None
    print('ValueError: %s' % err)
df_from_json

ValueError: No ':' found when decoding object value

In [50]:
# hdf requires the additional dependency of PyTables
# `key` is passed by keyword because newer pandas releases deprecate passing
# it positionally. A missing or broken PyTables install surfaces as an
# ImportError; print it instead of aborting a Restart & Run All.
try:
    df.to_hdf('test_multi-level.hdf', key='f')
except ImportError as err:
    print('ImportError: %s' % err)

ImportError: HDFStore requires PyTables, "dlopen(//anaconda/lib/python2.7/site-packages/tables/utilsextension.so, 2): Library not loaded: @loader_path/../../../libhdf5.7.dylib
  Referenced from: //anaconda/lib/python2.7/site-packages/tables/utilsextension.so
  Reason: image not found" problem importing

In [54]:
# As expected, pickle recovers everything, including the column level names.
# Only unpickle files from a trusted source; this file is written by this
# same cell immediately before it is read back.
df.to_pickle('test_multi-level.p')
pd.read_pickle('test_multi-level.p')

exp,A,B,A,B
animal,cat,cat,dog,dog
hair_length,long,long,short,short
0,-1.145181,0.363892,2.579573,1.402121
1,0.530969,0.927138,0.157234,-1.06209
2,-0.987123,-0.255092,0.7364,-0.323554
3,1.81135,-1.379535,-2.155606,1.740007
