In [1]:
import pandas as pd
# Cap rendered frames at 12 rows so long outputs are truncated with an ellipsis row.
pd.set_option('display.max_rows', 12)
# Echo the pandas build this notebook was executed against.
pd.__version__

'0.19.0+712.g66140df'

## Build a table schema

In [2]:
# Small frame with integer, string and datetime columns plus a named index;
# it is the input for the table-schema JSON export in the cells that follow.
df = pd.DataFrame(
    dict(
        A=[1, 2, 3],
        B=list('abc'),
        C=pd.date_range(start='2016-01-01', periods=3, freq='d'),
    ),
    index=pd.Index(range(3), name='idx'),
)
df

Unnamed: 0_level_0,A,B,C
idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,1,a,2016-01-01
1,2,b,2016-01-02
2,3,c,2016-01-03


In [3]:
import uuid
from IPython.display import display_javascript, display_html, display
import json

class RenderJSON(object):
    """Display JSON as an interactive tree in the notebook via renderjson.js.

    Parameters
    ----------
    json_data : dict, list, tuple or str
        A dict, list or tuple is serialized with ``json.dumps``. Any other
        value (typically an already-serialized JSON string) is stored as-is
        and spliced verbatim into the generated JavaScript.

    Attributes
    ----------
    json_str : the JSON text handed to ``renderjson`` in the browser.
    uuid : id of the ``<div>`` that receives the rendered tree.
    """

    def __init__(self, json_data):
        # Serialize Python containers so the text interpolated into the script
        # is valid JSON. Serializing only dicts would let a list fall through
        # and be formatted with its Python repr (True/None/single quotes),
        # which is not valid JavaScript.
        if isinstance(json_data, (dict, list, tuple)):
            self.json_str = json.dumps(json_data)
        else:
            self.json_str = json_data
        # A fresh id per instance ties the placeholder div to its script.
        self.uuid = str(uuid.uuid4())

    def _ipython_display_(self):
        """Emit the placeholder div, then the script that fills it with the tree."""
        display_html('<div id="{}" style="height: 600px; width:100%;"></div>'.format(self.uuid), raw=True)
        # NOTE(review): the script is fetched from rawgit.com at display time;
        # confirm that host still serves this file, or point at a maintained CDN.
        display_javascript("""
        require(["https://rawgit.com/caldwell/renderjson/master/renderjson.js"], function() {
        document.getElementById('%s').appendChild(renderjson(%s))
        });
        """ % (self.uuid, self.json_str), raw=True)

In [4]:
# Serialize with orient='table': the result bundles a "schema" section
# (field names/types, primary key, pandas version) with the row "data".
df.to_json(orient='table')

'{"schema": {"fields":[{"name":"idx","type":"integer"},{"name":"A","type":"integer"},{"name":"B","type":"string"},{"name":"C","type":"datetime"}],"primaryKey":["idx"],"pandas_version":"0.20.0"}, "data": [{"idx":0,"A":1,"B":"a","C":"2016-01-01T00:00:00.000Z"},{"idx":1,"A":2,"B":"b","C":"2016-01-02T00:00:00.000Z"},{"idx":2,"A":3,"B":"c","C":"2016-01-03T00:00:00.000Z"}]}'

In [5]:
# Pass the same table-oriented JSON string to RenderJSON; as the cell's last
# expression it is shown through the object's _ipython_display_ hook.
RenderJSON(df.to_json(orient='table'))

## Deprecate `.ix`

In [6]:
# Rebind df to a frame with string row labels, used by the indexing examples below.
df = pd.DataFrame(
    dict(A=[1, 2, 3], B=[4, 5, 6]),
    index=['a', 'b', 'c'],
)
df

Unnamed: 0,A,B
a,1,4
b,2,5
c,3,6


In [7]:
# Intentional use of the deprecated .ix indexer (mixes positional rows with a
# column label) to show the deprecation warning it emits.
df.ix[[0, 2], 'A']

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate_ix
  if __name__ == '__main__':



a    1
c    3
Name: A, dtype: int64

In [8]:
# Label-based replacement: turn positions 0 and 2 into index labels first,
# then select purely by label with .loc.
df.loc[df.index[[0, 2]], 'A']


a    1
c    3
Name: A, dtype: int64

In [9]:
# Positional replacement: turn the column label 'A' into its integer position,
# then select purely by position with .iloc.
df.iloc[[0, 2], df.columns.get_loc('A')]


a    1
c    3
Name: A, dtype: int64

## Panel Deprecation

In [10]:
# Build a sample Panel with pandas' testing helper and display its summary.
from pandas.util import testing as tm
p = tm.makePanel()
p


<class 'pandas.core.panel.Panel'>
Dimensions: 3 (items) x 30 (major_axis) x 4 (minor_axis)
Items axis: ItemA to ItemC
Major_axis axis: 2000-01-03 00:00:00 to 2000-02-11 00:00:00
Minor_axis axis: A to D

In [11]:
# Flatten the Panel into a DataFrame: rows are indexed by (major, minor) and
# each item becomes a column.
p.to_frame()

Unnamed: 0_level_0,Unnamed: 1_level_0,ItemA,ItemB,ItemC
major,minor,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000-01-03,A,0.554375,0.102788,0.252704
2000-01-03,B,-1.008729,-1.743201,-1.504752
2000-01-03,C,-0.863862,-0.154922,-0.003295
2000-01-03,D,-0.590095,0.133610,-1.397518
2000-01-04,A,2.742404,-1.385856,1.024589
2000-01-04,B,-0.522179,0.221957,0.777994
...,...,...,...,...
2000-02-10,C,1.164913,-0.723135,0.483671
2000-02-10,D,0.615187,-0.437562,-0.940147
2000-02-11,A,1.255148,-0.301200,0.157915


In [12]:
# Convert the Panel to an xarray DataArray with dimensions
# (items, major_axis, minor_axis).
p.to_xarray()

  from pandas.tslib import OutOfBoundsDatetime


<xarray.DataArray (items: 3, major_axis: 30, minor_axis: 4)>
array([[[ 0.554375, -1.008729, -0.863862, -0.590095],
        [ 2.742404, -0.522179,  0.156641, -0.526499],
        ..., 
        [-0.209677, -0.671472,  1.164913,  0.615187],
        [ 1.255148,  1.240764,  1.298236,  0.93667 ]],

       [[ 0.102788, -1.743201, -0.154922,  0.13361 ],
        [-1.385856,  0.221957, -0.223744,  0.221995],
        ..., 
        [-0.030756, -1.291795, -0.723135, -0.437562],
        [-0.3012  , -1.128344,  0.610738, -0.101723]],

       [[ 0.252704, -1.504752, -0.003295, -1.397518],
        [ 1.024589,  0.777994,  0.970927, -0.793256],
        ..., 
        [-0.547522, -0.552909,  0.483671, -0.940147],
        [ 0.157915, -2.211801,  0.537774,  0.913355]]])
Coordinates:
  * items       (items) object 'ItemA' 'ItemB' 'ItemC'
  * major_axis  (major_axis) datetime64[ns] 2000-01-03 2000-01-04 2000-01-05 ...
  * minor_axis  (minor_axis) object 'A' 'B' 'C' 'D'

## I/O Support

## Pickle

In [13]:
# numpy is not imported by any earlier cell, so bring `np` into scope here;
# without it this cell raises NameError on a fresh kernel.
import numpy as np

# 1000 rows: a random float column and a constant string column, used as the
# payload for the I/O round trips below.
df = pd.DataFrame({
        'A': np.random.randn(1000),
        'B': 'foo'})

In [14]:
# Write the frame to foo.pkl as a gzip-compressed pickle.
df.to_pickle('foo.pkl', compression='gzip')

In [15]:
# Read the pickle back, passing the same compression used when writing.
# Unpickling can execute arbitrary code; only load files from trusted sources.
pd.read_pickle('foo.pkl', compression='gzip')

Unnamed: 0,A,B
0,-0.300678,foo
1,-0.526549,foo
2,-0.345750,foo
3,-1.635370,foo
4,-1.316605,foo
5,-0.654825,foo
...,...,...
994,1.200574,foo
995,-0.621297,foo
996,-0.610772,foo


## Feather

In [16]:
# Write the frame to foo.fth in Feather format.
df.to_feather('foo.fth')

  inferred_type = pd.lib.infer_dtype(col)


In [17]:
# Read the Feather file back into a DataFrame to show the round trip.
pd.read_feather('foo.fth')

Unnamed: 0,A,B
0,-0.300678,foo
1,-0.526549,foo
2,-0.345750,foo
3,-1.635370,foo
4,-1.316605,foo
5,-0.654825,foo
...,...,...
994,1.200574,foo
995,-0.621297,foo
996,-0.610772,foo


## Parquet

In [18]:
# Write the frame to foo.pq as Parquet, using the pyarrow engine and gzip compression.
df.to_parquet('foo.pq', engine='pyarrow', compression='gzip')

In [19]:
# Read the Parquet file back with the same engine to show the round trip.
pd.read_parquet('foo.pq', engine='pyarrow')

Unnamed: 0,A,B
0,-0.300678,foo
1,-0.526549,foo
2,-0.345750,foo
3,-1.635370,foo
4,-1.316605,foo
5,-0.654825,foo
...,...,...
994,1.200574,foo
995,-0.621297,foo
996,-0.610772,foo
