In [1]:
import pandas as pd
# Cap DataFrame reprs at 12 rows so the rendered outputs stay short.
pd.set_option('display.max_rows', 12)
# Show the pandas build this notebook was executed with.
pd.__version__

'0.19.0+712.g66140df'

## Build schema

https://github.com/nteract/nteract/pull/1534

In [2]:
# Small frame with an integer, a string and a datetime column plus a named
# index, used as input for the table-schema JSON export.
df = pd.DataFrame(
    {
        'A': [1, 2, 3],
        'B': ['a', 'b', 'c'],
        # 'D' is the documented offset alias for calendar-day frequency;
        # the lowercase spelling is not the canonical form.
        'C': pd.date_range('2016-01-01', freq='D', periods=3),
    },
    index=pd.Index(range(3), name='idx'),
)
df

Unnamed: 0_level_0,A,B,C
idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,1,a,2016-01-01
1,2,b,2016-01-02
2,3,c,2016-01-03


In [3]:
import uuid
from IPython.display import display_javascript, display_html, display
import json

class RenderJSON(object):
    """Display a JSON payload in the notebook through renderjson.js.

    Parameters
    ----------
    json_data : dict, list or str
        A dict or list is serialised with ``json.dumps``. Any other value
        (typically an already-serialised JSON string) is embedded as given.
    """

    def __init__(self, json_data):
        # Lists are valid top-level JSON as well. Interpolating a raw Python
        # list into the script would emit its repr (single quotes, None, True)
        # instead of JSON, so serialise both container types.
        if isinstance(json_data, (dict, list)):
            self.json_str = json.dumps(json_data)
        else:
            self.json_str = json_data
        # Fresh id per instance, used as the DOM id of the target <div>.
        self.uuid = str(uuid.uuid4())

    def _ipython_display_(self):
        """Emit the placeholder <div>, then the script that fills it."""
        display_html('<div id="{}" style="height: 600px; width:100%;"></div>'.format(self.uuid), raw=True)
        # NOTE(review): the script is fetched from rawgit.com at display time;
        # confirm that host is still serving before relying on this.
        display_javascript("""
        require(["https://rawgit.com/caldwell/renderjson/master/renderjson.js"], function() {
        document.getElementById('%s').appendChild(renderjson(%s))
        });
        """ % (self.uuid, self.json_str), raw=True)

In [4]:
# Serialise the frame with orient='table'; the output carries a "schema" section next to the "data" records.
df.to_json(orient='table')

'{"schema": {"fields":[{"name":"idx","type":"integer"},{"name":"A","type":"integer"},{"name":"B","type":"string"},{"name":"C","type":"datetime"}],"primaryKey":["idx"],"pandas_version":"0.20.0"}, "data": [{"idx":0,"A":1,"B":"a","C":"2016-01-01T00:00:00.000Z"},{"idx":1,"A":2,"B":"b","C":"2016-01-02T00:00:00.000Z"},{"idx":2,"A":3,"B":"c","C":"2016-01-03T00:00:00.000Z"}]}'

In [5]:
# Pass the serialised JSON string to RenderJSON; as the cell's last expression it is displayed via _ipython_display_.
RenderJSON(df.to_json(orient='table'))

<img src="https://camo.githubusercontent.com/638215e5f7484a266fe712a4ab59b5931a16a3df/687474703a2f2f672e7265636f726469742e636f2f73675a477843666c426d2e676966">

## Deprecate `.ix`

In [6]:
# Toy frame with string row labels, so positional and label-based lookups
# are visibly different things.
df = pd.DataFrame(
    {'A': [1, 2, 3], 'B': [4, 5, 6]},
    index=['a', 'b', 'c'],
)
df

Unnamed: 0,A,B
a,1,4
b,2,5
c,3,6


In [7]:
# Deliberately uses the deprecated .ix indexer (row positions mixed with a column label) to trigger the warning shown below.
df.ix[[0, 2], 'A']

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate_ix
  if __name__ == '__main__':



a    1
c    3
Name: A, dtype: int64

In [8]:
# Label-based replacement: df.index[[0, 2]] turns the row positions into labels that .loc accepts.
df.loc[df.index[[0, 2]], 'A']


a    1
c    3
Name: A, dtype: int64

In [9]:
# Positional replacement: columns.get_loc('A') turns the column label into a position that .iloc accepts.
df.iloc[[0, 2], df.columns.get_loc('A')]


a    1
c    3
Name: A, dtype: int64

## Panel Deprecation

In [10]:
# Build a sample Panel with pandas' own testing helper; the repr below reports 3 items x 30 major x 4 minor.
from pandas.util import testing as tm
p = tm.makePanel()
p


<class 'pandas.core.panel.Panel'>
Dimensions: 3 (items) x 30 (major_axis) x 4 (minor_axis)
Items axis: ItemA to ItemC
Major_axis axis: 2000-01-03 00:00:00 to 2000-02-11 00:00:00
Minor_axis axis: A to D

In [11]:
# Flatten the Panel into a DataFrame: rows are indexed by (major, minor), with one column per item.
p.to_frame()

Unnamed: 0_level_0,Unnamed: 1_level_0,ItemA,ItemB,ItemC
major,minor,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000-01-03,A,-0.375145,0.586722,-1.317563
2000-01-03,B,0.755556,-0.158258,0.939822
2000-01-03,C,0.137546,-0.416288,0.790865
2000-01-03,D,-0.178856,-0.511952,0.452189
2000-01-04,A,1.167450,-1.392837,-0.099673
2000-01-04,B,0.085941,0.991742,-0.122769
...,...,...,...,...
2000-02-10,C,-0.853315,-1.314359,1.298981
2000-02-10,D,-1.473551,-1.024697,-0.108618
2000-02-11,A,-1.596060,0.315752,-0.457702


In [12]:
# Convert the Panel to an xarray.DataArray with dimensions (items, major_axis, minor_axis).
p.to_xarray()

  from pandas.tslib import OutOfBoundsDatetime


<xarray.DataArray (items: 3, major_axis: 30, minor_axis: 4)>
array([[[-0.375145,  0.755556,  0.137546, -0.178856],
        [ 1.16745 ,  0.085941, -0.873849,  3.617286],
        ..., 
        [ 0.282738, -0.666555, -0.853315, -1.473551],
        [-1.59606 , -1.175703, -0.131263, -0.897316]],

       [[ 0.586722, -0.158258, -0.416288, -0.511952],
        [-1.392837,  0.991742,  1.147797, -1.218715],
        ..., 
        [ 0.47374 ,  0.891614, -1.314359, -1.024697],
        [ 0.315752,  0.225676,  0.324331, -2.263131]],

       [[-1.317563,  0.939822,  0.790865,  0.452189],
        [-0.099673, -0.122769, -0.255115, -0.384474],
        ..., 
        [ 0.078082,  0.355299,  1.298981, -0.108618],
        [-0.457702, -2.098909,  1.50482 , -0.331393]]])
Coordinates:
  * items       (items) object 'ItemA' 'ItemB' 'ItemC'
  * major_axis  (major_axis) datetime64[ns] 2000-01-03 2000-01-04 2000-01-05 ...
  * minor_axis  (minor_axis) object 'A' 'B' 'C' 'D'

## I/O Support

## Pickle

In [13]:
# numpy is not imported by any earlier cell, so import it here; without this
# the cell raises NameError on a fresh kernel.
import numpy as np

# 1000-row frame: a float column of standard-normal draws and a constant
# string column (the scalar 'foo' is broadcast to every row).
df = pd.DataFrame({
    'A': np.random.randn(1000),
    'B': 'foo',
})

In [14]:
# Write the frame as a pickle, with gzip compression requested explicitly.
df.to_pickle('foo.pkl.gzip', compression='gzip')

In [15]:
# Read the pickle back, passing the same compression argument that was used when writing it.
pd.read_pickle('foo.pkl.gzip', compression='gzip')

Unnamed: 0,A,B
0,1.589551,foo
1,0.652824,foo
2,-0.655902,foo
3,-0.920054,foo
4,0.347747,foo
5,0.535859,foo
...,...,...
994,0.705266,foo
995,0.296323,foo
996,0.095735,foo


## Feather

In [16]:
# Write the frame to a Feather file.
df.to_feather('foo.fth')

  inferred_type = pd.lib.infer_dtype(col)


In [17]:
# Read the Feather file back into a DataFrame to show the round trip.
pd.read_feather('foo.fth')

Unnamed: 0,A,B
0,1.589551,foo
1,0.652824,foo
2,-0.655902,foo
3,-0.920054,foo
4,0.347747,foo
5,0.535859,foo
...,...,...
994,0.705266,foo
995,0.296323,foo
996,0.095735,foo


## Parquet

In [18]:
# Write the frame to Parquet using the pyarrow engine with gzip compression.
df.to_parquet('foo.pq', engine='pyarrow', compression='gzip')

In [19]:
# Read the Parquet file back with the same engine to show the round trip.
pd.read_parquet('foo.pq', engine='pyarrow')

Unnamed: 0,A,B
0,1.589551,foo
1,0.652824,foo
2,-0.655902,foo
3,-0.920054,foo
4,0.347747,foo
5,0.535859,foo
...,...,...
994,0.705266,foo
995,0.296323,foo
996,0.095735,foo


In [20]:
# cleanup
import os

# Remove the files written by the pickle / feather / parquet cells. Each path
# is checked first, so re-running this cell (or running it after one of the
# writer cells failed) does not abort on a missing file and leave the other
# artifacts behind.
for artifact in ('foo.pq', 'foo.fth', 'foo.pkl.gzip'):
    if os.path.exists(artifact):
        os.remove(artifact)