In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from io import StringIO

In [2]:
# Five rows of standard-normal noise in two columns, reused by the JSON examples.
dfj = pd.DataFrame(data=np.random.randn(5, 2), columns=['A', 'B'])

In [3]:
# With no orient given, the frame is serialised column-first:
# {column: {index label: value}} (see the string displayed below).
json = dfj.to_json()

In [4]:
json

'{"A":{"0":1.5848283332,"1":0.645596978,"2":-1.6683166366,"3":0.1049547189,"4":-0.2910407701},"B":{"0":-1.7539158066,"1":1.9227314779,"2":-0.8244853717,"3":-0.626940072,"4":0.4339751634}}'

In [5]:
# Small integer frame with string row labels, reused by the orient examples.
dfjo = pd.DataFrame(
    {'A': range(1, 4), 'B': range(4, 7), 'C': range(7, 10)},
    index=['x', 'y', 'z'],
    columns=['A', 'B', 'C'],
)

In [6]:
dfjo

Unnamed: 0,A,B,C
x,1,4,7
y,2,5,8
z,3,6,9


In [7]:
# Integer Series named 'D', labelled x/y/z.
sjo = pd.Series({'x': 15, 'y': 16, 'z': 17}, name='D')

In [8]:
sjo

x    15
y    16
z    17
Name: D, dtype: int64

In [9]:
# orient="columns": outer keys are column labels, inner keys are row labels.
dfjo.to_json(orient="columns")

'{"A":{"x":1,"y":2,"z":3},"B":{"x":4,"y":5,"z":6},"C":{"x":7,"y":8,"z":9}}'

In [10]:
# orient="index": outer keys are row labels, inner keys are column labels.
dfjo.to_json(orient="index")

'{"x":{"A":1,"B":4,"C":7},"y":{"A":2,"B":5,"C":8},"z":{"A":3,"B":6,"C":9}}'

In [11]:
# For a Series, orient="index" yields a flat {label: value} object.
sjo.to_json(orient="index")

'{"x":15,"y":16,"z":17}'

In [12]:
# orient="records": a JSON array with one {column: value} object per row;
# the row labels are not written.
dfjo.to_json(orient="records")

'[{"A":1,"B":4,"C":7},{"A":2,"B":5,"C":8},{"A":3,"B":6,"C":9}]'

In [13]:
# For a Series, orient="records" reduces to a plain array of the values.
sjo.to_json(orient="records")

'[15,16,17]'

In [14]:
# orient="values": nested arrays of the cell values only; neither row nor
# column labels are written.
dfjo.to_json(orient="values")

'[[1,4,7],[2,5,8],[3,6,9]]'

In [15]:
# orient="split": separate "columns", "index" and "data" entries.
dfjo.to_json(orient="split")

'{"columns":["A","B","C"],"index":["x","y","z"],"data":[[1,4,7],[2,5,8],[3,6,9]]}'

In [16]:
# For a Series, orient="split" carries "name", "index" and "data" entries.
sjo.to_json(orient="split")

'{"name":"D","index":["x","y","z"],"data":[15,16,17]}'

In [17]:
# Fresh 5x2 frame of random floats for the date-handling examples.
dfd = pd.DataFrame(data=np.random.randn(5, 2), columns=['A', 'B'])
dfd

Unnamed: 0,A,B
0,0.18577,-1.400211
1,1.269336,0.309499
2,-0.203365,0.027074
3,1.120477,-0.16682
4,0.309002,-2.530364


In [18]:
# Assigning a scalar Timestamp fills the new 'date' column with that value
# in every row (see the table below).
dfd['date'] = pd.Timestamp('20130101')
dfd

Unnamed: 0,A,B,date
0,0.18577,-1.400211,2013-01-01
1,1.269336,0.309499,2013-01-01
2,-0.203365,0.027074,2013-01-01
3,1.120477,-0.16682,2013-01-01
4,0.309002,-2.530364,2013-01-01


In [19]:
# Reorder the columns by label in descending order (date, B, A).
# `axis` is passed by keyword: newer pandas releases make the arguments of
# sort_index keyword-only, so the positional form `sort_index(1, ...)` fails.
dfd = dfd.sort_index(axis=1, ascending=False)
dfd

Unnamed: 0,date,B,A
0,2013-01-01,-1.400211,0.18577
1,2013-01-01,0.309499,1.269336
2,2013-01-01,0.027074,-0.203365
3,2013-01-01,-0.16682,1.120477
4,2013-01-01,-2.530364,0.309002


In [20]:
# date_format='iso' writes datetimes as ISO 8601 strings; the output below
# shows millisecond precision.
json = dfd.to_json(date_format='iso')

In [21]:
json

'{"date":{"0":"2013-01-01T00:00:00.000Z","1":"2013-01-01T00:00:00.000Z","2":"2013-01-01T00:00:00.000Z","3":"2013-01-01T00:00:00.000Z","4":"2013-01-01T00:00:00.000Z"},"B":{"0":-1.4002107065,"1":0.309498924,"2":0.0270738539,"3":-0.1668201244,"4":-2.5303644214},"A":{"0":0.1857700089,"1":1.2693362347,"2":-0.2033652359,"3":1.1204768353,"4":0.3090016216}}'

In [22]:
# Adding date_unit='us' extends the ISO strings to microsecond precision.
json = dfd.to_json(date_format='iso', date_unit='us')

In [23]:
json

'{"date":{"0":"2013-01-01T00:00:00.000000Z","1":"2013-01-01T00:00:00.000000Z","2":"2013-01-01T00:00:00.000000Z","3":"2013-01-01T00:00:00.000000Z","4":"2013-01-01T00:00:00.000000Z"},"B":{"0":-1.4002107065,"1":0.309498924,"2":0.0270738539,"3":-0.1668201244,"4":-2.5303644214},"A":{"0":0.1857700089,"1":1.2693362347,"2":-0.2033652359,"3":1.1204768353,"4":0.3090016216}}'

In [24]:
# date_format='epoch' with date_unit='s' writes datetimes as integer epoch
# seconds (1356998400 in the output below).
json = dfd.to_json(date_format='epoch', date_unit='s')

In [25]:
json

'{"date":{"0":1356998400,"1":1356998400,"2":1356998400,"3":1356998400,"4":1356998400},"B":{"0":-1.4002107065,"1":0.309498924,"2":0.0270738539,"3":-0.1668201244,"4":-2.5303644214},"A":{"0":0.1857700089,"1":1.2693362347,"2":-0.2033652359,"3":1.1204768353,"4":0.3090016216}}'

In [26]:
# Work on a copy of dfj; the following cells add columns and a new index to
# the copy.
dfj2 = dfj.copy()
dfj2

Unnamed: 0,A,B
0,1.584828,-1.753916
1,0.645597,1.922731
2,-1.668317,-0.824485
3,0.104955,-0.62694
4,-0.291041,0.433975


In [27]:
# New 'date' column: the same timestamp in every row.
dfj2['date'] = pd.Timestamp('20130101')

In [28]:
# New 'ints' column holding 0..4, one value per row.
dfj2['ints'] = list(range(5))

In [29]:
# New 'bools' column: the scalar True is repeated for every row.
dfj2['bools'] = True

In [30]:
# Replace the integer index with five consecutive dates starting 2013-01-01.
dfj2.index = pd.date_range('20130101', periods=5)

In [31]:
# Passing a path writes the JSON to that file. No date options are given, and
# the file contents printed below show dates as epoch milliseconds.
dfj2.to_json('test.json')

In [32]:
# Echo the raw text of the file written by the previous cell; the `with`
# block closes the handle once the contents have been printed.
with open('test.json') as fh:
    print(fh.read())

{"A":{"1356998400000":1.5848283332,"1357084800000":0.645596978,"1357171200000":-1.6683166366,"1357257600000":0.1049547189,"1357344000000":-0.2910407701},"B":{"1356998400000":-1.7539158066,"1357084800000":1.9227314779,"1357171200000":-0.8244853717,"1357257600000":-0.626940072,"1357344000000":0.4339751634},"date":{"1356998400000":1356998400000,"1357084800000":1356998400000,"1357171200000":1356998400000,"1357257600000":1356998400000,"1357344000000":1356998400000},"ints":{"1356998400000":0,"1357084800000":1,"1357171200000":2,"1357257600000":3,"1357344000000":4},"bools":{"1356998400000":true,"1357084800000":true,"1357171200000":true,"1357257600000":true,"1357344000000":true}}


In [33]:
dfj2

Unnamed: 0,A,B,date,ints,bools
2013-01-01,1.584828,-1.753916,2013-01-01,0,True
2013-01-02,0.645597,1.922731,2013-01-01,1,True
2013-01-03,-1.668317,-0.824485,2013-01-01,2,True
2013-01-04,0.104955,-0.62694,2013-01-01,3,True
2013-01-05,-0.291041,0.433975,2013-01-01,4,True


In [34]:
# default_handler=str is supplied for the complex-valued column; in the output
# below every value appears as its str() form, e.g. "(1+2j)".
pd.DataFrame([1.0, 2.0, complex(1.0, 2.0)]).to_json(default_handler=str)

'{"0":{"0":"(1+0j)","1":"(2+0j)","2":"(1+2j)"}}'

In [35]:
# `json` still holds the epoch-seconds string produced by the last
# dfd.to_json call above. It is wrapped in StringIO because passing a literal
# JSON string to read_json is deprecated (pandas 2.1+); file-like input works
# across versions.
pd.read_json(StringIO(json))

Unnamed: 0,date,B,A
0,2013-01-01,-1.400211,0.18577
1,2013-01-01,0.309499,1.269336
2,2013-01-01,0.027074,-0.203365
3,2013-01-01,-0.16682,1.120477
4,2013-01-01,-2.530364,0.309002


In [36]:
# read_json also accepts a file path; in the output below the epoch values in
# the index and the 'date' column come back as dates.
pd.read_json('test.json')

Unnamed: 0,A,B,date,ints,bools
2013-01-01,1.584828,-1.753916,2013-01-01,0,True
2013-01-02,0.645597,1.922731,2013-01-01,1,True
2013-01-03,-1.668317,-0.824485,2013-01-01,2,True
2013-01-04,0.104955,-0.62694,2013-01-01,3,True
2013-01-05,-0.291041,0.433975,2013-01-01,4,True


In [37]:
# With dtype=object every column is loaded as object dtype (see below).
pd.read_json('test.json', dtype=object).dtypes

A        object
B        object
date     object
ints     object
bools    object
dtype: object

In [38]:
# A dict applies the given dtype per column: 'A' becomes float32 and 'bools'
# becomes int8, while the unlisted columns keep the dtypes shown below.
pd.read_json('test.json', dtype={'A': 'float32', 'bools': 'int8'}).dtypes

A               float32
B               float64
date     datetime64[ns]
ints              int64
bools              int8
dtype: object

In [39]:
# 4x4 frame of zeros with integer column labels and string row labels.
si = pd.DataFrame(
    np.zeros((4, 4)),
    index=['0', '1', '2', '3'],
    columns=[0, 1, 2, 3],
)

In [40]:
si

Unnamed: 0,0,1,2,3
0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0


In [41]:
si.index

Index(['0', '1', '2', '3'], dtype='object')

In [42]:
si.columns

Int64Index([0, 1, 2, 3], dtype='int64')

In [43]:
# Serialise si; the cells below reload it with convert_axes=False and show
# both axes coming back as strings.
json = si.to_json()

In [44]:
# convert_axes=False keeps the axis labels as the strings found in the JSON
# (compare sij.columns below with si.columns above). The payload is wrapped in
# StringIO because passing a literal JSON string to read_json is deprecated
# (pandas 2.1+).
sij = pd.read_json(StringIO(json), convert_axes=False)

In [45]:
sij

Unnamed: 0,0,1,2,3
0,0,0,0,0
1,0,0,0,0
2,0,0,0,0
3,0,0,0,0


In [46]:
sij.index

Index(['0', '1', '2', '3'], dtype='object')

In [47]:
sij.columns

Index(['0', '1', '2', '3'], dtype='object')

In [48]:
# Re-serialise dfj2 with timestamps written as epoch nanoseconds.
json = dfj2.to_json(date_unit='ns')

In [49]:
# Try to parse the nanosecond timestamps as milliseconds -> won't work: the
# values stay as raw integers in the output below.
# The string is wrapped in StringIO because passing a literal JSON string to
# read_json is deprecated (pandas 2.1+).
dfju = pd.read_json(StringIO(json), date_unit='ms')

In [50]:
dfju

Unnamed: 0,A,B,date,ints,bools
1356998400000000000,1.584828,-1.753916,1356998400000000000,0,True
1357084800000000000,0.645597,1.922731,1356998400000000000,1,True
1357171200000000000,-1.668317,-0.824485,1356998400000000000,2,True
1357257600000000000,0.104955,-0.62694,1356998400000000000,3,True
1357344000000000000,-0.291041,0.433975,1356998400000000000,4,True


In [51]:
# Let pandas detect the correct precision.
# The string is wrapped in StringIO because passing a literal JSON string to
# read_json is deprecated (pandas 2.1+).
dfju = pd.read_json(StringIO(json))
dfju

Unnamed: 0,A,B,date,ints,bools
2013-01-01,1.584828,-1.753916,2013-01-01,0,True
2013-01-02,0.645597,1.922731,2013-01-01,1,True
2013-01-03,-1.668317,-0.824485,2013-01-01,2,True
2013-01-04,0.104955,-0.62694,2013-01-01,3,True
2013-01-05,-0.291041,0.433975,2013-01-01,4,True


In [52]:
# Or state explicitly that all timestamps are in nanoseconds.
# The string is wrapped in StringIO because passing a literal JSON string to
# read_json is deprecated (pandas 2.1+).
dfju = pd.read_json(StringIO(json), date_unit='ns')
dfju

Unnamed: 0,A,B,date,ints,bools
2013-01-01,1.584828,-1.753916,2013-01-01,0,True
2013-01-02,0.645597,1.922731,2013-01-01,1,True
2013-01-03,-1.668317,-0.824485,2013-01-01,2,True
2013-01-04,0.104955,-0.62694,2013-01-01,3,True
2013-01-05,-0.291041,0.433975,2013-01-01,4,True


In [53]:
# 10000 uniform random floats drawn from [-100, 1000) for the timing examples.
randfloats = np.random.uniform(low=-100, high=1000, size=10000)

In [54]:
# Reshape in place by assigning to .shape: 10000 values -> 1000 rows x 10 columns.
randfloats.shape = (1000, 10)

In [55]:
# Wrap the array in a frame whose ten columns are labelled A..J.
dffloats = pd.DataFrame(data=randfloats, columns=[letter for letter in 'ABCDEFGHIJ'])

In [56]:
# Serialise the whole float frame; this string is the input to the timings below.
jsonfloats = dffloats.to_json()

In [57]:
%timeit pd.read_json(jsonfloats)

16.5 ms ± 325 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [58]:
%timeit pd.read_json(jsonfloats, numpy=True)



12.3 ms ± 505 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [59]:
# Repeat the timings on a smaller payload: only the first 100 rows.
jsonfloats = dffloats.head(100).to_json()

In [60]:
%timeit pd.read_json(jsonfloats)

10 ms ± 816 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [61]:
%timeit pd.read_json(jsonfloats, numpy=True)



7.92 ms ± 270 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [62]:
# Three records with deliberately inconsistent shapes: 'name' is a dict with
# first/last keys, a dict with given/family keys, or a plain string, and the
# second record has no 'id'.
data = [{'id': 1, 'name': {'first': 'Coleen', 'last': 'Volk'}},
        {'name': {'given': 'Mose', 'family': 'Regner'}},
        {'id': 2, 'name': 'Faye Raker'}]

In [63]:
# Nested dict keys become dotted column names (name.first, name.given, ...);
# fields absent from a record are left empty in the table below.
pd.json_normalize(data)

Unnamed: 0,id,name.first,name.last,name.given,name.family,name
0,1.0,Coleen,Volk,,,
1,,,,Mose,Regner,
2,2.0,,,,,Faye Raker


In [64]:
# Two state records; each has scalar fields (state, shortname), a nested
# 'info' dict and a 'county' list of {name, population} dicts.
data = [{'state': 'Florida',
         'shortname': 'FL',
         'info': {'governor': 'Rick Scott'},
         'county': [{'name': 'Dade', 'population': 12345},
                    {'name': 'Broward', 'population': 40000},
                    {'name': 'Palm Beach', 'population': 60000}]},
        {'state': 'Ohio',
         'shortname': 'OH',
         'info': {'governor': 'John Kasich'},
         'county': [{'name': 'Summit', 'population': 1234},
                    {'name': 'Cuyahoga', 'population': 1337}]}]

In [65]:
# Flatten each state's 'county' list into rows and repeat the state-level
# fields (including the nested info.governor) alongside every county row.
pd.json_normalize(
    data,
    record_path='county',
    meta=['state', 'shortname', ['info', 'governor']],
)

Unnamed: 0,name,population,state,shortname,info.governor
0,Dade,12345,Florida,FL,Rick Scott
1,Broward,40000,Florida,FL,Rick Scott
2,Palm Beach,60000,Florida,FL,Rick Scott
3,Summit,1234,Ohio,OH,John Kasich
4,Cuyahoga,1337,Ohio,OH,John Kasich


In [66]:
# One record whose 'Lookup' entry nests two levels deep (Lookup -> UserField
# -> Id/Name); used to show the effect of max_level.
data = [{'CreatedBy': {'Name': 'User001'},
         'Lookup': {'TextField': 'Some text',
                    'UserField': {'Id': 'ID001',
                                  'Name': 'Name001'}},
         'Image': {'a': 'b'}}]

In [67]:
# With max_level=1 only the first level of nesting is flattened: in the table
# below Lookup.UserField still holds the inner dict.
pd.json_normalize(data, max_level=1)

Unnamed: 0,CreatedBy.Name,Lookup.TextField,Lookup.UserField,Image.a
0,User001,Some text,"{'Id': 'ID001', 'Name': 'Name001'}",b


In [68]:
# Two JSON records, one per line. Note that the literal begins with a newline,
# so the first line of the string is blank.
jsonl = '''
{"a": 1, "b": 2}
{"a": 3, "b": 4}
'''

In [69]:
# Parse the line-delimited records into a frame. The string is wrapped in
# StringIO, as the chunked reader cell further down already does, because
# passing a literal JSON string to read_json is deprecated (pandas 2.1+).
df = pd.read_json(StringIO(jsonl), lines=True)

In [70]:
df

Unnamed: 0,a,b
0,1,2
1,3,4


In [71]:
# orient='records' with lines=True writes one JSON object per line.
df.to_json(orient='records', lines=True)

'{"a":1,"b":2}\n{"a":3,"b":4}'

In [72]:
# With chunksize set, read_json returns a JsonReader (see the repr below)
# rather than a DataFrame; iterating it yields `chunksize` lines per step.
reader = pd.read_json(StringIO(jsonl), lines=True, chunksize=1)

In [73]:
reader

<pandas.io.json._json.JsonReader at 0x7711448>

In [74]:
# Print every chunk as plain text. In the recorded output the first chunk is
# an empty DataFrame - presumably the blank first line of jsonl; confirm
# against the installed pandas version.
for chunk in reader:
    print(chunk)

Empty DataFrame
Columns: []
Index: []
   a  b
0  1  2
   a  b
1  3  4


In [75]:
# Three-row frame (int, str and daily datetime columns) with an index named 'idx'.
# 'D' is the canonical spelling of the daily frequency alias; the lowercase
# form is deprecated in newer pandas.
df = pd.DataFrame({'A': [1, 2, 3],
                   'B': ['a', 'b', 'c'],
                   'C': pd.date_range('2016-01-01', freq='D', periods=3)},
                  index=pd.Index(range(3), name='idx'))

In [76]:
df

Unnamed: 0_level_0,A,B,C
idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,1,a,2016-01-01
1,2,b,2016-01-02
2,3,c,2016-01-03


In [77]:
# orient='table' emits a "schema" entry (fields, primaryKey, pandas_version)
# followed by the "data" records; date_format="iso" renders column C as
# ISO 8601 strings.
df.to_json(orient='table', date_format="iso")

'{"schema":{"fields":[{"name":"idx","type":"integer"},{"name":"A","type":"integer"},{"name":"B","type":"string"},{"name":"C","type":"datetime"}],"primaryKey":["idx"],"pandas_version":"0.20.0"},"data":[{"idx":0,"A":1,"B":"a","C":"2016-01-01T00:00:00.000Z"},{"idx":1,"A":2,"B":"b","C":"2016-01-02T00:00:00.000Z"},{"idx":2,"A":3,"B":"c","C":"2016-01-03T00:00:00.000Z"}]}'

In [78]:
from pandas.io.json import build_table_schema

In [79]:
# Four consecutive dates starting 2016-01-01, on the default integer index.
s = pd.Series(data=pd.date_range(start='2016', periods=4))
s

0   2016-01-01
1   2016-01-02
2   2016-01-03
3   2016-01-04
dtype: datetime64[ns]

In [80]:
# The default index is reported as field 'index' (integer) and doubles as the
# primaryKey; the data is reported as field 'values' (datetime).
build_table_schema(s)

{'fields': [{'name': 'index', 'type': 'integer'},
  {'name': 'values', 'type': 'datetime'}],
 'primaryKey': ['index'],
 'pandas_version': '0.20.0'}

In [81]:
# Twelve consecutive dates localised to the US/Central timezone.
s_tz = pd.Series(data=pd.date_range(start='2016', periods=12, tz='US/Central'))
s_tz

0    2016-01-01 00:00:00-06:00
1    2016-01-02 00:00:00-06:00
2    2016-01-03 00:00:00-06:00
3    2016-01-04 00:00:00-06:00
4    2016-01-05 00:00:00-06:00
5    2016-01-06 00:00:00-06:00
6    2016-01-07 00:00:00-06:00
7    2016-01-08 00:00:00-06:00
8    2016-01-09 00:00:00-06:00
9    2016-01-10 00:00:00-06:00
10   2016-01-11 00:00:00-06:00
11   2016-01-12 00:00:00-06:00
dtype: datetime64[ns, US/Central]

In [82]:
# Timezone-aware datetimes carry an extra 'tz' entry in the field description.
build_table_schema(s_tz)

{'fields': [{'name': 'index', 'type': 'integer'},
  {'name': 'values', 'type': 'datetime', 'tz': 'US/Central'}],
 'primaryKey': ['index'],
 'pandas_version': '0.20.0'}

In [83]:
# Constant Series indexed by four annual periods (freq 'A-DEC') starting at
# 2016; the scalar 1 is repeated for every period.
s_per = pd.Series(1, index=pd.period_range('2016', freq='A-DEC',
                                           periods=4))
s_per

2016    1
2017    1
2018    1
2019    1
Freq: A-DEC, dtype: int64

In [84]:
# A period index is reported as type 'datetime' with an additional 'freq' entry.
build_table_schema(s_per)

{'fields': [{'name': 'index', 'type': 'datetime', 'freq': 'A-DEC'},
  {'name': 'values', 'type': 'integer'}],
 'primaryKey': ['index'],
 'pandas_version': '0.20.0'}

In [85]:
# Categorical Series with two categories ('a', 'b') over three values.
s_cat = pd.Series(data=pd.Categorical(values=['a', 'b', 'a']))
s_cat

0    a
1    b
2    a
dtype: category
Categories (2, object): [a, b]

In [86]:
# Categoricals are typed 'any', with the categories listed under
# constraints.enum and an 'ordered' flag.
build_table_schema(s_cat)

{'fields': [{'name': 'index', 'type': 'integer'},
  {'name': 'values',
   'type': 'any',
   'constraints': {'enum': ['a', 'b']},
   'ordered': False}],
 'primaryKey': ['index'],
 'pandas_version': '0.20.0'}

In [87]:
# Two values that share the same index label, i.e. a non-unique index.
s_dupe = pd.Series(data=[1, 2], index=[1, 1])
s_dupe

1    1
1    2
dtype: int64

In [88]:
# With duplicate index labels the resulting schema has no 'primaryKey' entry.
build_table_schema(s_dupe)

{'fields': [{'name': 'index', 'type': 'integer'},
  {'name': 'values', 'type': 'integer'}],
 'pandas_version': '0.20.0'}

In [89]:
# Constant Series over the 2x2 product of ('a', 'b') and (0, 1).
s_multi = pd.Series(
    1,
    index=pd.MultiIndex.from_product([['a', 'b'], [0, 1]]),
)
s_multi

a  0    1
   1    1
b  0    1
   1    1
dtype: int64

In [90]:
# Unnamed MultiIndex levels appear as fields 'level_0' and 'level_1', and
# together they form the primaryKey.
build_table_schema(s_multi)

{'fields': [{'name': 'level_0', 'type': 'string'},
  {'name': 'level_1', 'type': 'integer'},
  {'name': 'values', 'type': 'integer'}],
 'primaryKey': FrozenList(['level_0', 'level_1']),
 'pandas_version': '0.20.0'}

In [91]:
# Four-row frame mixing int, str, daily datetime and categorical columns, with
# an index named 'idx'; used for the table-orient round trip.
# 'D' is the canonical spelling of the daily frequency alias; the lowercase
# form is deprecated in newer pandas.
df = pd.DataFrame({'foo': [1, 2, 3, 4],
                   'bar': ['a', 'b', 'c', 'd'],
                   'baz': pd.date_range('2018-01-01', freq='D', periods=4),
                   'qux': pd.Categorical(['a', 'b', 'c', 'c'])
                  }, index=pd.Index(range(4), name='idx'))

In [92]:
df

Unnamed: 0_level_0,foo,bar,baz,qux
idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,1,a,2018-01-01,a
1,2,b,2018-01-02,b
2,3,c,2018-01-03,c
3,4,d,2018-01-04,c


In [93]:
df.dtypes

foo             int64
bar            object
baz    datetime64[ns]
qux          category
dtype: object

In [94]:
# Write the frame to test.json in the table layout (this reuses the file name
# written earlier in the notebook).
df.to_json('test.json', orient='table')

In [95]:
# Read the file back with the same orient; the values and the 'idx' index name
# shown below match the original frame.
new_df = pd.read_json('test.json', orient='table')
new_df

Unnamed: 0_level_0,foo,bar,baz,qux
idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,1,a,2018-01-01,a
1,2,b,2018-01-02,b
2,3,c,2018-01-03,c
3,4,d,2018-01-04,c


In [96]:
new_df.dtypes

foo             int64
bar            object
baz    datetime64[ns]
qux          category
dtype: object

In [97]:
# Rename the index to the literal string 'index' to show a round-trip caveat:
# after the write/read in the next cells the name comes back as None.
df.index.name = 'index'

In [98]:
# Write the frame, whose index is now named 'index', in the table layout.
df.to_json('test.json', orient='table')



In [99]:
# Reload the file that was just written.
new_df = pd.read_json('test.json', orient='table')

In [100]:
# The recorded output is None: the index name 'index' did not survive the
# round trip.
print(new_df.index.name)

None
