# Pandas JSON

In [1]:
import pandas as pd
import numpy as np
import json

## Read JSON File

In [None]:
# Parse the JSON document stored in 'file.json' into Python objects.
# json.load reads directly from the open file handle, and the explicit
# UTF-8 encoding avoids depending on the platform's default text encoding.
with open('file.json', encoding='utf-8') as f:
    d = json.load(f)

## DataFrame From Dictionary

In [2]:
# Dummy nested data: each (i, j) tuple key maps to a dict holding a random
# draw ('data1') together with its square, square root and natural log.
# The draws are unseeded, so the numbers change from run to run.
d = {
    (i, j): {
        'data1': k,
        'data2': np.square(k),
        'data3': np.sqrt(k),
        'data4': np.log(k),
    }
    for i in range(2)
    for j, k in enumerate(np.random.rand(2))
}

In [3]:
# display the nested dictionary (tuple keys -> dict of values)
d

{(0, 0): {'data1': 0.36763612809007506,
  'data2': 0.13515632267706207,
  'data3': 0.60633004881011388,
  'data4': -1.0006616123448944},
 (0, 1): {'data1': 0.1425640998745108,
  'data2': 0.02032452257302949,
  'data3': 0.37757661457578484,
  'data4': -1.9479635567329971},
 (1, 0): {'data1': 0.67871583725104201,
  'data2': 0.46065518773538294,
  'data3': 0.82384211912904892,
  'data4': -0.38755274085918401},
 (1, 1): {'data1': 0.37571703918967791,
  'data2': 0.14116329353745796,
  'data3': 0.61295761614460575,
  'data4': -0.97891897425076624}}

In [4]:
# Build a DataFrame with one row per dictionary key; the tuple keys
# become the levels of the row MultiIndex and the inner dict keys
# become the columns.
df = pd.DataFrame.from_dict(data=d, orient='index')

In [5]:
# give the two MultiIndex levels descriptive names
df.index = df.index.set_names(['category1', 'category2'])

In [6]:
# display the frame; the index levels are labelled category1 / category2
df

Unnamed: 0_level_0,Unnamed: 1_level_0,data3,data1,data2,data4
category1,category2,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,0,0.60633,0.367636,0.135156,-1.000662
0,1,0.377577,0.142564,0.020325,-1.947964
1,0,0.823842,0.678716,0.460655,-0.387553
1,1,0.612958,0.375717,0.141163,-0.978919


## Dictionary From DataFrame

In [7]:
# column-oriented export (the default orient): {column -> {index key -> value}}
df.to_dict(orient='dict')

{'data1': {(0, 0): 0.36763612809007506,
  (0, 1): 0.1425640998745108,
  (1, 0): 0.67871583725104201,
  (1, 1): 0.37571703918967791},
 'data2': {(0, 0): 0.13515632267706207,
  (0, 1): 0.02032452257302949,
  (1, 0): 0.46065518773538294,
  (1, 1): 0.14116329353745796},
 'data3': {(0, 0): 0.60633004881011388,
  (0, 1): 0.37757661457578484,
  (1, 0): 0.82384211912904892,
  (1, 1): 0.61295761614460575},
 'data4': {(0, 0): -1.0006616123448944,
  (0, 1): -1.9479635567329971,
  (1, 0): -0.38755274085918401,
  (1, 1): -0.97891897425076624}}

To get back to the original format (one inner dictionary per index key), transpose the DataFrame first.

In [8]:
# after transposing, the index tuples are the columns, so the export is
# keyed by index tuple first: {index key -> {column -> value}}
df.transpose().to_dict()

{(0, 0): {'data1': 0.36763612809007506,
  'data2': 0.13515632267706207,
  'data3': 0.60633004881011388,
  'data4': -1.0006616123448944},
 (0, 1): {'data1': 0.1425640998745108,
  'data2': 0.02032452257302949,
  'data3': 0.37757661457578484,
  'data4': -1.9479635567329971},
 (1, 0): {'data1': 0.67871583725104201,
  'data2': 0.46065518773538294,
  'data3': 0.82384211912904892,
  'data4': -0.38755274085918401},
 (1, 1): {'data1': 0.37571703918967791,
  'data2': 0.14116329353745796,
  'data3': 0.61295761614460575,
  'data4': -0.97891897425076624}}