In [1]:
import time
import numpy as np
import pandas as pd
import xarray as xr

In [2]:
def dict_to_dataframe(dataset, name):
    """Convert one entry of a dataset dictionary into a single-column DataFrame.

    Parameters
    ----------
    dataset : dict
        Mapping of entry names to dicts that have a ``'values'`` item and an
        optional ``'axes'`` item. ``'axes'`` lists the names of other entries
        in ``dataset`` whose ``'values'`` are used as row coordinates.
    name : str
        Key of the entry to convert.

    Returns
    -------
    pandas.DataFrame
        A frame built from the entry's values, with ``columns.name`` set to
        ``name``. If the entry declares axes, the rows are labelled by a
        ``MultiIndex`` with one level per axis (named after the axes);
        otherwise pandas' default index is used.

    Raises
    ------
    KeyError
        If ``name`` or one of its axes is not present in ``dataset``.
    ValueError
        If the coordinate arrays of the axes do not all have the same length.
    """
    entry = dataset[name]
    values = entry['values']
    axes_names = list(entry.get('axes', []))

    if axes_names:
        coord_arrays = [dataset[axis]['values'] for axis in axes_names]

        # zip()-ing the coordinates would silently truncate to the shortest
        # array and mislabel rows, so reject mismatched lengths explicitly.
        lengths = [len(arr) for arr in coord_arrays]
        if len(set(lengths)) > 1:
            raise ValueError(
                "axes %r of dataset %r have different lengths: %r"
                % (axes_names, name, lengths)
            )

        index = pd.MultiIndex.from_arrays(coord_arrays, names=axes_names)
        df = pd.DataFrame(values, index=index)
    else:
        df = pd.DataFrame(values)

    df.columns.name = name
    return df

def get_shape(df):
    """Return the number of distinct labels along each index axis of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose row index is inspected.

    Returns
    -------
    tuple of int
        For a ``MultiIndex``, one entry per level giving the size of that
        level. For any other index, a 1-tuple with the number of distinct
        labels.
    """
    index = df.index
    if isinstance(index, pd.MultiIndex):
        # Sizes are taken from ``index.levels`` as stored on the index.
        return tuple(len(level) for level in index.levels)

    # A flat index has a single axis; previously this path raised
    # UnboundLocalError because no sizes were ever computed for it.
    return (len(index.unique()),)

In [18]:
# Sample z = x + y**2 on the 3x3 grid spanned by x, y in {0, 1, 2}.
x = np.arange(3, dtype=float)
y = np.arange(3, dtype=float)
xx, yy = np.meshgrid(x, y, indexing='ij')
zz = xx + yy**2

# Flatten each grid and drop the first two and last two points, so the
# remaining samples no longer cover the full grid.
x2, y2, z2 = (grid.reshape(-1)[2:-2] for grid in (xx, yy, zz))

# 'z' is indexed by the 'x' and 'y' entries; 'w' holds the same values
# without declaring any axes.
data = {
    'id': 'Some data',
    'datasets': {
        'z': {'values': z2, 'axes': ['x', 'y']},
        'x': {'values': x2},
        'y': {'values': y2},
        'w': {'values': z2},
    },
}

In [19]:
# Convert two entries of the first dataset: 'z' declares the axes 'x' and 'y',
# so its rows get an (x, y) MultiIndex; 'w' declares no axes.
dfz = dict_to_dataframe(data['datasets'], 'z')
dfw = dict_to_dataframe(data['datasets'], 'w')

In [20]:
# Show the (x, y)-indexed frame built from the 'z' entry.
dfz

Unnamed: 0_level_0,z,0
x,y,Unnamed: 2_level_1
0.0,2.0,4.0
1.0,0.0,1.0
1.0,1.0,2.0
1.0,2.0,5.0
2.0,0.0,2.0


In [25]:
# Second dataset with the same layout as `data`: two samples of 'z',
# located at (x, y) = (0, 0) and (0, 1).
data2 = {
    'id': 'Some data',
    'datasets': {
        'z': {'values': [1., 1.], 'axes': ['x', 'y']},
        'x': {'values': [0., 0.]},
        'y': {'values': [0., 1.]},
        'w': {'values': [1., 1.]},
    },
}

In [26]:
# Convert the 'z' entry of the second dataset the same way and display it.
dfz2 = dict_to_dataframe(data2['datasets'], 'z')
dfz2

Unnamed: 0_level_0,z,0
x,y,Unnamed: 2_level_1
0.0,0.0,1.0
0.0,1.0,1.0


In [29]:
# Stack the rows of both frames into one. DataFrame.append was deprecated in
# pandas 1.4 and removed in 2.0; pd.concat is the supported way to do this.
# Note that concat keeps every row, so an (x, y) point present in both frames
# would appear twice in the index.
dfz_joined = pd.concat([dfz, dfz2])
dfz_joined

Unnamed: 0_level_0,z,0
x,y,Unnamed: 2_level_1
0.0,2.0,4.0
1.0,0.0,1.0
1.0,1.0,2.0
1.0,2.0,5.0
2.0,0.0,2.0
0.0,0.0,1.0
0.0,1.0,1.0


In [34]:
# Display the rows ordered by (x, y). The sorted frame is not assigned, so
# dfz_joined itself keeps its original row order.
dfz_joined.sort_index()

Unnamed: 0_level_0,z,0
x,y,Unnamed: 2_level_1
0.0,0.0,1.0
0.0,1.0,1.0
0.0,2.0,4.0
1.0,0.0,1.0
1.0,1.0,2.0
1.0,2.0,5.0
2.0,0.0,2.0


In [33]:
# Wrap the joined frame in a DataArray and expand its (x, y) row dimension
# 'dim_0' into separate x and y dimensions; grid points with no row in the
# frame come out as NaN. squeeze() drops the length-1 column dimension.
xr.DataArray(dfz_joined).unstack('dim_0').squeeze()

<xarray.DataArray (x: 3, y: 3)>
array([[ 1.,  1.,  4.],
       [ 1.,  2.,  5.],
       [ 2., nan, nan]])
Coordinates:
    z        int64 0
  * x        (x) float64 0.0 1.0 2.0
  * y        (y) float64 0.0 1.0 2.0

In [21]:
# Wrap dfz in a DataArray: the row MultiIndex becomes dimension 'dim_0'
# (carrying x and y as its levels) and the single column becomes dimension 'z'.
xaz = xr.DataArray(dfz)
xaz

<xarray.DataArray (dim_0: 5, z: 1)>
array([[4.],
       [1.],
       [2.],
       [5.],
       [2.]])
Coordinates:
  * dim_0    (dim_0) MultiIndex
  - x        (dim_0) float64 0.0 1.0 1.0 1.0 2.0
  - y        (dim_0) float64 2.0 0.0 1.0 2.0 0.0
  * z        (z) int64 0

In [22]:
# Expand 'dim_0' into an x-by-y grid; (x, y) points missing from dfz are NaN.
# squeeze() drops the length-1 column dimension.
xaz.unstack('dim_0').squeeze()

<xarray.DataArray (x: 3, y: 3)>
array([[nan, nan,  4.],
       [ 1.,  2.,  5.],
       [ 2., nan, nan]])
Coordinates:
    z        int64 0
  * x        (x) float64 0.0 1.0 2.0
  * y        (y) float64 0.0 1.0 2.0

In [35]:
# dfw was built without axes, so here 'dim_0' is a plain integer index rather
# than an (x, y) MultiIndex and there is nothing to unstack.
xaw = xr.DataArray(dfw)
xaw

<xarray.DataArray (dim_0: 5, w: 1)>
array([[4.],
       [1.],
       [2.],
       [5.],
       [2.]])
Coordinates:
  * dim_0    (dim_0) int64 0 1 2 3 4
  * w        (w) int64 0