# xarray sandbox

In [2]:
import xarray as xr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [174]:
# creating a DataSet

N_EPOCHS = 10
N_EXECUTIONS = 5

loss = np.random.randn(N_EPOCHS, N_EXECUTIONS)
acc = np.random.randn(N_EPOCHS, N_EXECUTIONS)

# Creating first the d
loss_array = xr.DataArray(loss, 
                          dims=('epoch', 'execution'), 
                          coords={'epoch': range(1, N_EPOCHS + 1),
                                  'execution': range(1, N_EXECUTIONS + 1)})

acc_array = xr.DataArray(acc, 
                         dims=('epoch', 'execution'), 
                         coords={'epoch': range(1, N_EPOCHS + 1),
                                 'execution': range(1, N_EXECUTIONS + 1)})

ds = xr.Dataset({'loss': loss_array, 'acc': acc_array})

ds = xr.Dataset({'loss': (['epoch', 'execution'], loss),
                 'acc': (['epoch', 'execution'], acc)},
               coords={'epoch': range(1, N_EPOCHS + 1),
                       'execution': range(1, N_EXECUTIONS + 1)})
ds

<xarray.Dataset>
Dimensions:    (epoch: 10, execution: 5)
Coordinates:
  * epoch      (epoch) int64 1 2 3 4 5 6 7 8 9 10
  * execution  (execution) int64 1 2 3 4 5
Data variables:
    loss       (epoch, execution) float64 -0.04847 -1.292 1.037 ... 0.4237 2.149
    acc        (epoch, execution) float64 -0.6515 -0.231 ... -0.2281 1.592

In [170]:
# removing a coordinate
del ds.coords['epoch']
ds

<xarray.Dataset>
Dimensions:    (epoch: 10, execution: 5)
Coordinates:
  * execution  (execution) int64 1 2 3 4 5
Dimensions without coordinates: epoch
Data variables:
    loss       (epoch, execution) float64 -0.9607 -1.046 ... 0.3628 -0.6589
    acc        (epoch, execution) float64 0.3625 0.8584 ... -0.2857 0.03906

In [172]:
# adding a coordinate
ds.coords['epoch'] = range(1, N_EPOCHS + 1)
ds

<xarray.Dataset>
Dimensions:    (epoch: 10, execution: 5)
Coordinates:
  * execution  (execution) int64 1 2 3 4 5
  * epoch      (epoch) int64 1 2 3 4 5 6 7 8 9 10
Data variables:
    loss       (epoch, execution) float64 -0.9607 -1.046 ... 0.3628 -0.6589
    acc        (epoch, execution) float64 0.3625 0.8584 ... -0.2857 0.03906

In [181]:
# drop some values from a dimension
ds.drop(range(10,7,-1), dim='epoch')

<xarray.Dataset>
Dimensions:    (epoch: 7, execution: 5)
Coordinates:
  * epoch      (epoch) int64 1 2 3 4 5 6 7
  * execution  (execution) int64 1 2 3 4 5
Data variables:
    loss       (epoch, execution) float64 -0.04847 -1.292 ... 0.1013 -1.654
    acc        (epoch, execution) float64 -0.6515 -0.231 ... -1.733 0.5724

In [184]:
# calculate a mean over a dimension (other:max, min, std, argmax, argmin)
ds.mean(dim='execution')

<xarray.Dataset>
Dimensions:  (epoch: 10)
Coordinates:
  * epoch    (epoch) int64 1 2 3 4 5 6 7 8 9 10
Data variables:
    loss     (epoch) float64 0.4369 0.07907 -0.1033 ... -0.03968 0.07503 0.286
    acc      (epoch) float64 -0.204 0.04171 -0.8956 ... 0.6096 -0.3395 0.399

In [198]:
# get DataArray numpy array
da = ds['loss']
da.values

array([[-0.04846689, -1.29182251,  1.03652958,  1.70417559,  0.78409497],
       [-1.35965872,  0.55839635,  0.30891169, -0.44549729,  1.3332192 ],
       [ 0.07011795, -0.40287164, -0.43455689, -0.48114954,  0.73196795],
       [-0.46814131, -0.72705042,  0.86179899,  0.58091809,  0.46228318],
       [ 0.38375277, -0.89923157, -1.7073643 ,  1.02657029,  1.81444068],
       [-0.33947497, -1.05026245, -0.23983039, -0.76000465,  0.88427197],
       [-0.00777243, -0.15252755,  0.07604388,  0.10127745, -1.65370728],
       [ 0.41579521,  1.45719707, -0.89762926,  0.52994197, -1.70371207],
       [ 0.45519688, -1.99452217,  1.46305222, -0.08660514,  0.53805169],
       [-0.55170033, -0.95535799,  0.36463419,  0.42367091,  2.14873152]])

In [203]:
mean_da = ds.mean(dim='execution')
max_epoch = mean_da.argmax(dim='epoch')
da.isel(epoch=max_epoch)

TypeError: cannot use a Dataset as an indexer

In [115]:
a = ds > 0.5
a['acc']

<xarray.DataArray 'acc' (epoch: 10, execution: 5)>
array([[ True, False,  True,  True,  True],
       [False, False,  True, False, False],
       [False,  True, False, False, False],
       [False, False, False, False, False],
       [False, False,  True, False,  True],
       [False,  True, False,  True, False],
       [False,  True, False, False, False],
       [False, False,  True, False, False],
       [False, False, False, False,  True],
       [ True, False, False, False, False]])
Coordinates:
  * execution  (execution) int64 1 2 3 4 5
  * epoch      (epoch) int64 1 2 3 4 5 6 7 8 9 10

In [87]:
ds.acc

<xarray.DataArray 'acc' (epoch: 10, execution: 5)>
array([[ 1.126653, -0.7535  , -0.355075,  1.913565,  1.173138],
       [ 0.695237,  1.081052, -0.957118, -1.137464,  0.546375],
       [-1.081425,  1.272038, -0.382493,  0.631412,  0.512766],
       [-0.025001, -0.181577,  2.283338,  0.158237, -1.536429],
       [ 1.661175, -0.157278,  0.208936, -0.620396, -1.217017],
       [ 0.095339,  0.609729, -1.130088, -1.865286, -0.01064 ],
       [-2.452466, -1.701488, -1.146147, -0.024833, -0.497993],
       [ 0.375222,  0.299981, -0.733491, -0.425897, -0.743248],
       [-0.248358, -0.26233 ,  1.000242, -0.497756, -0.667929],
       [ 1.872524,  1.524778,  0.558469,  0.314695,  0.114059]])
Coordinates:
  * epoch      (epoch) int64 1 2 3 4 5 6 7 8 9 10
  * execution  (execution) int64 1 2 3 4 5

In [102]:
da = ds['acc']
print(da.sel(epoch=[1,2,3], execution=1))
print(da.loc[1:3,1])

<xarray.DataArray 'acc' (epoch: 3)>
array([ 1.126653,  0.695237, -1.081425])
Coordinates:
  * epoch      (epoch) int64 1 2 3
    execution  int64 1
<xarray.DataArray 'acc' (epoch: 3)>
array([ 1.126653,  0.695237, -1.081425])
Coordinates:
  * epoch      (epoch) int64 1 2 3
    execution  int64 1


In [51]:
ds['loss'] = (['epoch', 'execution'], loss)

In [78]:
ds.coords['test'] = (('epoch','execution', 'dataset', 'architecture', 'id'), acc)
ds

<xarray.Dataset>
Dimensions:    (epoch: 10, execution: 5)
Coordinates:
  * epoch      (epoch) int64 1 2 3 4 5 6 7 8 9 10
  * execution  (execution) int64 1 2 3 4 5
    test       (epoch, execution) float64 1.038 -0.2443 ... -0.3422 -0.08301
Data variables:
    loss       (epoch, execution) float64 0.3363 -1.467 ... -0.4873 -1.183
    acc        (epoch, execution) float64 1.038 -0.2443 ... -0.3422 -0.08301

In [70]:
for a,b in ds.items():
    print(a,b)

loss <xarray.DataArray 'loss' (epoch: 10, execution: 5)>
array([[ 0.336255, -1.466941, -0.175837,  0.426458,  0.406906],
       [-0.106626,  0.777929,  0.966031,  0.005305,  1.396248],
       [ 0.290197,  0.196068,  1.648602, -0.312114, -0.904905],
       [-1.340316, -2.025447,  0.991599, -1.209522, -0.022976],
       [-1.185576,  0.51183 ,  1.428277, -0.435609,  0.419525],
       [-1.090769, -0.434373,  0.815173,  1.360907, -0.872371],
       [-1.061178,  0.858986,  0.400999, -0.988071,  0.61892 ],
       [-0.00243 , -0.254511,  0.131093,  0.047865,  0.858334],
       [ 0.093301,  0.125958,  1.276391, -0.40337 , -0.913546],
       [ 1.010723,  0.43181 ,  0.108414, -0.487322, -1.182704]])
Coordinates:
  * epoch      (epoch) int64 1 2 3 4 5 6 7 8 9 10
  * execution  (execution) int64 1 2 3 4 5
acc <xarray.DataArray 'acc' (epoch: 10, execution: 5)>
array([[ 1.037741, -0.244262, -0.43176 ,  0.839744, -0.031125],
       [ 1.983816,  0.108492,  0.549378,  1.456855, -0.594062],
       [ 1.23

In [121]:
da = xr.DataArray(np.arange(12).reshape((3, 4)), dims=['x', 'y'],
                 coords={'x': [0, 1, 2], 'y': ['a', 'b', 'c', 'd']})
da

<xarray.DataArray (x: 3, y: 4)>
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])
Coordinates:
  * x        (x) int64 0 1 2
  * y        (y) <U1 'a' 'b' 'c' 'd'

In [125]:
 da[[0,1], [1,1]]

<xarray.DataArray (x: 2, y: 2)>
array([[1, 1],
       [5, 5]])
Coordinates:
  * x        (x) int64 0 1
  * y        (y) <U1 'b' 'b'

In [127]:
ds = xr.tutorial.open_dataset('air_temperature')

In [128]:
ds

<xarray.Dataset>
Dimensions:  (lat: 25, lon: 53, time: 2920)
Coordinates:
  * lat      (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0
  * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0
  * time     (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00
Data variables:
    air      (time, lat, lon) float32 ...
Attributes:
    Conventions:  COARDS
    title:        4x daily NMC reanalysis (1948)
    description:  Data is from NMC initialized reanalysis\n(4x/day).  These a...
    platform:     Model
    references:   http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly...

In [131]:
xr.Dataset({'foo': (('x', 'y', 'z'), [[[42]]]), 'bar': (('y', 'z'), [[24]])})

<xarray.Dataset>
Dimensions:  (x: 1, y: 1, z: 1)
Dimensions without coordinates: x, y, z
Data variables:
    foo      (x, y, z) int64 42
    bar      (y, z) int64 24