In [1]:
import numpy as np
import pandas as pd
import xarray as xr

In [2]:
# Create a simple DataArray from a 2x3 array of random normals.
# No dims or coords are passed, so the repr below shows the default
# dimension names dim_0 / dim_1.
xr.DataArray(np.random.randn(2, 3))

<xarray.DataArray (dim_0: 2, dim_1: 3)>
array([[ 0.58914032, -1.51998157,  1.24818804],
       [-1.04563186,  0.39435316, -0.23390073]])
Coordinates:
  * dim_0    (dim_0) int64 0 1
  * dim_1    (dim_1) int64 0 1 2

In [4]:
# Build a 2x3 DataArray with named dimensions: each (name, labels) pair
# names one axis and supplies the coordinate labels along it.
data = xr.DataArray(
    np.random.randn(2, 3),
    coords=[
        ('x', ['a', 'b']),
        ('y', [-2, 0, 2]),
    ],
)
data

<xarray.DataArray (x: 2, y: 3)>
array([[-1.17532247,  0.15050424,  1.04168724],
       [ 0.15363315, -0.48172288, -0.04613369]])
Coordinates:
  * x        (x) <U1 'a' 'b'
  * y        (y) int64 -2 0 2

In [5]:
# Create from a pandas Series or DataFrame. Here a Series is passed: per the
# repr below, its name ('foo') becomes the DataArray name and its index
# becomes the coordinate of dim_0.
xr.DataArray(pd.Series(range(3), index=list('abc'), name='foo'))

<xarray.DataArray 'foo' (dim_0: 3)>
array([0, 1, 2])
Coordinates:
  * dim_0    (dim_0) object 'a' 'b' 'c'

In [6]:
# Like in pandas, `.values` is a numpy array that you can modify in place.
data.values

array([[-1.17532247,  0.15050424,  1.04168724],
       [ 0.15363315, -0.48172288, -0.04613369]])

In [7]:
# Dimension names, in axis order, as a tuple.
data.dims

('x', 'y')

In [8]:
# Coordinate labels attached to each dimension.
data.coords

Coordinates:
  * x        (x) <U1 'a' 'b'
  * y        (y) int64 -2 0 2

In [9]:
# Metadata dictionary of the array; empty here because no attrs were set.
data.attrs

OrderedDict()

# Indexing

In [10]:
# positional and by integer label, like numpy
# A list of positions on the first axis (x) selects rows 0 and 1,
# which is the whole array here.
data[[0, 1]]

<xarray.DataArray (x: 2, y: 3)>
array([[-1.17532247,  0.15050424,  1.04168724],
       [ 0.15363315, -0.48172288, -0.04613369]])
Coordinates:
  * x        (x) <U1 'a' 'b'
  * y        (y) int64 -2 0 2

In [16]:
# One list of positions per axis; list indexers keep both dimensions,
# so the result is a 1x1 array rather than a scalar.
data[[0],[0]]

<xarray.DataArray (x: 1, y: 1)>
array([[-1.17532247]])
Coordinates:
  * x        (x) <U1 'a'
  * y        (y) int64 -2

In [13]:
# positional and by coordinate label, like pandas
# The label slice includes both endpoints: 'a' and 'b' are both returned.
data.loc['a':'b']

<xarray.DataArray (x: 2, y: 3)>
array([[-1.17532247,  0.15050424,  1.04168724],
       [ 0.15363315, -0.48172288, -0.04613369]])
Coordinates:
  * x        (x) <U1 'a' 'b'
  * y        (y) int64 -2 0 2

In [15]:
# Lists of labels, one per dimension in axis order (x first, then y).
data.loc[['a'], [-2]]

<xarray.DataArray (x: 1, y: 1)>
array([[-1.17532247]])
Coordinates:
  * x        (x) <U1 'a'
  * y        (y) int64 -2

In [17]:
# by dimension name and integer label
# slice(2) takes positions 0 and 1 along x.
data.isel(x=slice(2))

<xarray.DataArray (x: 2, y: 3)>
array([[-1.17532247,  0.15050424,  1.04168724],
       [ 0.15363315, -0.48172288, -0.04613369]])
Coordinates:
  * x        (x) <U1 'a' 'b'
  * y        (y) int64 -2 0 2

In [32]:
# Keyword order does not matter: indexers are matched to dimensions by name,
# and the result keeps the array's own (x, y) order.
data.isel(y=slice(2), x=slice(1,2))

<xarray.DataArray (x: 1, y: 2)>
array([[ 0.15363315, -0.48172288]])
Coordinates:
  * x        (x) <U1 'b'
  * y        (y) int64 -2 0

In [35]:
# by dimension name and coordinate label
# The result follows the order of the requested labels, so both x and y
# come back reordered.
data.sel(x=['b', 'a'], y=[0,-2,2])

<xarray.DataArray (x: 2, y: 3)>
array([[-0.48172288,  0.15363315, -0.04613369],
       [ 0.15050424, -1.17532247,  1.04168724]])
Coordinates:
  * x        (x) <U1 'b' 'a'
  * y        (y) int64 0 -2 2

# Computation

In [88]:
# Show `data` again as the reference for the computations that follow.
data

<xarray.DataArray (x: 2, y: 3)>
array([[-1.17532247,  0.15050424,  1.04168724],
       [ 0.15363315, -0.48172288, -0.04613369]])
Coordinates:
  * x        (x) <U1 'a' 'b'
  * y        (y) int64 -2 0 2

In [89]:
# Transpose: the dimension order becomes (y, x).
data.T

<xarray.DataArray (y: 3, x: 2)>
array([[-1.17532247,  0.15363315],
       [ 0.15050424, -0.48172288],
       [ 1.04168724, -0.04613369]])
Coordinates:
  * x        (x) <U1 'a' 'b'
  * y        (y) int64 -2 0 2

In [90]:
# Scalar arithmetic is applied elementwise; coordinates are kept.
data + 10

<xarray.DataArray (x: 2, y: 3)>
array([[  8.82467753,  10.15050424,  11.04168724],
       [ 10.15363315,   9.51827712,   9.95386631]])
Coordinates:
  * x        (x) <U1 'a' 'b'
  * y        (y) int64 -2 0 2

In [91]:
# A numpy ufunc applied to a DataArray returns a DataArray with the same
# coordinates.
np.sin(data)

<xarray.DataArray (x: 2, y: 3)>
array([[-0.92281412,  0.14993669,  0.86325711],
       [ 0.15302949, -0.46330667, -0.04611732]])
Coordinates:
  * x        (x) <U1 'a' 'b'
  * y        (y) int64 -2 0 2

In [92]:
# With no dimension given, the sum runs over every element and yields a
# 0-dimensional DataArray.
data.sum()

<xarray.DataArray ()>
array(-0.35735440297783705)

In [36]:
# aggregation operations can use dimension names instead of axis numbers:
# reducing over x leaves a 1-d result indexed by y.
data.mean(dim='x')

<xarray.DataArray (y: 3)>
array([-0.51084466, -0.16560932,  0.49777678])
Coordinates:
  * y        (y) int64 -2 0 2

In [38]:
# Reducing over y leaves a 1-d result indexed by x.
data.sum(dim='y')

<xarray.DataArray (x: 2)>
array([ 0.01686901, -0.37422341])
Coordinates:
  * x        (x) <U1 'a' 'b'

In [39]:
# Arithmetic operations broadcast based on dimension name
# (`data`, with dims x and y, is shown again for reference).
data

<xarray.DataArray (x: 2, y: 3)>
array([[-1.17532247,  0.15050424,  1.04168724],
       [ 0.15363315, -0.48172288, -0.04613369]])
Coordinates:
  * x        (x) <U1 'a' 'b'
  * y        (y) int64 -2 0 2

In [45]:
# 1-d array over a dimension named 'z' with labels 1 and 2.
# Values are random; no seed is set in the visible cells.
data1 = xr.DataArray(np.random.randn(2), [('z', [1,2])])
data1

<xarray.DataArray (z: 2)>
array([ 1.58388816, -0.25904881])
Coordinates:
  * z        (z) int64 1 2

In [46]:
# The operands share no dimension name, so the result is broadcast over
# x, y and z.
data + data1

<xarray.DataArray (x: 2, y: 3, z: 2)>
array([[[ 0.40856569, -1.43437128],
        [ 1.7343924 , -0.10854457],
        [ 2.6255754 ,  0.78263843]],

       [[ 1.73752131, -0.10541566],
        [ 1.10216529, -0.74077168],
        [ 1.53775448, -0.30518249]]])
Coordinates:
  * x        (x) <U1 'a' 'b'
  * y        (y) int64 -2 0 2
  * z        (z) int64 1 2

In [48]:
# 1-d array over a dimension named 'a' with three labels.
data2 = xr.DataArray(np.random.randn(3), [('a', [19,20,30])])
data2

<xarray.DataArray (a: 3)>
array([ 1.50286958,  1.44980792,  0.0404519 ])
Coordinates:
  * a        (a) int64 19 20 30

In [49]:
# Result dimensions follow operand order: (z, a).
data1 + data2

<xarray.DataArray (z: 2, a: 3)>
array([[ 3.08675774,  3.03369608,  1.62434006],
       [ 1.24382078,  1.19075911, -0.21859691]])
Coordinates:
  * z        (z) int64 1 2
  * a        (a) int64 19 20 30

In [50]:
# Swapping the operands swaps the result's dimension order: (a, z).
data2 + data1

<xarray.DataArray (a: 3, z: 2)>
array([[ 3.08675774,  1.24382078],
       [ 3.03369608,  1.19075911],
       [ 1.62434006, -0.21859691]])
Coordinates:
  * a        (a) int64 19 20 30
  * z        (z) int64 1 2

In [63]:
# Another 1-d array over 'z' with labels 1 and 2, i.e. the same dimension
# name and labels as data1.
data3 = xr.DataArray(np.random.randn(2), [('z', [1,2])])
data3

<xarray.DataArray (z: 2)>
array([-0.12143473,  0.40263329])
Coordinates:
  * z        (z) int64 1 2

In [64]:
# Both operands have dimension 'z', so values are added elementwise and the
# result stays 1-d.
data1 + data3

<xarray.DataArray (z: 2)>
array([ 1.46245343,  0.14358449])
Coordinates:
  * z        (z) int64 1 2

In [68]:
data3 = data3.rename({'z': 'n'})

In [69]:
data1 + data3

<xarray.DataArray (z: 2, n: 2)>
array([[ 1.46245343,  1.98652145],
       [-0.38048354,  0.14358449]])
Coordinates:
  * z        (z) int64 1 2
  * n        (n) int64 1 2

In [77]:
# you don't have to worry about order of dimensions
# (the transpose shown here has dims (y, x)).
data.T

<xarray.DataArray (y: 3, x: 2)>
array([[-1.17532247,  0.15363315],
       [ 0.15050424, -0.48172288],
       [ 1.04168724, -0.04613369]])
Coordinates:
  * x        (x) <U1 'a' 'b'
  * y        (y) int64 -2 0 2

In [78]:
# Original orientation, dims (x, y).
data

<xarray.DataArray (x: 2, y: 3)>
array([[-1.17532247,  0.15050424,  1.04168724],
       [ 0.15363315, -0.48172288, -0.04613369]])
Coordinates:
  * x        (x) <U1 'a' 'b'
  * y        (y) int64 -2 0 2

In [79]:
# Operands are matched by dimension name, so the differing axis order does
# not matter: the result equals 2 * data and keeps dims (x, y).
data + data.T

<xarray.DataArray (x: 2, y: 3)>
array([[-2.35064494,  0.30100848,  2.08337448],
       [ 0.3072663 , -0.96344575, -0.09226737]])
Coordinates:
  * x        (x) <U1 'a' 'b'
  * y        (y) int64 -2 0 2

In [86]:
# Operations also align based on index labels
# data[:-1] drops the last x label, leaving only 'a'. This cell only displays
# the operand; no arithmetic between slices is performed here.
data[:-1]

<xarray.DataArray (x: 1, y: 3)>
array([[-1.17532247,  0.15050424,  1.04168724]])
Coordinates:
  * x        (x) <U1 'a'
  * y        (y) int64 -2 0 2

In [87]:
# First x label only ('a'). With just two x labels this is the same
# selection as data[:-1].
data[:1]

<xarray.DataArray (x: 1, y: 3)>
array([[-1.17532247,  0.15050424,  1.04168724]])
Coordinates:
  * x        (x) <U1 'a'
  * y        (y) int64 -2 0 2

# GroupBy
xarray supports grouped operations using an API very similar to that of pandas.

In [94]:
# One group label per y position; the array reuses data's y coordinate so
# the labels line up with data along y.
labels = xr.DataArray(
    ['E', 'F', 'E'],
    coords=[data.coords['y']],
    name='labels',
)
labels

<xarray.DataArray 'labels' (y: 3)>
array(['E', 'F', 'E'], 
      dtype='<U1')
Coordinates:
  * y        (y) int64 -2 0 2

In [95]:
# Average over y within each label group; the y dimension is replaced by a
# `labels` dimension with one entry per group.
data.groupby(labels).mean('y')

<xarray.DataArray (x: 2, labels: 2)>
array([[-0.06681761,  0.15050424],
       [ 0.05374973, -0.48172288]])
Coordinates:
  * x        (x) <U1 'a' 'b'
  * labels   (labels) object 'E' 'F'

In [96]:
# Subtract each group's overall minimum from its members. The result keeps
# the original (x, y) shape and gains `labels` as a coordinate on y.
# NOTE(review): newer xarray releases offer GroupBy.map for this; confirm
# against the installed version before switching.
data.groupby(labels).apply(lambda x: x - x.min())

<xarray.DataArray (x: 2, y: 3)>
array([[ 0.        ,  0.63222711,  2.21700971],
       [ 1.32895562,  0.        ,  1.12918878]])
Coordinates:
  * x        (x) <U1 'a' 'b'
  * y        (y) int64 -2 0 2
    labels   (y) <U1 'E' 'F' 'E'

In [97]:
# `data` itself is unchanged by the grouped operations above.
data

<xarray.DataArray (x: 2, y: 3)>
array([[-1.17532247,  0.15050424,  1.04168724],
       [ 0.15363315, -0.48172288, -0.04613369]])
Coordinates:
  * x        (x) <U1 'a' 'b'
  * y        (y) int64 -2 0 2

# Convert to pandas

In [98]:
# Flatten to a pandas Series indexed by the (x, y) label pairs.
data.to_series()

x  y 
a  -2   -1.175322
    0    0.150504
    2    1.041687
b  -2    0.153633
    0   -0.481723
    2   -0.046134
dtype: float64

In [99]:
# Convert the 2-d array to a pandas DataFrame with x as the index and y as
# the columns.
data.to_pandas()

y        -2         0         2
x
a -1.175322  0.150504  1.041687
b  0.153633 -0.481723 -0.046134
