In [1]:
import numpy as np
import pandas as pd
import xarray as xr

In [2]:
# Wrap a 2x3 array of random normals; with no names given, the dimensions default to dim_0 and dim_1.
xr.DataArray(np.random.randn(2, 3))

<xarray.DataArray (dim_0: 2, dim_1: 3)>
array([[-0.05402 , -2.619762,  0.479137],
       [ 0.772938, -0.7195  , -1.468093]])
Dimensions without coordinates: dim_0, dim_1

In [3]:
# Name the two dimensions x and y, and label the positions along x as 'a' and 'b'.
# No coordinate is supplied for y, so it stays a dimension without labels.
data = xr.DataArray(np.random.randn(2, 3), coords={'x': ['a', 'b']}, dims=('x', 'y'))

In [4]:
# The repr lists the dimension sizes, the values, and which dimensions carry coordinates.
data

<xarray.DataArray (x: 2, y: 3)>
array([[-1.167696, -0.131055,  0.483719],
       [ 0.293058,  2.185163,  0.535132]])
Coordinates:
  * x        (x) <U1 'a' 'b'
Dimensions without coordinates: y

In [5]:
# The wrapped numpy array.
data.values

array([[-1.16769607, -0.13105522,  0.48371873],
       [ 0.29305836,  2.18516288,  0.53513166]])

In [6]:
# Dimension names, in axis order.
data.dims

('x', 'y')

In [7]:
# Coordinate labels; only x has any on this array.
data.coords

Coordinates:
  * x        (x) <U1 'a' 'b'

In [8]:
# Attribute dictionary; empty for this array.
data.attrs

OrderedDict()

In [9]:
# Positional indexing by integer position, like numpy:
# pick entries 0 and 1 along the first dimension (x).
data[[0, 1]]

<xarray.DataArray (x: 2, y: 3)>
array([[-1.167696, -0.131055,  0.483719],
       [ 0.293058,  2.185163,  0.535132]])
Coordinates:
  * x        (x) <U1 'a' 'b'
Dimensions without coordinates: y

In [10]:
# Positional (first dimension) but by coordinate label, like pandas.
# The label slice 'a':'b' includes both endpoints, so both rows are returned.
data.loc['a':'b']

<xarray.DataArray (x: 2, y: 3)>
array([[-1.167696, -0.131055,  0.483719],
       [ 0.293058,  2.185163,  0.535132]])
Coordinates:
  * x        (x) <U1 'a' 'b'
Dimensions without coordinates: y

In [12]:
# By dimension name and integer position: the first two entries along x.
data.isel(x=slice(2))

<xarray.DataArray (x: 2, y: 3)>
array([[-1.167696, -0.131055,  0.483719],
       [ 0.293058,  2.185163,  0.535132]])
Coordinates:
  * x        (x) <U1 'a' 'b'
Dimensions without coordinates: y

In [13]:
# By dimension name and coordinate label: the entries of x labelled 'a' and 'b'.
data.sel(x=['a', 'b'])

<xarray.DataArray (x: 2, y: 3)>
array([[-1.167696, -0.131055,  0.483719],
       [ 0.293058,  2.185163,  0.535132]])
Coordinates:
  * x        (x) <U1 'a' 'b'
Dimensions without coordinates: y

In [14]:
 data + 10

<xarray.DataArray (x: 2, y: 3)>
array([[ 8.832304,  9.868945, 10.483719],
       [10.293058, 12.185163, 10.535132]])
Coordinates:
  * x        (x) <U1 'a' 'b'
Dimensions without coordinates: y

In [15]:
# numpy ufuncs apply element-wise and give back a DataArray with the same dimensions and coordinates.
np.sin(data)


<xarray.DataArray (x: 2, y: 3)>
array([[-0.919849, -0.13068 ,  0.465074],
       [ 0.288882,  0.817139,  0.509954]])
Coordinates:
  * x        (x) <U1 'a' 'b'
Dimensions without coordinates: y

In [16]:
# Transpose reverses the dimension order, (x, y) -> (y, x); the coordinates stay attached.
data.T

<xarray.DataArray (y: 3, x: 2)>
array([[-1.167696,  0.293058],
       [-0.131055,  2.185163],
       [ 0.483719,  0.535132]])
Coordinates:
  * x        (x) <U1 'a' 'b'
Dimensions without coordinates: y

In [17]:
# With no dimension given, the sum runs over every element and yields a 0-dimensional DataArray.
data.sum()

<xarray.DataArray ()>
array(2.19832)

In [18]:
# Aggregation operations can use dimension names instead of axis numbers:
# averaging over x leaves a 1-D result along y.
data.mean(dim='x')

<xarray.DataArray (y: 3)>
array([-0.437319,  1.027054,  0.509425])
Dimensions without coordinates: y

In [27]:
# Arithmetic operations broadcast based on dimension name,
# so you don't need to insert dummy dimensions for alignment.

# data has no explicit y coordinate; data.coords['y'] supplies the default integer index for it.
a = xr.DataArray(np.random.randn(3), [data.coords['y']])
a

<xarray.DataArray (y: 3)>
array([ 0.690412, -0.787427,  0.012232])
Coordinates:
  * y        (y) int64 0 1 2

In [23]:
# a is one-dimensional: length 3 along y.
a.shape

(3,)

In [22]:
# b lives on a different dimension, z, and is given no coordinate labels.
b = xr.DataArray(np.random.randn(4), dims='z')
b

<xarray.DataArray (z: 4)>
array([0.286577, 0.964658, 0.519673, 1.431154])
Dimensions without coordinates: z

In [24]:
# b is one-dimensional: length 4 along z.
b.shape

(4,)

In [25]:
# a has only y and b has only z, so the sum broadcasts to a (y: 3, z: 4) array.
a + b

<xarray.DataArray (y: 3, z: 4)>
array([[ 0.269876,  0.947957,  0.502971,  1.414452],
       [-1.16296 , -0.484878, -0.929864, -0.018383],
       [-0.28618 ,  0.391901, -0.053084,  0.858397]])
Coordinates:
  * y        (y) int64 0 1 2
Dimensions without coordinates: z

In [26]:
# It also means that in most cases you do not need to worry about the order of dimensions:

In [28]:
# Operands are matched by dimension name, so the transposed copy lines up with the
# original and the difference is zero everywhere.
data - data.T

<xarray.DataArray (x: 2, y: 3)>
array([[0., 0., 0.],
       [0., 0., 0.]])
Coordinates:
  * x        (x) <U1 'a' 'b'
Dimensions without coordinates: y

In [30]:
# Operations also align based on index labels.
# Both slices hold only the row labelled x='a', so the result has that single row.
data[:-1] - data[:1]

<xarray.DataArray (x: 1, y: 3)>
array([[0., 0., 0.]])
Coordinates:
  * x        (x) <U1 'a'
Dimensions without coordinates: y

GroupBy

xarray supports grouped operations using a very similar API to pandas:

In [31]:
# One group label per position along y. The array is named 'labels'; that name
# shows up as the grouped dimension in the aggregated result.
labels = xr.DataArray(['E', 'F', 'E'], [data.coords['y']], name='labels')

In [32]:
# The repr includes the array's name and its y index.
labels

<xarray.DataArray 'labels' (y: 3)>
array(['E', 'F', 'E'], dtype='<U1')
Coordinates:
  * y        (y) int64 0 1 2

In [37]:
# Redisplay data for reference before grouping it along y.
data

<xarray.DataArray (x: 2, y: 3)>
array([[-1.167696, -0.131055,  0.483719],
       [ 0.293058,  2.185163,  0.535132]])
Coordinates:
  * x        (x) <U1 'a' 'b'
Dimensions without coordinates: y

In [36]:
# groupby on its own returns a DataArrayGroupBy object rather than an array.
data.groupby(labels)

<xarray.core.groupby.DataArrayGroupBy at 0x10bf3d710>

In [34]:
# .groups maps each label to the integer positions along y that carry it.
data.groupby(labels).groups

{'E': [0, 2], 'F': [1]}

In [35]:
# Average over y within each group; y is replaced by a labels dimension with one entry per group.
data.groupby(labels).mean('y')

<xarray.DataArray (x: 2, labels: 2)>
array([[-0.341989, -0.131055],
       [ 0.414095,  2.185163]])
Coordinates:
  * x        (x) <U1 'a' 'b'
  * labels   (labels) object 'E' 'F'

pandas

xarray objects can be easily converted to and from pandas objects:

In [38]:
# Flatten the 2-D array into a pandas Series.
series = data.to_series()

In [39]:
# x and y appear as the two levels of the Series index.
series

x  y
a  0   -1.167696
   1   -0.131055
   2    0.483719
b  0    0.293058
   1    2.185163
   2    0.535132
dtype: float64

In [41]:
# Convert the Series back to a DataArray. The index levels become the x and y
# coordinates, so y now carries explicit integer labels.
series.to_xarray()

<xarray.DataArray (x: 2, y: 3)>
array([[-1.167696, -0.131055,  0.483719],
       [ 0.293058,  2.185163,  0.535132]])
Coordinates:
  * x        (x) object 'a' 'b'
  * y        (y) int64 0 1 2