pydata · shoyer · Jan 23, 2017 · Dec 28, 2016 · Dec 28, 2016 · Dec 28, 2016
diff --git a/doc/api.rst b/doc/api.rst
@@ -122,6 +122,7 @@ Computation
    Dataset.groupby_bins
    Dataset.resample
    Dataset.diff
+   Dataset.quantile
 
 **Aggregation**:
 :py:attr:`~Dataset.all`
@@ -270,6 +271,7 @@ Computation
    DataArray.get_axis_num
    DataArray.diff
    DataArray.dot
+   DataArray.quantile
 
 **Aggregation**:
 :py:attr:`~DataArray.all`

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -184,6 +184,9 @@ Enhancements
   and attributes. The method prints to a buffer (e.g. ``stdout``) with output
   similar to what the command line utility ``ncdump -h`` produces (:issue:`1150`).
   By `Joe Hamman <https://github.com/jhamman>`_.
+- New :py:meth:`~DataArray.quantile` method to calculate quantiles from
+  DataArray objects (:issue:`1187`).
+  By `Joe Hamman <https://github.com/jhamman>`_.
 
 Bug fixes
 ~~~~~~~~~

diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
@@ -1736,5 +1736,52 @@ def dot(self, other):
 
         return type(self)(new_data, new_coords, new_dims)
 
+    def quantile(self, q, dim=None, interpolation='linear', keep_attrs=False):
+        """Compute the qth quantile of the data along the specified dimension.
+
+        Returns the qth quantiles(s) of the array elements.
+
+        Parameters
+        ----------
+        q : float in range of [0,1] (or sequence of floats)
+            Quantile to compute, which must be between 0 and 1
+            inclusive.
+        dim : str or sequence of str, optional
+            Dimension(s) over which to apply quantile.
+        interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
+            This optional parameter specifies the interpolation method to
+            use when the desired quantile lies between two data points
+            ``i < j``:
+                * linear: ``i + (j - i) * fraction``, where ``fraction`` is
+                  the fractional part of the index surrounded by ``i`` and
+                  ``j``.
+                * lower: ``i``.
+                * higher: ``j``.
+                * nearest: ``i`` or ``j``, whichever is nearest.
+                * midpoint: ``(i + j) / 2``.
+        keep_attrs : bool, optional
+            If True, the dataset's attributes (`attrs`) will be copied from
+            the original object to the new one.  If False (default), the new
+            object will be returned without attributes.
+
+        Returns
+        -------
+        quantiles : DataArray
+            If `q` is a single quantile, then the result
+            is a scalar. If multiple percentiles are given, first axis of
+            the result corresponds to the quantile and a quantile dimension
+            is added to the return array. The other dimensions are the
+             dimensions that remain after the reduction of the array.
+
+        See Also
+        --------
+        np.nanpercentile, pd.Series.quantile, xr.Dataset.quantile
+        """
+
+        ds = self._to_temp_dataset().quantile(q, dim=dim, keep_attrs=keep_attrs,
+                                              interpolation=interpolation)
+        return self._from_temp_dataset(ds)
+
+
 # priority most be higher than Variable to properly work with binary ufuncs
 ops.inject_all_ops_and_reduce_methods(DataArray, priority=60)
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
@@ -28,7 +28,6 @@
                        broadcast_variables)
 from .pycompat import (iteritems, basestring, OrderedDict,
                        dask_array_type, range)
-from .formatting import ensure_valid_repr
 from .combine import concat
 from .options import OPTIONS
 
@@ -2547,6 +2546,93 @@ def roll(self, **shifts):
 
         return self._replace_vars_and_dims(variables)
 
+    def quantile(self, q, dim=None, interpolation='linear',
+                 numeric_only=False, keep_attrs=False):
+        """Compute the qth quantile of the data along the specified dimension.
+
+        Returns the qth quantiles(s) of the array elements for each variable
+        in the Dataset.
+
+        Parameters
+        ----------
+        q : float in range of [0,1] (or sequence of floats)
+            Quantile to compute, which must be between 0 and 1
+            inclusive.
+        dim : str or sequence of str, optional
+            Dimension(s) over which to apply quantile.
+        interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
+            This optional parameter specifies the interpolation method to
+            use when the desired quantile lies between two data points
+            ``i < j``:
+                * linear: ``i + (j - i) * fraction``, where ``fraction`` is
+                  the fractional part of the index surrounded by ``i`` and
+                  ``j``.
+                * lower: ``i``.
+                * higher: ``j``.
+                * nearest: ``i`` or ``j``, whichever is nearest.
+                * midpoint: ``(i + j) / 2``.
+        keep_attrs : bool, optional
+            If True, the dataset's attributes (`attrs`) will be copied from
+            the original object to the new one.  If False (default), the new
+            object will be returned without attributes.
+        numeric_only : bool, optional
+            If True, only apply ``func`` to variables with a numeric dtype.
+
+        Returns
+        -------
+        quantiles : Dataset
+            If `q` is a single quantile, then the result is a scalar for each
+            variable in data_vars. If multiple percentiles are given, first
+            axis of the result corresponds to the quantile and a quantile
+            dimension is added to the return Dataset. The other dimensions are
+            the dimensions that remain after the reduction of the array.
+
+        See Also
+        --------
+        np.nanpercentile, pd.Series.quantile, xr.DataArray.quantile
+        """
+
+        if isinstance(dim, basestring):
+            dims = set([dim])
+        elif dim is None:
+            dims = set(self.dims)
+        else:
+            dims = set(dim)
+
+        _assert_empty([dim for dim in dims if dim not in self.dims],
+                      'Dataset does not contain the dimensions: %s')
+
+        q = np.asarray(q, dtype=np.float64)
+
+        variables = OrderedDict()
+        for name, var in iteritems(self.variables):
+            reduce_dims = [dim for dim in var.dims if dim in dims]
+            if reduce_dims or not var.dims:
+                if name not in self.coords:
+                    if (not numeric_only or
+                        np.issubdtype(var.dtype, np.number) or
+                            var.dtype == np.bool_):
+                        if len(reduce_dims) == var.ndim:
+                            # prefer to aggregate over axis=None rather than
+                            # axis=(0, 1) if they will be equivalent, because
+                            # the former is often more efficient
+                            reduce_dims = None
+                        variables[name] = var.quantile(
+                            q, dim=reduce_dims, interpolation=interpolation)
+
+            else:
+                variables[name] = var
+
+        # construct the new dataset
+        coord_names = set(k for k in self.coords if k in variables)
+        attrs = self.attrs if keep_attrs else None
+        new = self._replace_vars_and_dims(variables, coord_names, attrs=attrs)
+        if 'quantile' in new.dims:
+            new.coords['quantile'] = Variable('quantile', q)
+        else:
+            new.coords['quantile'] = q
+        return new
+
     @property
     def real(self):
         return self._unary_op(lambda x: x.real, keep_attrs=True)(self)

diff --git a/xarray/core/ops.py b/xarray/core/ops.py
@@ -410,6 +410,7 @@ def inject_reduce_methods(cls):
             extra_args=cls._reduce_extra_args_docstring)
         setattr(cls, name, func)
 
+
 def inject_cum_methods(cls):
     methods = ([(name, globals()[name], True) for name in NAN_CUM_METHODS])
     for name, f, include_skipna in methods:

diff --git a/xarray/core/utils.py b/xarray/core/utils.py
@@ -6,7 +6,6 @@
 import contextlib
 import functools
 import itertools
-import os.path
 import re
 import warnings
 from collections import Mapping, MutableMapping, Iterable
@@ -102,7 +101,9 @@ def equivalent(first, second):
     if isinstance(first, np.ndarray) or isinstance(second, np.ndarray):
         return ops.array_equiv(first, second)
     else:
-        return first is second or first == second or (pd.isnull(first) and pd.isnull(second))
+        return (first is second or
+                first == second or
+                (pd.isnull(first) and pd.isnull(second)))
 
 
 def peek_at(iterable):
@@ -179,12 +180,14 @@ def combine_pos_and_kw_args(pos_kwargs, kw_kwargs, func_name):
 
 
 def is_scalar(value):
-    """ Whether to treat a value as a scalar. Any non-iterable, string, or 0-D array """
-    return (
-        getattr(value, 'ndim', None) == 0
-        or isinstance(value, (basestring, bytes_type))
-        or not isinstance(value, Iterable))
+    """Whether to treat a value as a scalar.
 
+    Any non-iterable, string, or 0-D array
+    """
+    return (
+        getattr(value, 'ndim', None) == 0 or
+        isinstance(value, (basestring, bytes_type)) or not
+        isinstance(value, Iterable))
 
 
 def is_valid_numpy_dtype(dtype):
@@ -205,8 +208,8 @@ def to_0d_object_array(value):
 
 def to_0d_array(value):
     """Given a value, wrap it in a 0-D numpy.ndarray."""
-    if np.isscalar(value) or (isinstance(value, np.ndarray)
-                                and value.ndim == 0):
+    if np.isscalar(value) or (isinstance(value, np.ndarray) and
+                              value.ndim == 0):
         return np.array(value)
     else:
         return to_0d_object_array(value)

diff --git a/xarray/core/variable.py b/xarray/core/variable.py
@@ -5,6 +5,7 @@
 from collections import defaultdict
 import functools
 import itertools
+from distutils.version import LooseVersion
 
 import numpy as np
 import pandas as pd
@@ -1031,6 +1032,75 @@ def no_conflicts(self, other):
         """
         return self.broadcast_equals(other, equiv=ops.array_notnull_equiv)
 
+    def quantile(self, q, dim=None, interpolation='linear'):
+        """Compute the qth quantile of the data along the specified dimension.
+
+        Returns the qth quantiles(s) of the array elements.
+
+        Parameters
+        ----------
+        q : float in range of [0,1] (or sequence of floats)
+            Quantile to compute, which must be between 0 and 1
+            inclusive.
+        dim : str or sequence of str, optional
+            Dimension(s) over which to apply quantile.
+        interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
+            This optional parameter specifies the interpolation method to
+            use when the desired quantile lies between two data points
+            ``i < j``:
+                * linear: ``i + (j - i) * fraction``, where ``fraction`` is
+                  the fractional part of the index surrounded by ``i`` and
+                  ``j``.
+                * lower: ``i``.
+                * higher: ``j``.
+                * nearest: ``i`` or ``j``, whichever is nearest.
+                * midpoint: ``(i + j) / 2``.
+
+        Returns
+        -------
+        quantiles : Variable
+            If `q` is a single quantile, then the result
+            is a scalar. If multiple percentiles are given, first axis of
+            the result corresponds to the quantile and a quantile dimension
+            is added to the return array. The other dimensions are the
+             dimensions that remain after the reduction of the array.
+
+        See Also
+        --------
+        np.nanpercentile, pd.Series.quantile,
+        xr.Dataset.quantile, xr.DataArray.quantile
+        """
+
+        if isinstance(self.data, dask_array_type):
+            TypeError("quantile does not work for arrays stored as dask "
+                      "arrays. Load the data via .compute() or .load() prior "
+                      "to calling this method.")
+        if LooseVersion(np.__version__) < LooseVersion('1.10.0'):
+            raise NotImplementedError(
+                'quantile requres numpy version 1.10.0 or later')
+
+        q = np.asarray(q, dtype=np.float64)
+
+        new_dims = list(self.dims)
+        if dim is not None:
+            axis = self.get_axis_num(dim)
+            if utils.is_scalar(dim):
+                new_dims.remove(dim)
+            else:
+                for d in dim:
+                    new_dims.remove(d)
+        else:
+            axis = None
+            new_dims = []
+
+        # only add the quantile dimension if q is array like
+        if q.ndim != 0:
+            new_dims = ['quantile'] + new_dims
+
+        qs = np.nanpercentile(self.data, q * 100., axis=axis,
+                              interpolation=interpolation)
+        return Variable(new_dims, qs)
+
     @property
     def real(self):
         return type(self)(self.dims, self.data.real, self._attrs)

diff --git a/xarray/test/test_dataarray.py b/xarray/test/test_dataarray.py
@@ -7,6 +7,7 @@
 import pytest
 from copy import deepcopy
 from textwrap import dedent
+from distutils.version import LooseVersion
 
 import xarray as xr
 
@@ -1328,6 +1329,18 @@ def test_reduce(self):
         expected = DataArray(5, {'c': -999})
         self.assertDataArrayIdentical(expected, actual)
 
+    @pytest.mark.skipif(LooseVersion(np.__version__) < LooseVersion('1.10.0'),
+                        reason='requires numpy version 1.10.0 or later')
+    # skip due to bug in older versions of numpy.nanpercentile
+    def test_quantile(self):
+        for q in [0.25, [0.50], [0.25, 0.75]]:
+            for axis, dim in zip([None, 0, [0], [0, 1]],
+                                 [None, 'x', ['x'], ['x', 'y']]):
+                actual = self.dv.quantile(q, dim=dim)
+                expected = np.nanpercentile(self.dv.values, np.array(q) * 100,
+                                            axis=axis)
+                np.testing.assert_allclose(actual.values, expected)
+
     def test_reduce_keep_attrs(self):
         # Test dropped attrs
         vm = self.va.mean()