pandas-dev · dberenbaum · Jul 30, 2018 · Aug 3, 2018 · Aug 3, 2018 · Jan 15, 2019
diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
@@ -421,6 +421,7 @@ Other Enhancements
 - :func:`pandas.DataFrame.to_sql` has gained the ``method`` argument to control SQL insertion clause. See the :ref:`insertion method <io.sql.method>` section in the documentation. (:issue:`8953`)
 - :meth:`DataFrame.corrwith` now supports Spearman's rank correlation, Kendall's tau as well as callable correlation methods. (:issue:`21925`)
 - :meth:`DataFrame.to_json`, :meth:`DataFrame.to_csv`, :meth:`DataFrame.to_pickle`, and :meth:`DataFrame.to_XXX` etc. now support tilde(~) in path argument. (:issue:`23473`)
+- :func:`qcut` now accepts ``bounded`` as a keyword argument, allowing for unbounded quantiles such that the lower/upper bounds are ``-inf``/``inf`` (:issue:`17282`)
 
 .. _whatsnew_0240.api_breaking:
 

diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py
@@ -10,7 +10,7 @@
 from pandas.core.dtypes.common import (
     _NS_DTYPE, ensure_int64, is_categorical_dtype, is_datetime64_dtype,
     is_datetime64tz_dtype, is_datetime_or_timedelta_dtype, is_integer,
-    is_scalar, is_timedelta64_dtype)
+    is_integer_dtype, is_scalar, is_timedelta64_dtype)
 from pandas.core.dtypes.missing import isna
 
 from pandas import (
@@ -244,7 +244,8 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3,
                                 series_index, name, dtype)
 
 
-def qcut(x, q, labels=None, retbins=False, precision=3, duplicates='raise'):
+def qcut(x, q, labels=None, retbins=False, precision=3, duplicates='raise',
+         bounded=True):
     """
     Quantile-based discretization function. Discretize variable into
     equal-sized buckets based on rank or based on sample quantiles. For example
@@ -271,6 +272,12 @@ def qcut(x, q, labels=None, retbins=False, precision=3, duplicates='raise'):
 
         .. versionadded:: 0.20.0
 
+    bounded : bool, default True
+        If True, use the min/max of the data as the outermost bin edges;
+        if False, use -inf/inf. Ignored for datetime/timedelta data.
+
+        .. versionadded:: 0.24.0
+
     Returns
     -------
     out : Categorical or Series or array of integers if labels is False
@@ -308,6 +315,11 @@ def qcut(x, q, labels=None, retbins=False, precision=3, duplicates='raise'):
     else:
         quantiles = q
     bins = algos.quantile(x, quantiles)
+    if not bounded and not dtype:
+        if is_integer_dtype(bins):
+            bins = bins.astype(np.float64)
+        bins[0] = -np.inf
+        bins[-1] = np.inf
     fac, bins = _bins_to_cuts(x, bins, labels=labels,
                               precision=precision, include_lowest=True,
                               dtype=dtype, duplicates=duplicates)

diff --git a/pandas/tests/reshape/test_qcut.py b/pandas/tests/reshape/test_qcut.py
@@ -197,3 +197,30 @@ def test_date_like_qcut_bins(arg, expected_bins):
     ser = Series(arg)
     result, result_bins = qcut(ser, 2, retbins=True)
     tm.assert_index_equal(result_bins, expected_bins)
+
+
+def test_qcut_unbounded():
+    # GH 17282
+    labels = qcut(range(5), 4, bounded=False)
+    left = labels.categories.left.values
+    right = labels.categories.right.values
+    expected = np.array([-np.inf, 1.0, 2.0, 3.0, np.inf])
+    tm.assert_numpy_array_equal(left, expected[:-1])
+    tm.assert_numpy_array_equal(right, expected[1:])
+
+
+@pytest.mark.parametrize('bins', [3, np.linspace(0, 1, 4)])
+def test_datetimetz_qcut_unbounded(bins):
+    # GH 19872
+    tz = 'US/Eastern'
+    s = Series(date_range('20130101', periods=3, tz=tz))
+    result = qcut(s, bins, bounded=False)
+    expected = Series(IntervalIndex([
+        Interval(Timestamp("2012-12-31 23:59:59.999999999", tz=tz),
+                 Timestamp("2013-01-01 16:00:00", tz=tz)),
+        Interval(Timestamp("2013-01-01 16:00:00", tz=tz),
+                 Timestamp("2013-01-02 08:00:00", tz=tz)),
+        Interval(Timestamp("2013-01-02 08:00:00", tz=tz),
+                 Timestamp("2013-01-03 00:00:00", tz=tz))])).astype(
+        CDT(ordered=True))
+    tm.assert_series_equal(result, expected)