Skip to content

Commit

Permalink
CLN: make .quantile more datetimelike friendly
Browse files Browse the repository at this point in the history
  • Loading branch information
jreback committed Apr 3, 2016
1 parent a32674b commit b7ec843
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 37 deletions.
8 changes: 2 additions & 6 deletions doc/source/whatsnew/v0.18.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,8 @@ Bug Fixes

- Bug in ``Timestamp.__repr__`` that caused ``pprint`` to fail in nested structures (:issue:`12622`)
- Bug in ``Timedelta.min`` and ``Timedelta.max``, the properties now report the true minimum/maximum ``timedeltas`` as recognized by Pandas. See :ref:`documentation <timedeltas.limitations>`. (:issue:`12727`)
- Bug in ``.quantile()`` where interpolation could unexpectedly coerce the result to ``float`` (:issue:`12772`)
- Bug in ``.quantile()`` where an empty ``Series`` could return a scalar rather than an empty ``Series`` (:issue:`12772`)



Expand Down Expand Up @@ -228,9 +230,3 @@ Bug Fixes
- Bug in ``.describe()`` resets categorical columns information (:issue:`11558`)
- Bug where ``loffset`` argument was not applied when calling ``resample().count()`` on a timeseries (:issue:`12725`)
- ``pd.read_excel()`` now accepts path objects (e.g. ``pathlib.Path``, ``py.path.local``) for the file path, in line with other ``read_*`` functions (:issue:`12655`)




- Bug in ``.quantile`` with interpolation may coerce to ``float`` unexpectedly (:issue:`12772`)
- Bug in ``.quantile`` with empty Series may return scalar rather than empty Series (:issue:`12772`)
66 changes: 37 additions & 29 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -1255,18 +1255,32 @@ def equals(self, other):
return False
return array_equivalent(self.values, other.values)

def quantile(self, values, qs, **kwargs):
def quantile(self, qs, mgr=None, **kwargs):
"""
compute the quantiles of the block's values
Parameters
----------
qs : a scalar or list of the quantiles to be computed
"""

values = self.get_values()
values, mask, _, _ = self._try_coerce_args(values, values)
if not lib.isscalar(mask) and mask.any():
values = values[~mask]

if len(values) == 0:
if com.is_list_like(qs):
return np.array([self.fill_value])
result = np.array([self.fill_value])
else:
return self._na_value

if com.is_list_like(qs):
result = self._na_value
elif com.is_list_like(qs):
values = [_quantile(values, x * 100, **kwargs) for x in qs]
return np.array(values)
result = np.array(values)
else:
return _quantile(values, qs * 100, **kwargs)
result = _quantile(values, qs * 100, **kwargs)

return self._try_coerce_result(result)


class NonConsolidatableMixIn(object):
Expand Down Expand Up @@ -1499,22 +1513,6 @@ def get_values(self, dtype=None):
self._box_func).reshape(self.values.shape)
return self.values

def quantile(self, values, qs, **kwargs):
values = values.view('i8')
mask = values == self.fill_value
if mask.any():
values = values[~mask]
result = Block.quantile(self, values, qs, **kwargs)

if com.is_datetime64tz_dtype(self):
# TODO: temporary logic to avoid GH 12619 and GH 12772,
# which affect DatetimeTZBlock._try_coerce_result for np.ndarray
if isinstance(result, np.ndarray) and values.ndim > 0:
result = self._holder(result, tz='UTC')
result = result.tz_convert(self.values.tz)
return result
return self._try_coerce_result(result)


class TimeDeltaBlock(DatetimeLikeBlockMixin, IntBlock):
__slots__ = ()
Expand Down Expand Up @@ -2274,12 +2272,14 @@ def _try_coerce_args(self, values, other):

def _try_coerce_result(self, result):
""" reverse of try_coerce_args """
result = super(DatetimeTZBlock, self)._try_coerce_result(result)

if isinstance(result, np.ndarray):
result = self._holder(result, tz=self.values.tz)
if result.dtype.kind in ['i', 'f', 'O']:
result = result.astype('M8[ns]')
elif isinstance(result, (np.integer, np.float, np.datetime64)):
result = lib.Timestamp(result, tz=self.values.tz)
result = lib.Timestamp(result).tz_localize(self.values.tz)
if isinstance(result, np.ndarray):
result = self._holder(result).tz_localize(self.values.tz)

return result

@property
Expand Down Expand Up @@ -2806,7 +2806,7 @@ def _verify_integrity(self):
len(self.items), tot_items))

def apply(self, f, axes=None, filter=None, do_integrity_check=False,
consolidate=True, **kwargs):
consolidate=True, raw=False, **kwargs):
"""
iterate over the blocks, collect and create a new block manager
Expand All @@ -2820,6 +2820,7 @@ def apply(self, f, axes=None, filter=None, do_integrity_check=False,
integrity check
consolidate: boolean, default True. Join together blocks having same
dtype
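raw: boolean, default False. If True, return the result blocks as-is (the single result for a single-block manager) instead of wrapping them in a new block manager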
Returns
-------
Expand Down Expand Up @@ -2886,7 +2887,11 @@ def apply(self, f, axes=None, filter=None, do_integrity_check=False,
applied = getattr(b, f)(**kwargs)
result_blocks = _extend_blocks(applied, result_blocks)

if len(result_blocks) == 0:
if raw:
if self._is_single_block:
return result_blocks[0]
return result_blocks
elif len(result_blocks) == 0:
return self.make_empty(axes or self.axes)
bm = self.__class__(result_blocks, axes or self.axes,
do_integrity_check=do_integrity_check)
Expand All @@ -2902,6 +2907,9 @@ def where(self, **kwargs):
def eval(self, **kwargs):
return self.apply('eval', **kwargs)

def quantile(self, **kwargs):
return self.apply('quantile', raw=True, **kwargs)

def setitem(self, **kwargs):
return self.apply('setitem', **kwargs)

Expand Down
3 changes: 1 addition & 2 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1354,8 +1354,7 @@ def quantile(self, q=0.5, interpolation='linear'):
if not _np_version_under1p9:
kwargs.update({'interpolation': interpolation})

result = self._data._block.quantile(self.dropna()._values,
q, **kwargs)
result = self._data.quantile(qs=q, **kwargs)

if com.is_list_like(result):
# explicitly use Float64Index to coerce empty result to float dtype
Expand Down

0 comments on commit b7ec843

Please sign in to comment.