Skip to content

Commit

Permalink
Merge pull request #8476 from jreback/td_std
Browse files Browse the repository at this point in the history
BUG: allow std to work with timedeltas (GH8471)
  • Loading branch information
jreback committed Oct 5, 2014
2 parents d22b382 + d27e37a commit 72a051c
Show file tree
Hide file tree
Showing 5 changed files with 79 additions and 70 deletions.
2 changes: 1 addition & 1 deletion doc/source/v0.15.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -638,7 +638,7 @@ TimedeltaIndex/Scalar
We introduce a new scalar type ``Timedelta``, which is a subclass of ``datetime.timedelta``, and behaves in a similar manner,
but allows compatibility with ``np.timedelta64`` types as well as a host of custom representations, parsing options, and attributes.
This type is very similar to how ``Timestamp`` works for ``datetimes``. It is a nice-API box for the type. See the :ref:`docs <timedeltas.timedeltas>`.
(:issue:`3009`, :issue:`4533`, :issue:`8209`, :issue:`8187`, :issue:`8190`, :issue:`7869`, :issue:`7661`, :issue:`8345`)
(:issue:`3009`, :issue:`4533`, :issue:`8209`, :issue:`8187`, :issue:`8190`, :issue:`7869`, :issue:`7661`, :issue:`8345`, :issue:`8471`)

.. warning::

Expand Down
86 changes: 34 additions & 52 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3950,60 +3950,42 @@ def mad(self, axis=None, skipna=None, level=None, **kwargs):
return np.abs(demeaned).mean(axis=axis, skipna=skipna)
cls.mad = mad

@Substitution(outname='variance',
desc="Return unbiased variance over requested "
"axis.\n\nNormalized by N-1 by default. "
"This can be changed using the ddof argument")
@Appender(_num_doc)
def var(self, axis=None, skipna=None, level=None, ddof=1, **kwargs):
if skipna is None:
skipna = True
if axis is None:
axis = self._stat_axis_number
if level is not None:
return self._agg_by_level('var', axis=axis, level=level,
skipna=skipna, ddof=ddof)
def _make_stat_function_ddof(name, desc, f):

return self._reduce(nanops.nanvar, axis=axis, skipna=skipna,
ddof=ddof)
cls.var = var

@Substitution(outname='stdev',
desc="Return unbiased standard deviation over requested "
"axis.\n\nNormalized by N-1 by default. "
"This can be changed using the ddof argument")
@Appender(_num_doc)
def std(self, axis=None, skipna=None, level=None, ddof=1, **kwargs):
if skipna is None:
skipna = True
if axis is None:
axis = self._stat_axis_number
if level is not None:
return self._agg_by_level('std', axis=axis, level=level,
skipna=skipna, ddof=ddof)
result = self.var(axis=axis, skipna=skipna, ddof=ddof)
if getattr(result, 'ndim', 0) > 0:
return result.apply(np.sqrt)
return np.sqrt(result)
cls.std = std

@Substitution(outname='standarderror',
desc="Return unbiased standard error of the mean over "
"requested axis.\n\nNormalized by N-1 by default. "
"This can be changed using the ddof argument")
@Appender(_num_doc)
def sem(self, axis=None, skipna=None, level=None, ddof=1, **kwargs):
if skipna is None:
skipna = True
if axis is None:
axis = self._stat_axis_number
if level is not None:
return self._agg_by_level('sem', axis=axis, level=level,
skipna=skipna, ddof=ddof)
@Substitution(outname=name, desc=desc)
@Appender(_num_doc)
def stat_func(self, axis=None, skipna=None, level=None, ddof=1,
**kwargs):
if skipna is None:
skipna = True
if axis is None:
axis = self._stat_axis_number
if level is not None:
return self._agg_by_level(name, axis=axis, level=level,
skipna=skipna, ddof=ddof)
return self._reduce(f, axis=axis,
skipna=skipna, ddof=ddof)
stat_func.__name__ = name
return stat_func

return self._reduce(nanops.nansem, axis=axis, skipna=skipna,
ddof=ddof)
cls.sem = sem
# Register the ddof-aware reductions on the class. Each row is
# (method name, description text, nanops function); the factory
# _make_stat_function_ddof turns a row into the actual method.
_ddof_stats = (
    ('sem',
     "Return unbiased standard error of the mean over "
     "requested axis.\n\nNormalized by N-1 by default. "
     "This can be changed using the ddof argument",
     nanops.nansem),
    ('var',
     "Return unbiased variance over requested "
     "axis.\n\nNormalized by N-1 by default. "
     "This can be changed using the ddof argument",
     nanops.nanvar),
    ('std',
     "Return unbiased standard deviation over requested "
     "axis.\n\nNormalized by N-1 by default. "
     "This can be changed using the ddof argument",
     nanops.nanstd),
)
for _stat_name, _stat_desc, _stat_func in _ddof_stats:
    setattr(cls, _stat_name,
            _make_stat_function_ddof(_stat_name, _stat_desc, _stat_func))

@Substitution(outname='compounded',
desc="Return the compound percentage of the values for "
Expand Down
36 changes: 23 additions & 13 deletions pandas/core/nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ def _wrap_results(result, dtype):
if not isinstance(result, np.ndarray):
result = lib.Timedelta(result)
else:
result = result.view(dtype)
result = result.astype('i8').view(dtype)

return result

Expand Down Expand Up @@ -295,7 +295,7 @@ def get_median(x):
if values.ndim > 1:
# there's a non-empty array to apply over otherwise numpy raises
if notempty:
return np.apply_along_axis(get_median, axis, values)
return _wrap_results(np.apply_along_axis(get_median, axis, values), dtype)

# must return the correct shape, but median is not defined for the
# empty set so return nans of shape "everything but the passed axis"
Expand All @@ -305,7 +305,7 @@ def get_median(x):
dims = np.arange(values.ndim)
ret = np.empty(shp[dims != axis])
ret.fill(np.nan)
return ret
return _wrap_results(ret, dtype)

# otherwise return a scalar value
return _wrap_results(get_median(values) if notempty else np.nan, dtype)
Expand All @@ -329,15 +329,8 @@ def _get_counts_nanvar(mask, axis, ddof):
return count, d


@disallow('M8','m8')
@bottleneck_switch(ddof=1)
def nanvar(values, axis=None, skipna=True, ddof=1):

# we are going to allow timedelta64[ns] here
# but NOT going to coerce them to the Timedelta type
# as this could cause overflow
# so var cannot be computed (but std can!)

def _nanvar(values, axis=None, skipna=True, ddof=1):
# private nanvar calculator
mask = isnull(values)
if not _is_floating_dtype(values):
values = values.astype('f8')
Expand All @@ -352,6 +345,23 @@ def nanvar(values, axis=None, skipna=True, ddof=1):
XX = _ensure_numeric((values ** 2).sum(axis))
return np.fabs((XX - X ** 2 / count) / d)

@disallow('M8')
@bottleneck_switch(ddof=1)
def nanstd(values, axis=None, skipna=True, ddof=1):
    """
    Compute the standard deviation of ``values``.

    The variance comes from the private ``_nanvar`` calculator; its square
    root is then handed to ``_wrap_results`` together with the dtype of the
    input.

    Parameters
    ----------
    values : array-like exposing a ``dtype`` attribute
    axis, skipna, ddof : forwarded unchanged to ``_nanvar``

    Notes
    -----
    Only 'M8' appears in the ``disallow`` list above, so timedelta input is
    presumably accepted here (unlike ``nanvar``) -- confirm against
    ``disallow``.
    """
    variance = _nanvar(values, axis=axis, skipna=skipna, ddof=ddof)
    stdev = np.sqrt(variance)
    return _wrap_results(stdev, values.dtype)

@disallow('M8','m8')
@bottleneck_switch(ddof=1)
def nanvar(values, axis=None, skipna=True, ddof=1):
    # Public variance entry point: the arithmetic lives in the private
    # _nanvar calculator, this wrapper only forwards its arguments.
    #
    # timedelta64[ns] is NOT accepted here ('m8' is in the disallow list
    # above). The result is deliberately not coerced to the Timedelta
    # type, as this could cause overflow, so var cannot be computed for
    # timedeltas (but std can -- see nanstd).
    variance = _nanvar(values, axis=axis, skipna=skipna, ddof=ddof)
    return variance

@disallow('M8','m8')
def nansem(values, axis=None, skipna=True, ddof=1):
var = nanvar(values, axis, skipna, ddof=ddof)
Expand Down Expand Up @@ -517,7 +527,7 @@ def nankurt(values, axis=None, skipna=True):
return result


@disallow('M8')
@disallow('M8','m8')
def nanprod(values, axis=None, skipna=True):
mask = isnull(values)
if skipna and not _is_any_int_dtype(values):
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/test_nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,10 @@ def test_nanvar(self):
self.check_funs_ddof(nanops.nanvar, np.var,
allow_complex=False, allow_date=False, allow_tdelta=False)

def test_nanstd(self):
    # nanstd is compared against np.std; timedelta input is permitted,
    # complex and datetime input are not.
    flags = dict(allow_complex=False, allow_date=False, allow_tdelta=True)
    self.check_funs_ddof(nanops.nanstd, np.std, **flags)

def test_nansem(self):
tm.skip_if_no_package('scipy.stats')
self.check_funs_ddof(nanops.nansem, np.var,
Expand Down
21 changes: 17 additions & 4 deletions pandas/tseries/tests/test_timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,6 +479,9 @@ def test_timedelta_ops(self):
expected = to_timedelta(timedelta(seconds=9))
self.assertEqual(result, expected)

result = td.to_frame().mean()
self.assertEqual(result[0], expected)

result = td.quantile(.1)
expected = Timedelta(np.timedelta64(2600,'ms'))
self.assertEqual(result, expected)
Expand All @@ -487,18 +490,28 @@ def test_timedelta_ops(self):
expected = to_timedelta('00:00:08')
self.assertEqual(result, expected)

result = td.to_frame().median()
self.assertEqual(result[0], expected)

# GH 6462
# consistency in returned values for sum
result = td.sum()
expected = to_timedelta('00:01:21')
self.assertEqual(result, expected)

# you can technically do a std, but var overflows
# so this is tricky
self.assertRaises(TypeError, lambda : td.std())
result = td.to_frame().sum()
self.assertEqual(result[0], expected)

# std
result = td.std()
expected = to_timedelta(Series(td.dropna().values).std())
self.assertEqual(result, expected)

result = td.to_frame().std()
self.assertEqual(result[0], expected)

# invalid ops
for op in ['skew','kurt','sem','var']:
for op in ['skew','kurt','sem','var','prod']:
self.assertRaises(TypeError, lambda : getattr(td,op)())

def test_timedelta_ops_scalar(self):
Expand Down

0 comments on commit 72a051c

Please sign in to comment.