Merge pull request #8476 from jreback/td_std

BUG: allow std to work with timedeltas (GH8471)
pandas-dev · Oct 5, 2014 · commit 72a051c
2 parents: d22b382 + d27e37a

Showing 5 changed files with 79 additions and 70 deletions.
diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt
@@ -638,7 +638,7 @@ TimedeltaIndex/Scalar
 We introduce a new scalar type ``Timedelta``, which is a subclass of ``datetime.timedelta``, and behaves in a similar manner,
 but allows compatibility with ``np.timedelta64`` types as well as a host of custom representation, parsing, and attributes.
 This type is very similar to how ``Timestamp`` works for ``datetimes``. It is a nice-API box for the type. See the :ref:`docs <timedeltas.timedeltas>`.
-(:issue:`3009`, :issue:`4533`, :issue:`8209`, :issue:`8187`, :issue:`8190`, :issue:`7869`, :issue:`7661`, :issue:`8345`)
+(:issue:`3009`, :issue:`4533`, :issue:`8209`, :issue:`8187`, :issue:`8190`, :issue:`7869`, :issue:`7661`, :issue:`8345`, :issue:`8471`)
 
 .. warning::
 

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -3950,60 +3950,42 @@ def mad(self,  axis=None, skipna=None, level=None, **kwargs):
             return np.abs(demeaned).mean(axis=axis, skipna=skipna)
         cls.mad = mad
 
-        @Substitution(outname='variance',
-                      desc="Return unbiased variance over requested "
-                           "axis.\n\nNormalized by N-1 by default. "
-                           "This can be changed using the ddof argument")
-        @Appender(_num_doc)
-        def var(self, axis=None, skipna=None, level=None, ddof=1, **kwargs):
-            if skipna is None:
-                skipna = True
-            if axis is None:
-                axis = self._stat_axis_number
-            if level is not None:
-                return self._agg_by_level('var', axis=axis, level=level,
-                                          skipna=skipna, ddof=ddof)
+        def _make_stat_function_ddof(name, desc, f):
 
-            return self._reduce(nanops.nanvar, axis=axis, skipna=skipna,
-                                ddof=ddof)
-        cls.var = var
-
-        @Substitution(outname='stdev',
-                      desc="Return unbiased standard deviation over requested "
-                           "axis.\n\nNormalized by N-1 by default. "
-                           "This can be changed using the ddof argument")
-        @Appender(_num_doc)
-        def std(self, axis=None, skipna=None, level=None, ddof=1, **kwargs):
-            if skipna is None:
-                skipna = True
-            if axis is None:
-                axis = self._stat_axis_number
-            if level is not None:
-                return self._agg_by_level('std', axis=axis, level=level,
-                                          skipna=skipna, ddof=ddof)
-            result = self.var(axis=axis, skipna=skipna, ddof=ddof)
-            if getattr(result, 'ndim', 0) > 0:
-                return result.apply(np.sqrt)
-            return np.sqrt(result)
-        cls.std = std
-
-        @Substitution(outname='standarderror',
-                      desc="Return unbiased standard error of the mean over "
-                           "requested axis.\n\nNormalized by N-1 by default. "
-                           "This can be changed using the ddof argument")
-        @Appender(_num_doc)
-        def sem(self, axis=None, skipna=None, level=None, ddof=1, **kwargs):
-            if skipna is None:
-                skipna = True
-            if axis is None:
-                axis = self._stat_axis_number
-            if level is not None:
-                return self._agg_by_level('sem', axis=axis, level=level,
-                                          skipna=skipna, ddof=ddof)
+            @Substitution(outname=name, desc=desc)
+            @Appender(_num_doc)
+            def stat_func(self, axis=None, skipna=None, level=None, ddof=1,
+                          **kwargs):
+                if skipna is None:
+                    skipna = True
+                if axis is None:
+                    axis = self._stat_axis_number
+                if level is not None:
+                    return self._agg_by_level(name, axis=axis, level=level,
+                                              skipna=skipna, ddof=ddof)
+                return self._reduce(f, axis=axis,
+                                    skipna=skipna, ddof=ddof)
+            stat_func.__name__ = name
+            return stat_func
 
-            return self._reduce(nanops.nansem, axis=axis, skipna=skipna,
-                                ddof=ddof)
-        cls.sem = sem
+        cls.sem = _make_stat_function_ddof(
+            'sem',
+            "Return unbiased standard error of the mean over "
+            "requested axis.\n\nNormalized by N-1 by default. "
+            "This can be changed using the ddof argument",
+            nanops.nansem)
+        cls.var = _make_stat_function_ddof(
+            'var',
+            "Return unbiased variance over requested "
+            "axis.\n\nNormalized by N-1 by default. "
+            "This can be changed using the ddof argument",
+            nanops.nanvar)
+        cls.std = _make_stat_function_ddof(
+            'std',
+            "Return unbiased standard deviation over requested "
+            "axis.\n\nNormalized by N-1 by default. "
+            "This can be changed using the ddof argument",
+            nanops.nanstd)
 
         @Substitution(outname='compounded',
                       desc="Return the compound percentage of the values for "

diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
@@ -228,7 +228,7 @@ def _wrap_results(result, dtype):
         if not isinstance(result, np.ndarray):
             result = lib.Timedelta(result)
         else:
-            result = result.view(dtype)
+            result = result.astype('i8').view(dtype)
 
     return result
 
@@ -295,7 +295,7 @@ def get_median(x):
     if values.ndim > 1:
         # there's a non-empty array to apply over otherwise numpy raises
         if notempty:
-            return np.apply_along_axis(get_median, axis, values)
+            return _wrap_results(np.apply_along_axis(get_median, axis, values), dtype)
 
         # must return the correct shape, but median is not defined for the
         # empty set so return nans of shape "everything but the passed axis"
@@ -305,7 +305,7 @@ def get_median(x):
         dims = np.arange(values.ndim)
         ret = np.empty(shp[dims != axis])
         ret.fill(np.nan)
-        return ret
+        return _wrap_results(ret, dtype)
 
     # otherwise return a scalar value
     return _wrap_results(get_median(values) if notempty else np.nan, dtype)
@@ -329,15 +329,8 @@ def _get_counts_nanvar(mask, axis, ddof):
     return count, d
 
 
-@disallow('M8','m8')
-@bottleneck_switch(ddof=1)
-def nanvar(values, axis=None, skipna=True, ddof=1):
-
-    # we are going to allow timedelta64[ns] here
-    # but NOT going to coerce them to the Timedelta type
-    # as this could cause overflow
-    # so var cannot be computed (but std can!)
-
+def _nanvar(values, axis=None, skipna=True, ddof=1):
+    # private nanvar calculator
     mask = isnull(values)
     if not _is_floating_dtype(values):
         values = values.astype('f8')
@@ -352,6 +345,23 @@ def nanvar(values, axis=None, skipna=True, ddof=1):
     XX = _ensure_numeric((values ** 2).sum(axis))
     return np.fabs((XX - X ** 2 / count) / d)
 
+@disallow('M8')
+@bottleneck_switch(ddof=1)
+def nanstd(values, axis=None, skipna=True, ddof=1):
+
+    result = np.sqrt(_nanvar(values, axis=axis, skipna=skipna, ddof=ddof))
+    return _wrap_results(result, values.dtype)
+
+@disallow('M8','m8')
+@bottleneck_switch(ddof=1)
+def nanvar(values, axis=None, skipna=True, ddof=1):
+
+    # we are going to allow timedelta64[ns] here
+    # but NOT going to coerce them to the Timedelta type
+    # as this could cause overflow
+    # so var cannot be computed (but std can!)
+    return _nanvar(values, axis=axis, skipna=skipna, ddof=ddof)
+
 @disallow('M8','m8')
 def nansem(values, axis=None, skipna=True, ddof=1):
     var = nanvar(values, axis, skipna, ddof=ddof)
@@ -517,7 +527,7 @@ def nankurt(values, axis=None, skipna=True):
         return result
 
 
-@disallow('M8')
+@disallow('M8','m8')
 def nanprod(values, axis=None, skipna=True):
     mask = isnull(values)
     if skipna and not _is_any_int_dtype(values):

diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py
@@ -332,6 +332,10 @@ def test_nanvar(self):
         self.check_funs_ddof(nanops.nanvar, np.var,
                              allow_complex=False, allow_date=False, allow_tdelta=False)
 
+    def test_nanstd(self):
+        self.check_funs_ddof(nanops.nanstd, np.std,
+                             allow_complex=False, allow_date=False, allow_tdelta=True)
+
     def test_nansem(self):
         tm.skip_if_no_package('scipy.stats')
         self.check_funs_ddof(nanops.nansem, np.var,

diff --git a/pandas/tseries/tests/test_timedeltas.py b/pandas/tseries/tests/test_timedeltas.py
@@ -479,6 +479,9 @@ def test_timedelta_ops(self):
         expected = to_timedelta(timedelta(seconds=9))
         self.assertEqual(result, expected)
 
+        result = td.to_frame().mean()
+        self.assertEqual(result[0], expected)
+
         result = td.quantile(.1)
         expected = Timedelta(np.timedelta64(2600,'ms'))
         self.assertEqual(result, expected)
@@ -487,18 +490,28 @@ def test_timedelta_ops(self):
         expected = to_timedelta('00:00:08')
         self.assertEqual(result, expected)
 
+        result = td.to_frame().median()
+        self.assertEqual(result[0], expected)
+
         # GH 6462
         # consistency in returned values for sum
         result = td.sum()
         expected = to_timedelta('00:01:21')
         self.assertEqual(result, expected)
 
-        # you can technically do a std, but var overflows
-        # so this is tricky
-        self.assertRaises(TypeError, lambda : td.std())
+        result = td.to_frame().sum()
+        self.assertEqual(result[0], expected)
+
+        # std
+        result = td.std()
+        expected = to_timedelta(Series(td.dropna().values).std())
+        self.assertEqual(result, expected)
+
+        result = td.to_frame().std()
+        self.assertEqual(result[0], expected)
 
         # invalid ops
-        for op in ['skew','kurt','sem','var']:
+        for op in ['skew','kurt','sem','var','prod']:
             self.assertRaises(TypeError, lambda : getattr(td,op)())
 
     def test_timedelta_ops_scalar(self):