diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py index c66654ee1e006..3c0e2869357ae 100644 --- a/asv_bench/benchmarks/series_methods.py +++ b/asv_bench/benchmarks/series_methods.py @@ -111,6 +111,7 @@ def setup(self): def time_series_dropna_int64(self): self.s.dropna() + class series_dropna_datetime(object): goal_time = 0.2 @@ -120,3 +121,14 @@ def setup(self): def time_series_dropna_datetime(self): self.s.dropna() + + +class series_clip(object): + """Benchmark ``Series.clip`` with scalar bounds on a small Series.""" + goal_time = 0.2 + + def setup(self): + self.s = pd.Series(np.random.randn(50)) + + def time_series_clip(self): + self.s.clip(0, 1) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index 03579dab0d6a3..10a6b4354290d 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -19,7 +19,7 @@ Highlights include: Enhancements ~~~~~~~~~~~~ -- Unblocked access to additional compression types supported in pytables: 'blosc:blosclz, 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy', 'blosc:zlib', 'blosc:zstd' (:issue:`14478`) +- Unblocked access to additional compression types supported in pytables: 'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy', 'blosc:zlib', 'blosc:zstd' (:issue:`14478`) .. _whatsnew_0202.performance: @@ -28,6 +28,7 @@ Performance Improvements - Performance regression fix when indexing with a list-like (:issue:`16285`) -- Performance regression fix for small MultiIndexes (:issuse:`16319`) +- Performance regression fix for small MultiIndexes (:issue:`16319`) +- Improved performance of ``.clip()`` with scalar arguments (:issue:`15400`) .. 
_whatsnew_0202.bug_fixes: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 777cfcae7a326..3e1c5c3f354fd 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -14,6 +14,7 @@ _ensure_int64, needs_i8_conversion, is_scalar, + is_number, is_integer, is_bool, is_bool_dtype, is_numeric_dtype, @@ -4104,6 +4105,30 @@ def isnull(self): def notnull(self): return notnull(self).__finalize__(self) + def _clip_with_scalar(self, lower, upper): + """ + Clip ``self.values`` to scalar bounds and wrap the result in a new object. + + Either bound may be None to leave that side unbounded. Raises + ValueError when a bound is NA. + """ + if ((lower is not None and np.any(isnull(lower))) or + (upper is not None and np.any(isnull(upper)))): + raise ValueError("Cannot use an NA value as a clip threshold") + + values = self.values + mask = isnull(values) + result = values + if upper is not None: + result = np.where(result >= upper, upper, result) + if lower is not None: + result = np.where(result <= lower, lower, result) + if result is not values and np.any(mask): + # np.where allocated a new array, so self.values is never written to + result[mask] = np.nan + return self._constructor( + result, **self._construct_axes_dict()).__finalize__(self) + def clip(self, lower=None, upper=None, axis=None, *args, **kwargs): """ Trim values at input threshold(s). 
@@ -4122,12 +4139,13 @@ def clip(self, lower=None, upper=None, axis=None, *args, **kwargs): Examples -------- >>> df - 0 1 + 0 1 0 0.335232 -1.256177 1 -1.367855 0.746646 2 0.027753 -1.176076 3 0.230930 -0.679613 4 1.261967 0.570967 + >>> df.clip(-1.0, 0.5) 0 1 0 0.335232 -1.000000 @@ -4135,6 +4153,7 @@ def clip(self, lower=None, upper=None, axis=None, *args, **kwargs): 2 0.027753 -1.000000 3 0.230930 -0.679613 4 0.500000 0.500000 + >>> t 0 -0.3 1 -0.2 @@ -4142,6 +4161,7 @@ def clip(self, lower=None, upper=None, axis=None, *args, **kwargs): 3 0.0 4 0.1 dtype: float64 + >>> df.clip(t, t + 1, axis=0) 0 1 0 0.335232 -0.300000 @@ -4160,6 +4180,11 @@ def clip(self, lower=None, upper=None, axis=None, *args, **kwargs): if is_scalar(lower) and is_scalar(upper): lower, upper = min(lower, upper), max(lower, upper) + # fast-path for scalars + if ((lower is None or (is_scalar(lower) and is_number(lower))) and + (upper is None or (is_scalar(upper) and is_number(upper)))): + return self._clip_with_scalar(lower, upper) + result = self if lower is not None: result = result.clip_lower(lower, axis) @@ -4189,6 +4214,9 @@ def clip_upper(self, threshold, axis=None): if np.any(isnull(threshold)): raise ValueError("Cannot use an NA value as a clip threshold") + if is_scalar(threshold) and is_number(threshold): + return self._clip_with_scalar(None, threshold) + subset = self.le(threshold, axis=axis) | isnull(self) return self.where(subset, threshold, axis=axis) @@ -4213,6 +4241,9 @@ def clip_lower(self, threshold, axis=None): if np.any(isnull(threshold)): raise ValueError("Cannot use an NA value as a clip threshold") + if is_scalar(threshold) and is_number(threshold): + return self._clip_with_scalar(threshold, None) + subset = self.ge(threshold, axis=axis) | isnull(self) return self.where(subset, threshold, axis=axis) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index ec6a118ec3639..18c6c9a6dd021 100644 --- 
a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1011,6 +1011,7 @@ def test_clip_against_series(self): lower = Series([1.0, 2.0, 3.0]) upper = Series([1.5, 2.5, 3.5]) + assert_series_equal(s.clip(lower, upper), Series([1.0, 2.0, 3.5])) assert_series_equal(s.clip(1.5, upper), Series([1.5, 1.5, 3.5]))