From 20511dd76266dd70b7a073da1091a4f52f37a3a4 Mon Sep 17 00:00:00 2001 From: lexual Date: Sat, 14 Nov 2015 09:26:16 +1100 Subject: [PATCH] PERF: Faster Series construction with no data and DatetimeIndex. ref pydata/pandas#11433 Code taken from @jreback comment on pydata/pandas#11433 --- asv_bench/benchmarks/series_methods.py | 14 ++++++++++++++ doc/source/whatsnew/v0.17.1.txt | 1 + pandas/core/series.py | 17 +++++++++++------ 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py index a40ed3f1d6482..4e368c6d7cde2 100644 --- a/asv_bench/benchmarks/series_methods.py +++ b/asv_bench/benchmarks/series_methods.py @@ -1,6 +1,20 @@ from .pandas_vb_common import * +class series_constructor_no_data_datetime_index(object): + goal_time = 0.2 + + def setup(self): + self.dr = pd.date_range( + start=datetime(2015,10,26), + end=datetime(2016,1,1), + freq='10s' + ) # ~500k long + + def time_series_constructor_no_data_datetime_index(self): + Series(data=None, index=self.dr) + + class series_isin_int64(object): goal_time = 0.2 diff --git a/doc/source/whatsnew/v0.17.1.txt b/doc/source/whatsnew/v0.17.1.txt index 3d10566e47075..1b8cd270b3e8c 100755 --- a/doc/source/whatsnew/v0.17.1.txt +++ b/doc/source/whatsnew/v0.17.1.txt @@ -86,6 +86,7 @@ Performance Improvements - Improved performance to ``to_excel`` (:issue:`11352`) - Performance bug in repr of ``Categorical`` categories, which was rendering the strings before chopping them for display (:issue:`11305`) +- Improved performance of ``Series`` constructor with no data and ``DatetimeIndex`` (:issue:`11433`) .. 
_whatsnew_0171.bug_fixes: diff --git a/pandas/core/series.py b/pandas/core/series.py index 5106225cdd3c9..bc9a2f466530e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -171,17 +171,22 @@ def __init__(self, data=None, index=None, dtype=None, name=None, index = Index(_try_sort(data)) try: if isinstance(index, DatetimeIndex): - # coerce back to datetime objects for lookup - data = _dict_compat(data) - data = lib.fast_multiget(data, index.astype('O'), - default=np.nan) + if len(data): + # coerce back to datetime objects for lookup + data = _dict_compat(data) + data = lib.fast_multiget(data, index.astype('O'), + default=np.nan) + else: + data = np.nan elif isinstance(index, PeriodIndex): - data = [data.get(i, nan) for i in index] + data = [data.get(i, nan) + for i in index] if data else np.nan else: data = lib.fast_multiget(data, index.values, default=np.nan) except TypeError: - data = [data.get(i, nan) for i in index] + data = [data.get(i, nan) + for i in index] if data else np.nan elif isinstance(data, SingleBlockManager): if index is None: