Merge pull request #11598 from lexual/issue_11433_slow_series_construct_no_values

PERF: Faster Series construction with no data and DatetimeIndex.
pandas-dev · Nov 15, 2015 · 67b1355 · 67b1355
2 parents 10fe47e + 20511dd
commit 67b1355
Show file tree

Hide file tree

Showing 3 changed files with 26 additions and 6 deletions.
diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py
@@ -1,6 +1,20 @@
 from .pandas_vb_common import *
 
 
+class series_constructor_no_data_datetime_index(object):
+    goal_time = 0.2
+
+    def setup(self):
+        self.dr = pd.date_range(
+            start=datetime(2015,10,26),
+            end=datetime(2016,1,1),
+            freq='10s'
+        )  # ~500k long
+
+    def time_series_constructor_no_data_datetime_index(self):
+        Series(data=None, index=self.dr)
+
+
 class series_isin_int64(object):
     goal_time = 0.2
 

diff --git a/doc/source/whatsnew/v0.17.1.txt b/doc/source/whatsnew/v0.17.1.txt
@@ -89,6 +89,7 @@ Performance Improvements
 
 - Improved performance to ``to_excel`` (:issue:`11352`)
 - Performance bug in repr of ``Categorical`` categories, which was rendering the strings before chopping them for display (:issue:`11305`)
+- Improved performance of ``Series`` constructor with no data and ``DatetimeIndex`` (:issue:`11433`)
 
 .. _whatsnew_0171.bug_fixes:
 

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -172,17 +172,22 @@ def __init__(self, data=None, index=None, dtype=None, name=None,
                         index = Index(_try_sort(data))
                 try:
                     if isinstance(index, DatetimeIndex):
-                        # coerce back to datetime objects for lookup
-                        data = _dict_compat(data)
-                        data = lib.fast_multiget(data, index.astype('O'),
-                                                 default=np.nan)
+                        if len(data):
+                            # coerce back to datetime objects for lookup
+                            data = _dict_compat(data)
+                            data = lib.fast_multiget(data, index.astype('O'),
+                                                     default=np.nan)
+                        else:
+                            data = np.nan
                     elif isinstance(index, PeriodIndex):
-                        data = [data.get(i, nan) for i in index]
+                        data = [data.get(i, nan)
+                                for i in index] if data else np.nan
                     else:
                         data = lib.fast_multiget(data, index.values,
                                                  default=np.nan)
                 except TypeError:
-                    data = [data.get(i, nan) for i in index]
+                    data = [data.get(i, nan)
+                            for i in index] if data else np.nan
 
             elif isinstance(data, SingleBlockManager):
                 if index is None: