In [1]:
import pandas as pd
import numpy as np

In [2]:
def merge0(series, start_time, end_time):
    summary_series = series.groupby(level=0).sum()
    merged = pd.Series([], index=[])
    indices = summary_series.index
    for d in pd.date_range(start_time, end_time):
        if d in indices:
            merged = merged.append(pd.Series([summary_series[d]], index=[d]))
        else:
            merged = merged.append(pd.Series([0], index=[d]))
    return merged.dropna().sort_index(ascending=True)


def merge1(series, start_time, end_time):
    summary_series = series.groupby(level=0).sum().sort_index(ascending=True)
    indices = pd.date_range(start_time, end_time)
    base_series = pd.Series(np.zeros(len(indices)), index=indices)
    merged = base_series + (summary_series[indices[0]:indices[-1]])
    return merged.fillna(0).astype("int64")


In [3]:
pd.date_range("2017-11-20", "2018-01-10")

DatetimeIndex(['2017-11-20', '2017-11-21', '2017-11-22', '2017-11-23',
               '2017-11-24', '2017-11-25', '2017-11-26', '2017-11-27',
               '2017-11-28', '2017-11-29', '2017-11-30', '2017-12-01',
               '2017-12-02', '2017-12-03', '2017-12-04', '2017-12-05',
               '2017-12-06', '2017-12-07', '2017-12-08', '2017-12-09',
               '2017-12-10', '2017-12-11', '2017-12-12', '2017-12-13',
               '2017-12-14', '2017-12-15', '2017-12-16', '2017-12-17',
               '2017-12-18', '2017-12-19', '2017-12-20', '2017-12-21',
               '2017-12-22', '2017-12-23', '2017-12-24', '2017-12-25',
               '2017-12-26', '2017-12-27', '2017-12-28', '2017-12-29',
               '2017-12-30', '2017-12-31', '2018-01-01', '2018-01-02',
               '2018-01-03', '2018-01-04', '2018-01-05', '2018-01-06',
               '2018-01-07', '2018-01-08', '2018-01-09', '2018-01-10'],
              dtype='datetime64[ns]', freq='D')

In [4]:
ranges = pd.date_range("2017-11-20", "2018-01-10")
series = pd.Series(np.arange(len(ranges)), index=ranges)
series = series.drop(ranges[::3])
series = series.append(series)

In [5]:
from pandas.util.testing import assert_series_equal
assert_series_equal(
    merge0(series, "2017-12-01", "2017-12-31"), merge1(series, "2017-12-01", "2017-12-31")
)

In [6]:
merge0(series, "2017-12-01", "2017-12-31")

2017-12-01    22
2017-12-02     0
2017-12-03    26
2017-12-04    28
2017-12-05     0
2017-12-06    32
2017-12-07    34
2017-12-08     0
2017-12-09    38
2017-12-10    40
2017-12-11     0
2017-12-12    44
2017-12-13    46
2017-12-14     0
2017-12-15    50
2017-12-16    52
2017-12-17     0
2017-12-18    56
2017-12-19    58
2017-12-20     0
2017-12-21    62
2017-12-22    64
2017-12-23     0
2017-12-24    68
2017-12-25    70
2017-12-26     0
2017-12-27    74
2017-12-28    76
2017-12-29     0
2017-12-30    80
2017-12-31    82
dtype: int64

In [7]:
%timeit -n10 merge0(series, "2017-12-01", "2017-12-31")

11.2 ms ± 967 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [25]:
%timeit -n10 merge1(series, "2017-12-01", "2017-12-31")

1.64 ms ± 402 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [26]:
%load_ext line_profiler

The line_profiler extension is already loaded. To reload it, use:
  %reload_ext line_profiler


In [29]:
%lprun -f merge0 merge0(series, "2017-12-01", "2017-12-31")

In [16]:
%timeit -n100 series.groupby(series.index).sum()

605 µs ± 125 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [18]:
%timeit -n100 series.groupby(level=0).sum()

627 µs ± 150 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [20]:
df =  series.groupby(level=0).sum()
df.index.is_unique

True

In [22]:
df.index.is_monotonic_increasing

True