In [265]:
import datetime
from typing import Iterator

import numpy as np


def encode_date(date: datetime.date) -> int:
    """Pack a date into a single ``YYYYMMDD`` integer (e.g. 2018-01-31 -> 20180131)."""
    year_part = 10000 * date.year
    month_part = 100 * date.month
    return year_part + month_part + date.day


def generate_day_ints(dates: np.ndarray, day: int) -> Iterator[int]:
    """Yield one ``YYYYMMDD`` target code per calendar month present in ``dates``.

    Parameters
    ----------
    dates
        Iterable of date-like objects exposing ``.year`` and ``.month``.
    day
        Day of month to target in every month.

    Yields
    ------
    int
        ``year * 10000 + month * 100 + day`` for each distinct (year, month)
        pair found in ``dates``, in chronological order.

    Notes
    -----
    ``day`` is deliberately not validated against the month length: a month
    that has no such day still yields a code (e.g. 20180231).  That value is
    not a real date, but it sorts after every real day of its month.
    """
    # Distinct (year, month) pairs, not the product of unique years and unique
    # months: a range such as Nov 2017 - Feb 2018 must yield exactly the four
    # months it contains, not every month number paired with every year.
    year_months = sorted({(int(d.year), int(d.month)) for d in dates})

    day = int(day)
    for year, month in year_months:
        # Plain arithmetic; building a datetime.date first would only reject
        # the non-calendar days that are intentionally allowed here.
        yield year * 10000 + month * 100 + day

In [266]:
import pandas as pd

# Business days only (freq='B'), from 15 Jan 2018 through 17 Dec 2018.
date_rng = pd.date_range(start='1/15/2018', end='12/17/2018', freq='B')
# Despite the name, this holds the *encoded* YYYYMMDD integer of every day.
decoded_date_rng = np.array([encode_date(stamp) for stamp in date_rng.to_pydatetime()])

In [267]:
# Split the encoded days into one array per calendar month.
# Truncating to month precision makes consecutive days of the same month equal,
# so a non-zero difference marks the last day before a month change.
month_stamps = date_rng.to_numpy().astype('datetime64[M]')
month_starts = np.nonzero(np.diff(month_stamps))[0] + 1
date_rng_split = np.split(decoded_date_rng, month_starts)

In [276]:
# Aim for the 31st of every month covered by the range.
target_day_of_month = 31
target_dates = np.array([*generate_day_ints(date_rng, target_day_of_month)])

In [289]:
# Inspect the target codes; months without a 31st show non-calendar values such as 20180231.
target_dates

array([20180131, 20180231, 20180331, 20180431, 20180531, 20180631,
       20180731, 20180831, 20180931, 20181031, 20181131, 20181231])

In [305]:
# Sanity check: one target code per monthly chunk, so the two can be paired with zip().
len(target_dates) == len(date_rng_split) # expected: True

True

In [291]:
# First monthly chunk: the business days of January 2018, starting at the 15th.
date_rng_split[0]

array([20180115, 20180116, 20180117, 20180118, 20180119, 20180122,
       20180123, 20180124, 20180125, 20180126, 20180129, 20180130,
       20180131])

In [315]:
day_to_invest = []

# First and last encoded day of the whole range; targets outside are ignored.
glob_min = np.min(decoded_date_rng)
glob_max = np.max(decoded_date_rng)

for dates_of_month, target_day in zip(date_rng_split, target_dates):
    # Skip months whose target lies outside the covered date range.
    if not (glob_min <= target_day <= glob_max):
        continue

    # Position of the first available day on or after the target.
    idx = np.searchsorted(dates_of_month, target_day, side='left')
    # If the target falls after every day of the month, use the month's last day.
    day_to_invest.append(
        dates_of_month[idx] if idx < len(dates_of_month) else dates_of_month[-1]
    )

In [316]:
# Selected day per month. December is absent: its target (20181231) lies past the
# last day of the range (20181217), so the loop skips it.
day_to_invest

[np.int64(20180131),
 np.int64(20180228),
 np.int64(20180330),
 np.int64(20180430),
 np.int64(20180531),
 np.int64(20180629),
 np.int64(20180731),
 np.int64(20180831),
 np.int64(20180928),
 np.int64(20181031),
 np.int64(20181130)]

In [310]:
# Locate each selected day inside the full encoded range (left-most insertion point).
idx = decoded_date_rng.searchsorted(day_to_invest, side='left')

In [314]:
# Round-trip check: indexing with the positions found above should give back day_to_invest.
decoded_date_rng[idx]

array([20180131, 20180228, 20180330, 20180430, 20180531, 20180629,
       20180731, 20180831, 20180928, 20181031, 20181130])