In [405]:
import datetime
from collections.abc import Iterator

import numpy as np


def encode_date(date: datetime.date) -> int:
    """Pack a date's year, month and day into one ``YYYYMMDD`` integer.

    Example: ``datetime.date(2018, 4, 30)`` becomes ``20180430``.
    """
    year_month = date.year * 100 + date.month
    return year_month * 100 + date.day


def generate_day_ints(dates: np.ndarray, day: int) -> Iterator[int]:
    """Yield one ``YYYYMMDD`` integer per distinct calendar month in ``dates``.

    For every unique (year, month) present in ``dates`` the generator yields
    ``year * 10000 + month * 100 + day``, in chronological order.

    The result is intentionally not validated against the calendar:
    ``day=31`` for April 2018 yields ``20180431``. That value still compares
    greater than every real day of April 2018 and smaller than ``20180501``.

    Args:
        dates: Array of ``datetime64`` values (any unit that can be cast to
            ``datetime64[M]``).
        day: Day-of-month component to encode; must be an integer.

    Yields:
        Plain Python ``int`` values, one per distinct month, ascending.

    Raises:
        TypeError: If ``day`` is not an integer.
    """
    if not isinstance(day, (int, np.integer)):
        raise TypeError(f"day must be an integer, got {type(day).__name__}")

    # Months elapsed since 1970-01; np.unique both de-duplicates and sorts.
    months_since_epoch = np.unique(dates.astype("datetime64[M]").astype(np.int64))
    for months in months_since_epoch:
        # divmod floors, so months before 1970 map to the right year as well.
        year_offset, month_index = divmod(int(months), 12)
        yield (1970 + year_offset) * 10000 + (month_index + 1) * 100 + int(day)

In [396]:
import pandas as pd

# Business-day (freq='B') calendar covering the analysis window.
date_rng = pd.date_range(start='4/15/2018', end='11/17/2018', freq='B')
# The same calendar as YYYYMMDD integers (despite the "decoded" name, these
# are the values produced by encode_date).
decoded_date_rng = np.array([encode_date(stamp) for stamp in date_rng.to_pydatetime()])

In [397]:

# Calendar year and 1-based month of every business day, obtained by
# truncating the timestamps to year / month resolution (both are counted
# from 1970-01 once cast to int).
date_year = 1970 + date_rng.to_numpy().astype("datetime64[Y]").astype(int)
date_month = 1 + date_rng.to_numpy().astype("datetime64[M]").astype(int) % 12

# One (year, month) row per business day.
new_dates = np.c_[date_year, date_month]

In [400]:
# Collapse the per-day (year, month) rows to one row per distinct month.
# With axis=0, np.unique compares whole rows and returns them sorted.
new_dates = np.unique(new_dates, axis=0)



In [402]:
# Cut the encoded calendar into one array per month: a new chunk starts
# right after every position where the month of consecutive days differs.
date_rng_split = np.split(
    decoded_date_rng,
    np.flatnonzero(np.diff(date_rng.to_numpy().astype('datetime64[M]'))) + 1,
)

In [406]:
# One target key per month in the calendar, aiming at day 31 of each month.
target_dates = np.array([*generate_day_ints(date_rng.to_numpy(), 31)])

In [407]:
# Inspect the target keys. Values such as 20180431 are not real calendar
# dates; they are only used as comparison keys against the encoded days.
target_dates

array([20180431, 20180531, 20180631, 20180731, 20180831, 20180931,
       20181031, 20181131])

In [326]:
# Sanity check: the selection loop zips these two sequences, so there must
# be exactly one target per monthly chunk.
len(target_dates) == len(date_rng_split) # True

True

In [327]:
# Inspect the first monthly chunk of encoded business days.
date_rng_split[0]

array([20180416, 20180417, 20180418, 20180419, 20180420, 20180423,
       20180424, 20180425, 20180426, 20180427, 20180430])

In [315]:
# For each month pick the investment day: the first business day on or after
# the month's target key, falling back to the month's last business day when
# the target lies past the end of that month's chunk.
day_to_invest = []

# Overall bounds of the calendar; targets outside them are ignored.
glob_min = np.min(decoded_date_rng)
glob_max = np.max(decoded_date_rng)

for dates_of_month, target_day in zip(date_rng_split, target_dates):
    if not (glob_min <= target_day <= glob_max):
        continue

    # side='left' gives the position of the first entry >= target_day.
    idx = np.searchsorted(dates_of_month, target_day, side='left')
    # idx == len(dates_of_month) means every day is earlier than the target,
    # so clamp to the final position (the last business day of the month).
    day_to_invest.append(dates_of_month[min(idx, len(dates_of_month) - 1)])

In [316]:
# Inspect the selected investment days (encoded as YYYYMMDD integers).
day_to_invest

[np.int64(20180131),
 np.int64(20180228),
 np.int64(20180330),
 np.int64(20180430),
 np.int64(20180531),
 np.int64(20180629),
 np.int64(20180731),
 np.int64(20180831),
 np.int64(20180928),
 np.int64(20181031),
 np.int64(20181130)]

In [310]:
# Locate each chosen investment day inside the full encoded calendar;
# side='left' returns the position of the first entry >= the searched value.
idx = decoded_date_rng.searchsorted(day_to_invest, side='left')

In [314]:
# Round-trip check: indexing the calendar with idx should give back the
# values in day_to_invest.
decoded_date_rng[idx]

array([20180131, 20180228, 20180330, 20180430, 20180531, 20180629,
       20180731, 20180831, 20180928, 20181031, 20181130])