In [97]:
import pandas as pd
import numpy as np
from datetime import date, timedelta
from calendar import isleap
import holidays

#### Set Start and End Dates

In [None]:
sdate = date(2019,1,1)   # start date
edate = date(2025,1,1)   # end date

#### Helper class for Holiday Feature

In [88]:
from pandas.tseries.holiday import AbstractHolidayCalendar, Holiday, \
    DateOffset, MO, next_monday, next_monday_or_tuesday, GoodFriday, EasterMonday


class Hoildays_England_and_Wales(AbstractHolidayCalendar):
    rules = [
        Holiday('New Years Day', month=1, day=1, observance=next_monday),
        GoodFriday,
        Holiday('Early May Bank Holiday', month=5, day=1, offset=DateOffset(weekday=MO(1))),
        Holiday('Spring Bank Holiday', month=5, day=31, offset=DateOffset(weekday=MO(-1))),
        Holiday('Summer Bank Holiday', month=8, day=31, offset=DateOffset(weekday=MO(-1))),
        Holiday('Christmas Day', month=12, day=25, observance=next_monday),
        Holiday('Boxing Day', month=12, day=26, observance=next_monday_or_tuesday)
    ]

In [135]:
holidays = Hoildays_England_and_Wales().holidays(
    start=sdate,
    end=edate)
holidays

DatetimeIndex(['2019-01-01', '2019-04-19', '2019-05-06', '2019-05-27',
               '2019-08-26', '2019-12-25', '2019-12-26', '2020-01-01',
               '2020-04-10', '2020-05-04', '2020-05-25', '2020-08-31',
               '2020-12-25', '2020-12-28', '2021-01-01', '2021-04-02',
               '2021-05-03', '2021-05-31', '2021-08-30', '2021-12-27',
               '2021-12-28', '2022-01-03', '2022-04-15', '2022-05-02',
               '2022-05-30', '2022-08-29', '2022-12-26', '2022-12-27',
               '2023-01-02', '2023-04-07', '2023-05-01', '2023-05-29',
               '2023-08-28', '2023-12-25', '2023-12-26', '2024-01-01',
               '2024-03-29', '2024-05-06', '2024-05-27', '2024-08-26',
               '2024-12-25', '2024-12-26', '2025-01-01'],
              dtype='datetime64[ns]', freq=None)

#### Helper Dicts

In [174]:
dict_quarter_name = {1 : 'Quarter1', 2 : 'Quarter2', 3 : 'Quarter3', 4 : 'Quarter4'}
dict_quarter_name_short = {1 : 'Q1', 2 : 'Q2', 3 : 'Q3', 4 : 'Q4'}
dict_halfyear_name = {1 : 'Half1', 2 : 'Half2'}
dict_halfyear_name_short = {1 : 'H1', 2 : 'H2'}

#### Create Features

In [191]:
# create features

tmp = pd.date_range(sdate, edate-timedelta(days=1),freq='d')
df = pd.DataFrame(tmp, columns=['calendar_date'])
df['smart_key'] = df['calendar_date'].dt.strftime('%Y%m%d')

df['day_number'] = tmp.day
df['day_name_short'] = tmp.day_name()
df['day_name_short'] = df['day_name_short'].str[0:3]
df['day_name'] = tmp.day_name()

df['week_number_in_year'] = tmp.weekofyear

df['month_number_in_year'] = tmp.month
df['month_name_short'] = tmp.month_name()
df['month_name_short'] = df['month_name_short'].str[0:3]
df['month_name'] = tmp.month_name()

df['quarter_number_in_year'] = tmp.quarter
df['quarter_name_short'] = df.quarter_number_in_year.map(dict_quarter_name_short)
df['quarter_name'] = df.quarter_number_in_year.map(dict_quarter_name)

df['half_year_number'] = (df.quarter_number_in_year + 1) // 2
df['half_year_name_short'] = df.half_year_number.map(dict_halfyear_name_short)
df['half_year_name'] = df.half_year_number.map(dict_halfyear_name)

df['year_number'] = tmp.year

df['is_weekend'] = ( (tmp.dayofweek) // 5 == 1 ).astype(int)
df['is_holiday_england_wales'] = df['calendar_date'].isin(holidays).astype(int)

# set index
df = df.set_index('smart_key')

# add leading zeros
df['day_number'] = df.day_number.map('{:02}'.format).astype('str')
df['week_number_in_year'] = df.week_number_in_year.map('{:02}'.format).astype('str')
df['month_number_in_year'] = df.month_number_in_year.map('{:02}'.format).astype('str')
df.to_csv('ww_master_calendar_date.csv')
df.head()

Unnamed: 0_level_0,calendar_date,day_number,day_name_short,day_name,week_number_in_year,month_number_in_year,month_name_short,month_name,quarter_number_in_year,quarter_name_short,quarter_name,half_year_number,half_year_name_short,half_year_name,year_number,is_weekend,is_holiday_england_wales
smart_key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
20190101,2019-01-01,1,Tue,Tuesday,1,1,Jan,January,1,Q1,Quarter1,1,H1,Half1,2019,0,1
20190102,2019-01-02,2,Wed,Wednesday,1,1,Jan,January,1,Q1,Quarter1,1,H1,Half1,2019,0,0
20190103,2019-01-03,3,Thu,Thursday,1,1,Jan,January,1,Q1,Quarter1,1,H1,Half1,2019,0,0
20190104,2019-01-04,4,Fri,Friday,1,1,Jan,January,1,Q1,Quarter1,1,H1,Half1,2019,0,0
20190105,2019-01-05,5,Sat,Saturday,1,1,Jan,January,1,Q1,Quarter1,1,H1,Half1,2019,1,0


#### Policy Tracker File (not used)

In [178]:
# fp = '20201112 - COVID-19 policy tracker - 02 - Measures to limit spread.csv'
# tmp = pd.read_csv(fp,
#                  dtype=str,
#                  encoding='ISO-8859-1', # to prevent unicode error
#                  keep_default_na=True,
#                  dayfirst=True,
#                  #infer_datetime_format=True,
#                  parse_dates = ['Date'])
# tmp = tmp.replace(r'\s+', np.nan, regex=True).replace('', np.nan) 
# print(tmp.shape)
# tmp.head()
# tmp = tmp.set_index(pd.DatetimeIndex(tmp['Date']))
# print(type(tmp))
# # df.Date = df.Date.dropna()
# events = tmp.index.dropna()

In [140]:
# df['is_measure_to_limit_spread'] = df['date'].isin(events)
# df
# df.to_csv('ww_master_dates.csv')