### Analyze data with Holidays

Use an instance of `CustomBusinessDay`

In [26]:
import pandas as pd

# Read the price data from aapl_holiday.csv and preview the first five rows.
df = pd.read_csv(filepath_or_buffer="aapl_holiday.csv")
df.head(n=5)

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,3-Jul-17,144.88,145.3,143.1,143.5,14277848
1,5-Jul-17,143.69,144.79,142.72,144.09,21569557
2,6-Jul-17,143.02,143.5,142.41,142.73,24128782
3,7-Jul-17,142.9,144.75,142.9,144.18,19201712
4,10-Jul-17,144.11,145.95,143.37,145.06,21090636


In [29]:
# The plain business-day frequency ('B') only skips weekends, so the
# US holiday on 4 July 2017 still appears in the generated range.

rng = pd.date_range("7/1/2017", "7/21/2017", freq="B")
rng

DatetimeIndex(['2017-07-03', '2017-07-04', '2017-07-05', '2017-07-06',
               '2017-07-07', '2017-07-10', '2017-07-11', '2017-07-12',
               '2017-07-13', '2017-07-14', '2017-07-17', '2017-07-18',
               '2017-07-19', '2017-07-20', '2017-07-21'],
              dtype='datetime64[ns]', freq='B')

In [30]:
# Solve the US holiday problem with a holiday-aware business-day offset

from pandas.tseries.holiday import USFederalHolidayCalendar
from pandas.tseries.offsets import CustomBusinessDay

# Business-day offset that skips US federal holidays as well as weekends.
us_calendar = CustomBusinessDay(calendar=USFederalHolidayCalendar())

# Same July 2017 window as the plain 'B' range, but driven by the
# holiday-aware offset, so 4 July is dropped.

rng = pd.date_range("7/1/2017", "7/21/2017", freq=us_calendar)
rng

DatetimeIndex(['2017-07-03', '2017-07-05', '2017-07-06', '2017-07-07',
               '2017-07-10', '2017-07-11', '2017-07-12', '2017-07-13',
               '2017-07-14', '2017-07-17', '2017-07-18', '2017-07-19',
               '2017-07-20', '2017-07-21'],
              dtype='datetime64[ns]', freq='C')

In [31]:
# Label each row with the matching trading day from the holiday-aware range.
# `rng` must contain exactly one entry per row of `df`.
# Rebinding `df` (rather than passing inplace=True) keeps the call chainable
# and follows the usual pandas idiom; the resulting frame is the same.
df = df.set_index(rng)
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume
2017-07-03,3-Jul-17,144.88,145.3,143.1,143.5,14277848
2017-07-05,5-Jul-17,143.69,144.79,142.72,144.09,21569557
2017-07-06,6-Jul-17,143.02,143.5,142.41,142.73,24128782
2017-07-07,7-Jul-17,142.9,144.75,142.9,144.18,19201712
2017-07-10,10-Jul-17,144.11,145.95,143.37,145.06,21090636


In [32]:
# Define a custom holiday calendar; weekends are still excluded by CustomBusinessDay

from pandas.tseries.holiday import AbstractHolidayCalendar, nearest_workday, Holiday

class myCustomCalendar(AbstractHolidayCalendar):
    """Holiday calendar with a single rule: "Sir Mike's Birthday" on 4 October."""

    rules = [Holiday("Sir Mike's Birthday", month=10, day=4)]


# Business-day offset that treats the calendar's holiday as a non-working day.
sir_mike_bday = CustomBusinessDay(calendar=myCustomCalendar())

pd.date_range(start='10/1/2017', end='10/31/2017', freq=sir_mike_bday)

DatetimeIndex(['2017-10-02', '2017-10-03', '2017-10-05', '2017-10-06',
               '2017-10-09', '2017-10-10', '2017-10-11', '2017-10-12',
               '2017-10-13', '2017-10-16', '2017-10-17', '2017-10-18',
               '2017-10-19', '2017-10-20', '2017-10-23', '2017-10-24',
               '2017-10-25', '2017-10-26', '2017-10-27', '2017-10-30',
               '2017-10-31'],
              dtype='datetime64[ns]', freq='C')

In [39]:
# Custom working week: `weekmask` names the days that count as business days.

# Working days run from Sunday to Thursday.
tanauan_weekdays = "Sun Mon Tue Wed Thu"

# Build the offset from the weekmask defined above. The original cell passed
# `saudi_weekdays`, a name that is never defined, so it failed with NameError
# on a fresh kernel.
s = CustomBusinessDay(weekmask=tanauan_weekdays)

# 20 working days on or after 1 July 2017, using the custom week.
pd.date_range(start="7/1/2017", periods=20, freq=s)

DatetimeIndex(['2017-07-02', '2017-07-03', '2017-07-04', '2017-07-05',
               '2017-07-06', '2017-07-09', '2017-07-10', '2017-07-11',
               '2017-07-12', '2017-07-13', '2017-07-16', '2017-07-17',
               '2017-07-18', '2017-07-19', '2017-07-20', '2017-07-23',
               '2017-07-24', '2017-07-25', '2017-07-26', '2017-07-27'],
              dtype='datetime64[ns]', freq='C')

In [40]:
# `holidays` takes a list of dates; they are excluded on top of the weekmask.

t = CustomBusinessDay(
    weekmask=tanauan_weekdays,
    holidays=['2017-07-24', '2017-07-25'],
)

pd.date_range("7/1/2017", periods=25, freq=t)

DatetimeIndex(['2017-07-02', '2017-07-03', '2017-07-04', '2017-07-05',
               '2017-07-06', '2017-07-09', '2017-07-10', '2017-07-11',
               '2017-07-12', '2017-07-13', '2017-07-16', '2017-07-17',
               '2017-07-18', '2017-07-19', '2017-07-20', '2017-07-23',
               '2017-07-26', '2017-07-27', '2017-07-30', '2017-07-31',
               '2017-08-01', '2017-08-02', '2017-08-03', '2017-08-06',
               '2017-08-07'],
              dtype='datetime64[ns]', freq='C')