# Data Creation: Day Types

This notebook builds a function that adds the type of day for each calendar day.

Including the type of day as a short-term predictor in energy load forecasts has been shown to be useful [here](https://www.mdpi.com/1996-1073/12/1/164/pdf) and [here](https://www.mdpi.com/1996-1073/11/5/1120/pdf). The types of day (exogenous variables) generated by this function are:

- named day of the week
- weekend or weekday
- holiday or special event


In [9]:
#import relevant libraries
from datetime import date
import pandas as pd
import holidays

In [5]:
#create a daily datetime range covering the whole of 2018
#ISO dates (YYYY-MM-DD) are unambiguous; a string like '31/12/2018' mixes
#day-first order into a parser that otherwise reads slashes month-first
dates = pd.date_range(start='2018-01-01', end='2018-12-31')
dates

DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
               '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08',
               '2018-01-09', '2018-01-10',
               ...
               '2018-12-22', '2018-12-23', '2018-12-24', '2018-12-25',
               '2018-12-26', '2018-12-27', '2018-12-28', '2018-12-29',
               '2018-12-30', '2018-12-31'],
              dtype='datetime64[ns]', length=365, freq='D')

In [8]:
#integer-encode the day of the week for every date in the range
#monday = 0
#sunday = 6
#(2018-01-01 was a monday, so the sequence starts at 0)
dates.weekday

Int64Index([0, 1, 2, 3, 4, 5, 6, 0, 1, 2,
            ...
            5, 6, 0, 1, 2, 3, 4, 5, 6, 0],
           dtype='int64', length=365)

In [6]:
#create an object with all the holidays in spain ('ES' is the country code)
#NOTE(review): newer releases of the holidays package deprecate CountryHoliday
#in favour of country_holidays - confirm against the installed version
spain_holidays = holidays.CountryHoliday('ES')

In [13]:
#spot-check a few dates against the spanish calendar, one result per line:
#new year's day 2019, christmas day 2018 and an ordinary december day
print(
    *(day in spain_holidays
      for day in (date(2019, 1, 1), date(2018, 12, 25), date(2018, 12, 5))),
    sep='\n',
)


True
True
False


In [14]:
#the holiday object also maps a date to the name of the holiday
#(here the date is passed as an ISO string instead of a date object)
spain_holidays.get('2018-12-25')

'Navidad'

In [36]:
def get_holidays(start='1/1/2018', stop='31/12/2018', country='ES'):
    """
    Build a daily calendar of day-type features for a date range.

    Parameters:
    start, stop - first and last day of the range (both included), passed
        straight to pd.date_range.
        NOTE(review): slash-separated strings such as the defaults are
        ambiguous between day-first and month-first; ISO 'YYYY-MM-DD'
        strings are the safer choice for callers.
    country - country code handed to the holidays package

    Produces a dataframe with a daily date time index and columns:
    weekday_id - numerical day of the week identifier, 0 for monday
    holiday_bool - boolean true or false for holiday
    holiday_name - name of the holiday, missing when holiday_bool is false

    Returns a dataframe
    """

    #generate the range of daily dates
    dates = pd.date_range(start=start, end=stop)

    #create the holiday object; newer releases of the holidays package expose
    #country_holidays and deprecate CountryHoliday, so use whichever exists
    make_calendar = getattr(holidays, 'country_holidays', None) or holidays.CountryHoliday
    country_holidays = make_calendar(country)

    #one pass per column; the loop name `day` avoids shadowing datetime.date
    is_holiday = [day in country_holidays for day in dates]
    holiday_names = [country_holidays.get(day) for day in dates]

    #assemble all three columns in a single frame instead of concatenating two
    return pd.DataFrame(
        {
            'weekday_id': list(dates.weekday),
            'holiday_bool': is_holiday,
            'holiday_name': holiday_names,
        },
        index=dates,
    )
    

In [37]:
#preview the first ten days rather than rendering the whole year
get_holidays().head(10)

Unnamed: 0,weekday_id,holiday_bool,holiday_name
2018-01-01,0,True,Año nuevo
2018-01-02,1,False,
2018-01-03,2,False,
2018-01-04,3,False,
2018-01-05,4,False,
2018-01-06,5,True,Epifanía del Señor
2018-01-07,6,False,
2018-01-08,0,False,
2018-01-09,1,False,
2018-01-10,2,False,
