In [1]:
import datetime as dt
import os

import holidays
import pandas as pd

#### Create dataframe

In [2]:
def create_date_dataframe(start='2019-01-01', end='2024-12-31'):
    """Build a calendar table with one row per day from ``start`` to ``end``.

    Parameters
    ----------
    start, end
        Range bounds handed straight to ``pd.date_range``; both the first
        and the last day appear in the result.

    Returns
    -------
    pandas.DataFrame
        Columns, in order:

        * ``Date``    - the calendar day.
        * ``Day``     - weekday name (e.g. ``"Monday"``).
        * ``Weekday`` - weekday number, Monday=0 through Sunday=6.
        * ``Week``    - ISO week number. Days next to a year boundary can
          carry the week number of the adjacent ISO year (for example
          2024-12-30 is reported as week 1).
    """
    return pd.DataFrame({"Date": pd.date_range(start, end)}).assign(
        Day=lambda frame: frame["Date"].dt.day_name(),
        Weekday=lambda frame: frame["Date"].dt.weekday,
        Week=lambda frame: frame["Date"].dt.isocalendar().week,
    )

In [4]:
# Build the calendar table using the function's default start and end dates.
df_dates = create_date_dataframe()
# Show the last five rows to check how the range ends.
df_dates.tail(5)

Unnamed: 0,Date,Day,Weekday,Week
2187,2024-12-27,Friday,4,52
2188,2024-12-28,Saturday,5,52
2189,2024-12-29,Sunday,6,52
2190,2024-12-30,Monday,0,1
2191,2024-12-31,Tuesday,1,1


#### Transform data

In [6]:
# Compact YYYYMMDD string key for every date (2019-01-01 -> '20190101').
# strftime produces the key in a single step and does not depend on how
# pandas happens to render datetimes when they are cast to str.
df_dates['ID'] = df_dates['Date'].dt.strftime('%Y%m%d')

In [7]:
# Preview the first seven rows to check the ID column.
df_dates.head(7)

Unnamed: 0,Date,Day,Weekday,Week,ID
0,2019-01-01,Tuesday,1,1,20190101
1,2019-01-02,Wednesday,2,1,20190102
2,2019-01-03,Thursday,3,1,20190103
3,2019-01-04,Friday,4,1,20190104
4,2019-01-05,Saturday,5,1,20190105
5,2019-01-06,Sunday,6,1,20190106
6,2019-01-07,Monday,0,2,20190107


In [10]:
# Weekend flag: 1 for Saturday/Sunday, 0 otherwise. Weekdays are numbered
# Monday=0 .. Sunday=6, so values >= 5 are the weekend.
# Derived from Date with a vectorised comparison rather than by looping over
# the 'Weekday' column, so this cell still works when re-run after that
# helper column has been dropped.
df_dates['Is_Weekend'] = df_dates['Date'].dt.weekday.ge(5).astype('int64')

In [11]:
# Preview the first seven rows to check the Is_Weekend flag.
df_dates.head(7)

Unnamed: 0,Date,Day,Weekday,Week,ID,Is_Weekend
0,2019-01-01,Tuesday,1,1,20190101,0
1,2019-01-02,Wednesday,2,1,20190102,0
2,2019-01-03,Thursday,3,1,20190103,0
3,2019-01-04,Friday,4,1,20190104,0
4,2019-01-05,Saturday,5,1,20190105,1
5,2019-01-06,Sunday,6,1,20190106,1
6,2019-01-07,Monday,0,2,20190107,0


In [12]:
# Select country: United States holiday calendar from the `holidays` package.
# The cells below test dates against it with the `in` operator.
us_holidays = holidays.US()

In [13]:
# Spot-check: is the date in the row labelled 0 (2019-01-01) a US holiday?
print(df_dates.Date[0] in us_holidays)

True


In [14]:
# Holiday flag: 1 when the date is found in the US holiday calendar, else 0.
df_dates['Is_Holiday'] = [int(day in us_holidays) for day in df_dates['Date']]

In [15]:
# Preview the first rows to check the Is_Holiday flag.
df_dates.head()

Unnamed: 0,Date,Day,Weekday,Week,ID,Is_Weekend,Is_Holiday
0,2019-01-01,Tuesday,1,1,20190101,0,1
1,2019-01-02,Wednesday,2,1,20190102,0,0
2,2019-01-03,Thursday,3,1,20190103,0,0
3,2019-01-04,Friday,4,1,20190104,0,0
4,2019-01-05,Saturday,5,1,20190105,1,0


In [16]:
# Remove the numeric Weekday helper column from the table.
# Rebinding the result (instead of inplace=True) together with errors='ignore'
# makes this cell safe to re-run: a second execution is a no-op rather than a
# KeyError for the already-missing column.
df_dates = df_dates.drop(columns='Weekday', errors='ignore')

#### Reorder columns

In [17]:
# List the current column names before choosing the export order.
df_dates.columns

Index(['Date', 'Day', 'Week', 'ID', 'Is_Weekend', 'Is_Holiday'], dtype='object')

In [18]:
# Put the ID key first, then the date, its descriptive fields, and the flags.
df_dates = df_dates.loc[:, ['ID', 'Date', 'Day', 'Week', 'Is_Weekend', 'Is_Holiday']]

#### Export to CSV

In [19]:
# Destination of the exported date dimension. The default is the original,
# machine-specific location; set the DIM_DATES_CSV environment variable to
# write somewhere else without editing the notebook.
dim_dates_csv = os.environ.get(
    "DIM_DATES_CSV",
    "/Users/mattrende/Documents/Coding/hockey-reference/YouTube Project/csv/out/dim_dates.csv",
)
# index=False leaves the DataFrame's row index out of the file.
df_dates.to_csv(dim_dates_csv, index=False)