#### Deliverable 4: Date Dimension Implementation

In [1]:
import pandas as pd
from datetime import datetime, timedelta
import sqlite3 as lite

In [2]:
# Directory prefix under which exported files are written.
output_file_path = "./output/"

# SQLite database that will receive the date dimension table, plus a cursor
# used further down for the read-back query.
conn = lite.connect("store1.db")
cur = conn.cursor()

In [3]:
# Inclusive calendar range covered by the date dimension.
start_date = datetime(2024, 1, 1)
end_date = datetime(2025, 6, 30)

# One entry per calendar day, both endpoints included. Using pd.date_range
# instead of a manual while-loop avoids mutating the start variable (which
# previously ended up one day past `end_date` after the cell ran).
dates = pd.date_range(start=start_date, end=end_date, freq='D')

# Single-column frame with a default 0-based integer index; later cells derive
# the surrogate key from that index.
date_df = pd.DataFrame({'Date': dates})
date_df.head()

Unnamed: 0,Date
0,2024-01-01
1,2024-01-02
2,2024-01-03
3,2024-01-04
4,2024-01-05


In [4]:
# Show the last five rows to confirm the range ends where expected.
date_df.tail(n=5)

Unnamed: 0,Date
542,2025-06-26
543,2025-06-27
544,2025-06-28
545,2025-06-29
546,2025-06-30


In [5]:
# Derive the calendar attributes of the date dimension from the `Date` column.

# Surrogate key: consecutive integers 1..len(date_df), one per day.
# Relies on date_df having its default 0-based integer index.
date_df['DateKey'] = date_df.index + 1

date_df['DayNumberInMonth'] = date_df['Date'].dt.day
date_df['DayNumberInYear'] = date_df['Date'].dt.dayofyear
# ISO 8601 week number: days at the very start or end of a calendar year can
# belong to week 52/53 of the previous ISO year or week 1 of the next one.
date_df['WeekNumberInYear'] = date_df['Date'].dt.isocalendar().week
date_df['MonthNum'] = date_df['Date'].dt.month
date_df['MonthTxt'] = date_df['Date'].dt.strftime("%B")
date_df['Quarter'] = date_df['Date'].dt.quarter
date_df['Year'] = date_df['Date'].dt.year


# Fiscal year ends in July: August..December keep the calendar year,
# January..July are assigned to the previous calendar year (i.e. the fiscal
# year is labelled by the calendar year in which it starts).
# Vectorized with Series.where instead of a row-wise apply.
date_df['Fiscal Year'] = date_df['Year'].where(
    date_df['MonthNum'] >= 8, date_df['Year'] - 1)

# dayofweek is 0 for Monday, so 5 and 6 are Saturday and Sunday.
date_df['isWeekend'] = date_df['Date'].dt.dayofweek >= 5

In [6]:
# Last five rows, now including the derived calendar columns.
date_df.iloc[-5:]

Unnamed: 0,Date,DateKey,DayNumberInMonth,DayNumberInYear,WeekNumberInYear,MonthNum,MonthTxt,Quarter,Year,Fiscal Year,isWeekend
542,2025-06-26,543,26,177,26,6,June,2,2025,2024,False
543,2025-06-27,544,27,178,26,6,June,2,2025,2024,False
544,2025-06-28,545,28,179,26,6,June,2,2025,2024,True
545,2025-06-29,546,29,180,26,6,June,2,2025,2024,True
546,2025-06-30,547,30,181,27,6,June,2,2025,2024,False


In [7]:
# Spot-check the first rows of Q1 2025 (day-of-year and week restart here).
q1_2025_mask = date_df['Quarter'].eq(1) & date_df['Year'].eq(2025)
date_df.loc[q1_2025_mask].head()

Unnamed: 0,Date,DateKey,DayNumberInMonth,DayNumberInYear,WeekNumberInYear,MonthNum,MonthTxt,Quarter,Year,Fiscal Year,isWeekend
366,2025-01-01,367,1,1,1,1,January,1,2025,2024,False
367,2025-01-02,368,2,2,1,1,January,1,2025,2024,False
368,2025-01-03,369,3,3,1,1,January,1,2025,2024,False
369,2025-01-04,370,4,4,1,1,January,1,2025,2024,True
370,2025-01-05,371,5,5,1,1,January,1,2025,2024,True


In [8]:
def get_season(date):
    """Return the season name for a datetime-like value.

    Boundaries are fixed calendar dates within the value's own year:
    Spring starts Mar 20, Summer Jun 21, Fall Sep 22 and Winter Dec 21.
    Anything before Mar 20 is still 'Winter'.
    """
    # Late December is the start of winter.
    if date.month == 12 and date.day >= 21:
        return 'Winter'

    # Season starts for this year, latest first, so the first boundary that
    # the date has reached is the season it belongs to.
    boundaries = (
        (datetime(date.year, 9, 22), 'Fall'),
        (datetime(date.year, 6, 21), 'Summer'),
        (datetime(date.year, 3, 20), 'Spring'),
    )
    matching = (label for starts_on, label in boundaries if date >= starts_on)

    # No boundary reached yet: January through Mar 19 is winter.
    return next(matching, 'Winter')


# Label every day with its season.
date_df['Season'] = date_df['Date'].map(get_season)

In [9]:
# Random spot-check of five rows (unseeded, so the rows differ per run).
date_df.sample(n=5)

Unnamed: 0,Date,DateKey,DayNumberInMonth,DayNumberInYear,WeekNumberInYear,MonthNum,MonthTxt,Quarter,Year,Fiscal Year,isWeekend,Season
276,2024-10-03,277,3,277,40,10,October,4,2024,2024,False,Fall
202,2024-07-21,203,21,203,29,7,July,3,2024,2023,True,Summer
382,2025-01-17,383,17,17,3,1,January,1,2025,2024,False,Winter
138,2024-05-18,139,18,139,20,5,May,2,2024,2023,True,Spring
476,2025-04-21,477,21,111,17,4,April,2,2025,2024,False,Spring


In [10]:
def is_holiday(date):
    """Return True if `date` falls on one of the listed US holidays.

    Parameters
    ----------
    date : datetime-like
        Any object exposing ``.year`` and ``.date()`` (``datetime`` or
        ``pandas.Timestamp``). The time-of-day part is ignored.

    Returns
    -------
    bool
        True for New Year's Day, MLK Day, Presidents Day, Memorial Day,
        Independence Day, Labor Day, Columbus Day, Veterans Day,
        Thanksgiving and Christmas Day of the date's own year.
    """
    year = date.year
    MONDAY, THURSDAY = 0, 3  # datetime.weekday() numbering

    def first_weekday_on_or_after(month, day, weekday=MONDAY):
        # First occurrence of `weekday` on or after the given month/day.
        # Starting from day 1/8/15/22 this yields the 1st/2nd/3rd/4th
        # occurrence in the month; starting from May 25 it yields the last
        # Monday of May.
        anchor = datetime(year, month, day)
        return anchor + timedelta(days=(weekday - anchor.weekday()) % 7)

    holidays = [
        # Fixed-date holidays
        datetime(year, 1, 1),     # New Year's Day
        datetime(year, 7, 4),     # Independence Day
        datetime(year, 11, 11),   # Veterans Day (always Nov 11, not a Monday holiday)
        datetime(year, 12, 25),   # Christmas Day
        # Floating holidays
        first_weekday_on_or_after(1, 15),             # MLK Day (3rd Monday)
        first_weekday_on_or_after(2, 15),             # Presidents Day (3rd Monday)
        first_weekday_on_or_after(5, 25),             # Memorial Day (last Monday)
        first_weekday_on_or_after(9, 1),              # Labor Day (1st Monday)
        first_weekday_on_or_after(10, 8),             # Columbus Day (2nd Monday)
        first_weekday_on_or_after(11, 22, THURSDAY),  # Thanksgiving (4th Thursday)
    ]

    # Compare on calendar date only so a time component never causes a miss.
    return date.date() in {h.date() for h in holidays}


# Flag every day that is_holiday recognises, then spot-check five random rows.
date_df['isHoliday'] = date_df['Date'].map(is_holiday)
date_df.sample(n=5)

Unnamed: 0,Date,DateKey,DayNumberInMonth,DayNumberInYear,WeekNumberInYear,MonthNum,MonthTxt,Quarter,Year,Fiscal Year,isWeekend,Season,isHoliday
137,2024-05-17,138,17,138,20,5,May,2,2024,2023,False,Spring,False
89,2024-03-30,90,30,90,13,3,March,1,2024,2023,True,Spring,False
276,2024-10-03,277,3,277,40,10,October,4,2024,2024,False,Fall,False
194,2024-07-13,195,13,195,28,7,July,3,2024,2023,True,Summer,False
162,2024-06-11,163,11,163,24,6,June,2,2024,2023,False,Spring,False


In [11]:
# Reordering the columns
column_order = [
    'DateKey', 'Date', 'DayNumberInMonth', 'DayNumberInYear',
    'WeekNumberInYear', 'MonthNum', 'MonthTxt', 'Quarter', 'Year',
    'Fiscal Year', 'isHoliday', 'isWeekend', 'Season'
]

date_df = date_df[column_order]
date_df.sample(5)

Unnamed: 0,DateKey,Date,DayNumberInMonth,DayNumberInYear,WeekNumberInYear,MonthNum,MonthTxt,Quarter,Year,Fiscal Year,isHoliday,isWeekend,Season
230,231,2024-08-18,18,231,33,8,August,3,2024,2024,False,True,Summer
345,346,2024-12-11,11,346,50,12,December,4,2024,2024,False,False,Fall
246,247,2024-09-03,3,247,36,9,September,3,2024,2024,False,False,Summer
446,447,2025-03-22,22,81,12,3,March,1,2025,2024,False,True,Spring
30,31,2024-01-31,31,31,5,1,January,1,2024,2023,False,False,Winter


In [12]:
# Spot-check one known holiday (MLK Day 2024).
date_df.loc[date_df['Date'].eq("2024-01-15")]

Unnamed: 0,DateKey,Date,DayNumberInMonth,DayNumberInYear,WeekNumberInYear,MonthNum,MonthTxt,Quarter,Year,Fiscal Year,isHoliday,isWeekend,Season
14,15,2024-01-15,15,15,3,1,January,1,2024,2023,True,False,Winter


In [13]:
# Export the finished dimension as CSV, without the DataFrame index.
csv_path = f"{output_file_path}date_dimension.csv"
date_df.to_csv(csv_path, index=False)

In [14]:
# Remove any previous version of the table so this cell can be re-run.
conn.execute("Drop table if exists DateDimension")

# Schema of the date dimension; DateKey is the primary key.
create_date_dimension_sql = """
    CREATE TABLE DateDimension (
        DateKey INT PRIMARY KEY,
        Date Date,
        DayNumberInMonth INT,
        DayNumberInYear INT,
        WeekNumberInYear INT,
        MonthNum INT,
        MonthTxt VARCHAR(20),
        Quarter INT,
        Year INT,
        FiscalYear INT,
        isHoliday BOOLEAN,
        isWeekend BOOLEAN,
        Season VARCHAR(20)
    );
"""
conn.execute(create_date_dimension_sql)

<sqlite3.Cursor at 0x124ef4d9fc0>

In [15]:
# Store dates as ISO 'YYYY-MM-DD' text before loading them into SQLite.
parsed_dates = pd.to_datetime(date_df['Date'])
date_df['Date'] = parsed_dates.dt.strftime('%Y-%m-%d')

In [16]:
# Load the rows into the DateDimension table created in the previous cell.
# if_exists='replace' would drop that table and recreate it from the
# DataFrame's inferred types, silently discarding the declared schema
# (PRIMARY KEY on DateKey, column types, the `FiscalYear` column name).
# Appending keeps the schema; existing rows are cleared first so re-running
# this cell does not violate the primary key.
conn.execute("DELETE FROM DateDimension")

# The DataFrame column is 'Fiscal Year' but the table column is FiscalYear;
# rename on a copy so date_df itself is left unchanged.
date_df.rename(columns={'Fiscal Year': 'FiscalYear'}).to_sql(
    "DateDimension", conn, if_exists='append', index=False)

547

In [17]:
# Read the table back from SQLite to verify what was stored.
res = cur.execute("select * from DateDimension").fetchall()

# Rows come back as plain tuples; label them with the notebook's column order.
date_dimension = pd.DataFrame(data=res, columns=column_order)

date_dimension.tail(n=5)

Unnamed: 0,DateKey,Date,DayNumberInMonth,DayNumberInYear,WeekNumberInYear,MonthNum,MonthTxt,Quarter,Year,Fiscal Year,isHoliday,isWeekend,Season
542,543,2025-06-26,26,177,26,6,June,2,2025,2024,0,0,Summer
543,544,2025-06-27,27,178,26,6,June,2,2025,2024,0,0,Summer
544,545,2025-06-28,28,179,26,6,June,2,2025,2024,0,1,Summer
545,546,2025-06-29,29,180,26,6,June,2,2025,2024,0,1,Summer
546,547,2025-06-30,30,181,27,6,June,2,2025,2024,0,0,Summer


In [18]:
# Same Q1 2025 spot-check as before, this time on the data read from SQLite.
date_dimension.query("Quarter == 1 and Year == 2025").head()

Unnamed: 0,DateKey,Date,DayNumberInMonth,DayNumberInYear,WeekNumberInYear,MonthNum,MonthTxt,Quarter,Year,Fiscal Year,isHoliday,isWeekend,Season
366,367,2025-01-01,1,1,1,1,January,1,2025,2024,1,0,Winter
367,368,2025-01-02,2,2,1,1,January,1,2025,2024,0,0,Winter
368,369,2025-01-03,3,3,1,1,January,1,2025,2024,0,0,Winter
369,370,2025-01-04,4,4,1,1,January,1,2025,2024,0,1,Winter
370,371,2025-01-05,5,5,1,1,January,1,2025,2024,0,1,Winter


In [19]:
# Release the cursor first, then the connection that owns it.
for db_handle in (cur, conn):
    db_handle.close()