#### Deliverable 4: Date Dimension Implementation

In [2]:
import pandas as pd
from datetime import datetime, timedelta
import sqlite3 as lite

In [3]:
# Open the SQLite database file that will hold the date dimension table.
conn = lite.connect("store1.db")
# Cursor used further down for the read-back SELECT query.
cur = conn.cursor()

In [4]:
# Build one row per calendar day of 2024. Both endpoints are inclusive, and
# 2024 is a leap year, so the frame has 366 rows (index 0..365).
start_date = datetime(2024, 1, 1)
end_date = datetime(2024, 12, 31)

# pd.date_range yields a daily DatetimeIndex directly, so no hand-rolled
# while-loop (or mutable loop counter) is needed.
dates = pd.date_range(start=start_date, end=end_date, freq='D')

date_df = pd.DataFrame({'Date': dates})
date_df.head()

Unnamed: 0,Date
0,2024-01-01
1,2024-01-02
2,2024-01-03
3,2024-01-04
4,2024-01-05


In [5]:
# Check that the range ends on 2024-12-31 (last index 365, i.e. 366 rows for the leap year).
date_df.tail()

Unnamed: 0,Date
361,2024-12-27
362,2024-12-28
363,2024-12-29
364,2024-12-30
365,2024-12-31


In [6]:
# Derive the calendar attributes of the dimension from the Date column.
date_df['DateKey'] = date_df.index + 1   # surrogate key, 1 to 366 (2024 is a leap year)
date_df['DayNumberInMonth'] = date_df['Date'].dt.day
date_df['DayNumberInYear'] = date_df['Date'].dt.dayofyear
# NOTE: these are ISO-8601 week numbers. 2024-12-30 and 2024-12-31 belong to
# ISO week 1 of 2025, so they get WeekNumberInYear == 1 rather than 53.
date_df['WeekNumberInYear'] = date_df['Date'].dt.isocalendar().week
date_df['MonthNum'] = date_df['Date'].dt.month
date_df['MonthTxt'] = date_df['Date'].dt.strftime("%B")
date_df['Quarter'] = date_df['Date'].dt.quarter
date_df['Year'] = date_df['Date'].dt.year


# Fiscal year runs August through July and is labelled with the calendar year
# in which it starts: Aug-Dec keep the calendar year, Jan-Jul get the previous one.
# Vectorized with Series.where instead of a row-wise apply(axis=1).
date_df['Fiscal Year'] = date_df['Year'].where(
    date_df['MonthNum'] >= 8, date_df['Year'] - 1)

# dayofweek is Monday=0 ... Sunday=6, so >= 5 means Saturday or Sunday.
date_df['isWeekend'] = date_df['Date'].dt.dayofweek >= 5

In [7]:
# Spot-check a random handful of rows to eyeball the derived calendar columns.
date_df.sample(5)

Unnamed: 0,Date,DateKey,DayNumberInMonth,DayNumberInYear,WeekNumberInYear,MonthNum,MonthTxt,Quarter,Year,Fiscal Year,isWeekend
112,2024-04-22,113,22,113,17,4,April,2,2024,2023,False
177,2024-06-26,178,26,178,26,6,June,2,2024,2023,False
233,2024-08-21,234,21,234,34,8,August,3,2024,2024,False
346,2024-12-12,347,12,347,50,12,December,4,2024,2024,False
252,2024-09-09,253,9,253,37,9,September,3,2024,2024,False


In [8]:
def get_season(date):
    """Return the season name ('Winter', 'Spring', 'Summer' or 'Fall') for ``date``.

    Seasons start on fixed calendar dates within the date's own year:
    Spring on Mar 20, Summer on Jun 21, Fall on Sep 22 and Winter on Dec 21.
    Dates before Mar 20 are the tail of the previous winter.

    ``date`` must expose ``year``/``month``/``day`` and be comparable with
    ``datetime`` (a ``datetime`` or ``pandas.Timestamp``).
    """
    # Late December is already winter; nothing else needs checking.
    if (date.month, date.day) >= (12, 21):
        return 'Winter'

    yr = date.year
    # Remaining season starts, latest first, so the first hit is the most
    # recent boundary the date has passed.
    starts_latest_first = (
        ('Fall', datetime(yr, 9, 22)),
        ('Summer', datetime(yr, 6, 21)),
        ('Spring', datetime(yr, 3, 20)),
    )
    for name, start in starts_latest_first:
        if date >= start:
            return name

    # Jan 1 up to Mar 19.
    return 'Winter'


# Label every row with its season using get_season (element-wise over the Date column).
date_df['Season'] = date_df['Date'].apply(get_season)

In [9]:
# Spot-check a few random rows now that the Season column has been added.
date_df.sample(5)

Unnamed: 0,Date,DateKey,DayNumberInMonth,DayNumberInYear,WeekNumberInYear,MonthNum,MonthTxt,Quarter,Year,Fiscal Year,isWeekend,Season
67,2024-03-08,68,8,68,10,3,March,1,2024,2023,False,Winter
359,2024-12-25,360,25,360,52,12,December,4,2024,2024,False,Winter
179,2024-06-28,180,28,180,26,6,June,2,2024,2023,False,Summer
94,2024-04-04,95,4,95,14,4,April,2,2024,2023,False,Spring
53,2024-02-23,54,23,54,8,2,February,1,2024,2023,False,Winter


In [10]:
def is_holiday(date):
    """Return True if ``date`` falls on one of the US holidays tracked here.

    Fixed-date holidays: New Year's Day (Jan 1), Independence Day (Jul 4),
    Veterans Day (Nov 11) and Christmas Day (Dec 25).
    Floating holidays: MLK Day (3rd Monday of January), Presidents Day
    (3rd Monday of February), Memorial Day (last Monday of May), Labor Day
    (1st Monday of September), Columbus Day (2nd Monday of October) and
    Thanksgiving (4th Thursday of November).

    Holidays are evaluated for the calendar year of ``date``; weekend
    "observed" shifts are not applied.
    NOTE(review): Juneteenth (Jun 19) is not in the list -- add it if the
    dimension is meant to follow the current federal calendar.

    Parameters
    ----------
    date : datetime.datetime or pandas.Timestamp
        Must expose ``year`` and ``date()``.

    Returns
    -------
    bool
    """
    year = date.year

    def first_weekday_on_or_after(month, day, weekday=0):
        """First date on or after (month, day) with the given weekday (Mon=0 ... Sun=6)."""
        anchor = datetime(year, month, day)
        return anchor + timedelta(days=(weekday - anchor.weekday()) % 7)

    MONDAY, THURSDAY = 0, 3

    holidays = [
        datetime(year, 1, 1),     # New Year's Day
        datetime(year, 7, 4),     # Independence Day
        datetime(year, 11, 11),   # Veterans Day (fixed date, not a Monday holiday)
        datetime(year, 12, 25),   # Christmas Day
        # The n-th weekday of a month is the first such weekday on or after
        # day 7*(n-1)+1; the last Monday of May (31 days) is the first on/after the 25th.
        first_weekday_on_or_after(1, 15, MONDAY),     # MLK Day (3rd Monday)
        first_weekday_on_or_after(2, 15, MONDAY),     # Presidents Day (3rd Monday)
        first_weekday_on_or_after(5, 25, MONDAY),     # Memorial Day (last Monday)
        first_weekday_on_or_after(9, 1, MONDAY),      # Labor Day (1st Monday)
        first_weekday_on_or_after(10, 8, MONDAY),     # Columbus Day (2nd Monday)
        first_weekday_on_or_after(11, 22, THURSDAY),  # Thanksgiving (4th Thursday)
    ]

    # Compare on calendar date only so any time-of-day component is ignored.
    return date.date() in {h.date() for h in holidays}


# Flag each calendar day that is_holiday recognises, then spot-check a few random rows.
date_df['isHoliday'] = date_df['Date'].apply(is_holiday)
date_df.sample(5)

Unnamed: 0,Date,DateKey,DayNumberInMonth,DayNumberInYear,WeekNumberInYear,MonthNum,MonthTxt,Quarter,Year,Fiscal Year,isWeekend,Season,isHoliday
124,2024-05-04,125,4,125,18,5,May,2,2024,2023,True,Spring,False
50,2024-02-20,51,20,51,8,2,February,1,2024,2023,False,Winter,False
320,2024-11-16,321,16,321,46,11,November,4,2024,2024,True,Fall,False
105,2024-04-15,106,15,106,16,4,April,2,2024,2023,False,Spring,False
120,2024-04-30,121,30,121,18,4,April,2,2024,2023,False,Spring,False


In [13]:
# Final column layout: surrogate key first, then the date and its calendar
# attributes, then the boolean flags and the season.
column_order = [
    'DateKey',
    'Date',
    'DayNumberInMonth',
    'DayNumberInYear',
    'WeekNumberInYear',
    'MonthNum',
    'MonthTxt',
    'Quarter',
    'Year',
    'Fiscal Year',
    'isHoliday',
    'isWeekend',
    'Season',
]

# Select every row with the columns in the order listed above.
date_df = date_df.loc[:, column_order]
date_df.sample(5)

Unnamed: 0,DateKey,Date,DayNumberInMonth,DayNumberInYear,WeekNumberInYear,MonthNum,MonthTxt,Quarter,Year,Fiscal Year,isHoliday,isWeekend,Season
217,218,2024-08-05,5,218,32,8,August,3,2024,2024,False,False,Summer
313,314,2024-11-09,9,314,45,11,November,4,2024,2024,False,True,Fall
109,110,2024-04-19,19,110,16,4,April,2,2024,2023,False,False,Spring
222,223,2024-08-10,10,223,32,8,August,3,2024,2024,False,True,Summer
339,340,2024-12-05,5,340,49,12,December,4,2024,2024,False,False,Fall


In [32]:
# Sanity check: 2024-01-15 is the third Monday of January (MLK Day), so isHoliday should be True.
date_df[date_df['Date'] == "2024-01-15"]

Unnamed: 0,DateKey,Date,DayNumberInMonth,DayNumberInYear,WeekNumberInYear,MonthNum,MonthTxt,Quarter,Year,Fiscal Year,isHoliday,isWeekend,Season
14,15,2024-01-15,15,15,3,1,January,1,2024,2023,True,False,Winter


In [14]:
# Export the finished dimension to CSV; the DataFrame index is omitted from the file.
date_df.to_csv("date_dimension.csv", index=False)

In [15]:
# Create the Date Dimension table.
# Drop any copy left over from an earlier run first: a bare CREATE TABLE raises
# "table DateDimension already exists" when the notebook is re-run against an
# existing store1.db. The table is fully reloaded from date_df afterwards.
conn.execute("DROP TABLE IF EXISTS DateDimension")
conn.execute("""
    CREATE TABLE DateDimension (
        DateKey INT PRIMARY KEY,
        Date DATE,
        DayNumberInMonth INT,
        DayNumberInYear INT,
        WeekNumberInYear INT,
        MonthNum INT,
        MonthTxt VARCHAR(20),
        Quarter INT,
        Year INT,
        FiscalYear INT,
        isHoliday BOOLEAN,
        isWeekend BOOLEAN,
        Season VARCHAR(20)
    );
""")

<sqlite3.Cursor at 0x27a8f7b8a40>

In [16]:
# Load the rows into the DateDimension table defined by the CREATE TABLE statement.
# if_exists='replace' would drop that table and let pandas re-create it from
# inferred dtypes, discarding the PRIMARY KEY, the declared column types and the
# FiscalYear column name -- so append into the existing schema instead.
conn.execute("DELETE FROM DateDimension")   # clear rows from an earlier run so the cell can be re-executed
(
    date_df
    .rename(columns={'Fiscal Year': 'FiscalYear'})   # match the column name in the table definition
    .to_sql("DateDimension", conn, if_exists='append', index=False)
)

366

In [25]:
# Read back the first ten stored rows and label them with the dimension's
# column order, then display a random five of them.
res = cur.execute("select * from DateDimension limit 10").fetchall()
date_dimension = pd.DataFrame(data=res, columns=column_order)
date_dimension.sample(5)


Unnamed: 0,DateKey,Date,DayNumberInMonth,DayNumberInYear,WeekNumberInYear,MonthNum,MonthTxt,Quarter,Year,Fiscal Year,isHoliday,isWeekend,Season
9,10,2024-01-10 00:00:00,10,10,2,1,January,1,2024,2023,0,0,Winter
4,5,2024-01-05 00:00:00,5,5,1,1,January,1,2024,2023,0,0,Winter
3,4,2024-01-04 00:00:00,4,4,1,1,January,1,2024,2023,0,0,Winter
8,9,2024-01-09 00:00:00,9,9,2,1,January,1,2024,2023,0,0,Winter
6,7,2024-01-07 00:00:00,7,7,1,1,January,1,2024,2023,0,1,Winter


In [31]:
# Release the cursor first, then close the database connection.
cur.close()
conn.close()