## Create Date Dimension Table

In [7]:
# A date dimension table is a specialized table in a data warehouse that
# stores detailed information about dates. It serves as a centralized
# reference for all time-related data, enabling efficient organization,
# analysis, and reporting of temporal information.
# It can contain columns such as the name of the day, the name of the month,
# whether the date falls on a weekend (is_weekend), whether it is a holiday
# (is_holiday), and the quarter. For financial data, the fiscal year is
# often included as well.
#


In [8]:
# Import the datetime class from Python’s built-in datetime module.
from datetime import datetime

# Import data manipulation library Pandas.
import pandas as pd

# Bounds of the calendar covered by the date dimension. Only year, month and
# day are given, so the time part of each datetime defaults to midnight
# (00:00:00).
start_date = datetime(year=2023, month=1, day=1)  # lower bound: January 1st, 2023
end_date = datetime(year=2028, month=1, day=1)  # upper bound: January 1st, 2028
# A bare expression on the last line makes the notebook echo its value.
end_date


datetime.datetime(2028, 1, 1, 0, 0)

In [9]:
# Build the calendar. pd.date_range() returns a DatetimeIndex with one entry
# per step between the two bounds, and both bounds are included, so the
# sequence runs from January 1, 2023 through January 1, 2028.
# freq="D" (one entry per calendar day) is also what Pandas uses by default
# when only start and end are given; it is spelled out here so the
# granularity is obvious to the reader.
date_range = pd.date_range(start=start_date, end=end_date, freq="D")
# Wrap the index in a DataFrame whose only column is named "date".
date_df = pd.DataFrame({"date": date_range})
# Preview the first rows of the new DataFrame.
date_df.head()


Unnamed: 0,date
0,2023-01-01
1,2023-01-02
2,2023-01-03
3,2023-01-04
4,2023-01-05


In [10]:
# Peek at the calendar year of the first rows: select the "date" column,
# go through the .dt accessor to reach its datetime properties, read .year
# for every row, then keep only the first 5 values with .head().
date_df["date"].dt.year.head(n=5)


0    2023
1    2023
2    2023
3    2023
4    2023
Name: date, dtype: int32

In [11]:
# Derive the calendar attributes of the date dimension from the "date" column.
#
# date_df["date"].dt is the Pandas datetime accessor: it exposes vectorized
# datetime properties (year, month, day, weekday, ...) for a Series holding
# datetime-like values. It is fetched once and reused for every new column.
date_parts = date_df["date"].dt

# Each assignment below adds a new column to date_df.
date_df["year"] = date_parts.year    # calendar year of each date
date_df["month"] = date_parts.month  # month number of each date
date_df["day"] = date_parts.day      # day of the month of each date

# .weekday (an alias of .dayofweek) numbers the days 0-6 starting on Monday;
# adding 1 shifts the range to 1-7, so Monday = 1 and Sunday = 7.
date_df["day_of_week"] = date_parts.weekday + 1

# Flag weekends: .isin([6, 7]) is True where day_of_week is 6 (Saturday) or
# 7 (Sunday) and False otherwise.
date_df["is_weekend"] = date_df["day_of_week"].isin([6, 7])

# Display the resulting DataFrame.
date_df


Unnamed: 0,date,year,month,day,day_of_week,is_weekend
0,2023-01-01,2023,1,1,7,True
1,2023-01-02,2023,1,2,1,False
2,2023-01-03,2023,1,3,2,False
3,2023-01-04,2023,1,4,3,False
4,2023-01-05,2023,1,5,4,False
...,...,...,...,...,...,...
1822,2027-12-28,2027,12,28,2,False
1823,2027-12-29,2027,12,29,3,False
1824,2027-12-30,2027,12,30,4,False
1825,2027-12-31,2027,12,31,5,False


In [12]:
# Persist the date dimension as a CSV file. index=False leaves the
# DataFrame's row index out of the file, so only the columns are written.
date_df.to_csv(path_or_buf="date_dimension.csv", index=False)
