In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
import pandas as pd

In [0]:
# Inclusive calendar range covered by the date dimension (ISO yyyy-MM-dd strings).
start_date, end_date = "2000-01-01", "2099-12-31"

In [0]:
# Build the date dimension: one row per calendar day from start_date to
# end_date (both inclusive), plus the derived calendar attributes.

# Number of days in the range; the +1 makes end_date itself part of the output.
num_days = (pd.to_datetime(end_date) - pd.to_datetime(start_date)).days + 1

# Full Gregorian leap-year rule: divisible by 4, except century years, which
# must also be divisible by 400. A plain "% 4" check only happens to be right
# for 2000-2099 and would mislabel 1900 or 2100 if the range were widened.
is_leap_year = (
    ((col("dte_year") % 4 == 0) & (col("dte_year") % 100 != 0))
    | (col("dte_year") % 400 == 0)
)

dates_df = (
    spark.range(0, num_days)
    # id is the day offset from start_date; turn it into the calendar date.
    .withColumn("dte_sk", (lit(start_date).cast("date") + col("id").cast("int")).cast("date"))
    .drop("id")
    # Integer form of the date, e.g. 2000-01-01 -> 20000101.
    .withColumn("dte_int", date_format(col("dte_sk"), "yyyyMMdd").cast("int"))
    .withColumn("dte_year", year(col("dte_sk")))
    .withColumn("dte_month", month(col("dte_sk")))
    .withColumn("dte_month_name", date_format(col("dte_sk"), "MMMM"))
    .withColumn("dte_day_of_mth", dayofmonth(col("dte_sk")))
    .withColumn("dte_day_name", date_format(col("dte_sk"), "EEEE"))
    # dayofweek numbers Sunday as 1 and Saturday as 7, hence the weekend test below.
    .withColumn("dte_day_of_week", dayofweek(col("dte_sk")))
    .withColumn("dte_is_weekend", when(col("dte_day_of_week").isin(1, 7), 1).otherwise(0))
    # NOTE: early-January dates can report the last week of the previous year
    # (2000-01-01 shows week 52 in the table preview).
    .withColumn("dte_week_of_year", weekofyear(col("dte_sk")))
    .withColumn("dte_quarter", quarter(col("dte_sk")))
    .withColumn("dte_is_leap_year", when(is_leap_year, 1).otherwise(0))
    .orderBy("dte_sk")
)

In [0]:
# Persist the date dimension as the Delta table mta_silver.dim_date,
# replacing any existing contents (overwrite mode, mergeSchema enabled).
(
    dates_df.write
    .format("delta")
    .mode("overwrite")
    .option("mergeSchema", "true")
    .saveAsTable("mta_silver.dim_date")
)

In [0]:
%sql
-- Preview the first ten calendar days of the dimension.
-- ORDER BY makes the sample deterministic; LIMIT alone returns arbitrary rows.
SELECT * FROM mta_silver.dim_date ORDER BY dte_sk LIMIT 10;

dte_sk,dte_int,dte_year,dte_month,dte_month_name,dte_day_of_mth,dte_day_name,dte_day_of_week,dte_is_weekend,dte_week_of_year,dte_quarter,dte_is_leap_year
2000-01-01,20000101,2000,1,January,1,Saturday,7,1,52,1,1
2000-01-02,20000102,2000,1,January,2,Sunday,1,1,52,1,1
2000-01-03,20000103,2000,1,January,3,Monday,2,0,1,1,1
2000-01-04,20000104,2000,1,January,4,Tuesday,3,0,1,1,1
2000-01-05,20000105,2000,1,January,5,Wednesday,4,0,1,1,1
2000-01-06,20000106,2000,1,January,6,Thursday,5,0,1,1,1
2000-01-07,20000107,2000,1,January,7,Friday,6,0,1,1,1
2000-01-08,20000108,2000,1,January,8,Saturday,7,1,1,1,1
2000-01-09,20000109,2000,1,January,9,Sunday,1,1,1,1,1
2000-01-10,20000110,2000,1,January,10,Monday,2,0,2,1,1


In [0]:
# End the notebook run, passing "Success" to dbutils.notebook.exit as the exit value.
exit_message = "Success"
dbutils.notebook.exit(exit_message)