In [0]:
# Remove any previous version of the time dimension so the cells below
# rebuild it from a clean slate.
spark.sql(
    "DROP TABLE IF EXISTS workspace.gold.dim_time"
)

In [0]:
# Declare the gold-layer time dimension table (a no-op if it already exists).
# The column list here is the contract the write step further down must match.
dim_time_ddl = """
CREATE TABLE IF NOT EXISTS workspace.gold.dim_time (
  date_key BIGINT,
  date TIMESTAMP,
  year INT,
  quarter INT,
  month INT,
  month_name STRING,
  day INT,
  day_name STRING,
  week_of_year BIGINT,
  is_weekend BOOLEAN

)
"""

spark.sql(dim_time_ddl)



In [0]:
import pandas as pd

# Spark schema of the time dimension, mirroring the CREATE TABLE statement for
# workspace.gold.dim_time. Column order matters: it must match the column order
# of the pandas frame built below.
DIM_TIME_SCHEMA = (
    "date_key BIGINT, date TIMESTAMP, year INT, quarter INT, month INT, "
    "month_name STRING, day INT, day_name STRING, week_of_year BIGINT, "
    "is_weekend BOOLEAN"
)

# Only `airdate` is needed to build the dimension, so project and de-duplicate
# in Spark before collecting; this avoids pulling the whole silver table onto
# the driver.
airdates_spark = (
    spark.table("workspace.silver.tvmaze")
    .select("airdate")
    .distinct()
)

df = airdates_spark.toPandas()

# Unparseable values are coerced to NaT and dropped. normalize() truncates any
# time-of-day component to midnight, so each calendar day yields exactly one
# row and therefore exactly one date_key.
date_series = (
    pd.to_datetime(df["airdate"], errors="coerce")
    .dropna()
    .dt.normalize()
    .drop_duplicates()
    .sort_values()
)

# Build every calendar attribute from the distinct dates. Keys are listed in
# the same order as DIM_TIME_SCHEMA.
dim = pd.DataFrame(
    {
        # Integer of the form YYYYMMDD, e.g. 20240131.
        "date_key": date_series.dt.strftime("%Y%m%d").astype("int64"),
        "date": date_series,
        "year": date_series.dt.year,
        "quarter": date_series.dt.quarter,
        "month": date_series.dt.month,
        "month_name": date_series.dt.month_name(),
        "day": date_series.dt.day,
        "day_name": date_series.dt.day_name(),
        # ISO-8601 week number (isocalendar), widened to a plain 64-bit int.
        "week_of_year": date_series.dt.isocalendar().week.astype("int64"),
        # dayofweek is Monday=0 ... Sunday=6, so 5 and 6 are Saturday/Sunday.
        "is_weekend": date_series.dt.dayofweek >= 5,
    }
)

# Pass the schema explicitly instead of letting Spark infer it from pandas
# dtypes: the integer width of .dt.year/.dt.month/... differs between pandas
# versions (int32 vs int64), which could otherwise conflict with the INT
# columns declared on the Delta table. An explicit schema also means an empty
# result does not depend on schema inference.
df_spark = spark.createDataFrame(dim, schema=DIM_TIME_SCHEMA)


In [0]:
# Overwrite mode is used so the existing table contents are replaced in full.
(
    df_spark.write
    .format("delta")
    .option("mergeSchema", "true")
    .mode("overwrite")
    .saveAsTable("workspace.gold.dim_time")
)