### Criação da dimensão tempo

In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, expr, date_format
from pyspark.sql.types import DateType
from datetime import datetime, timedelta

# Switch this Spark session's datetime pattern handling to the legacy policy.
# NOTE(review): presumably required so the "w" (week-of-year) pattern passed to
# date_format further down is accepted -- confirm before removing.
spark.conf.set(
    "spark.sql.legacy.timeParserPolicy",
    "LEGACY",
)

In [0]:
# Period covered by the time dimension; both endpoints are included.
start_date = datetime(2000, 1, 1)
end_date = datetime(2030, 12, 31)

# Build one "YYYY-MM-DD" string per calendar day in the period.
_total_days = (end_date - start_date).days + 1
date_list = [
    day.strftime("%Y-%m-%d")
    for day in (start_date + timedelta(days=offset) for offset in range(_total_days))
]

# Load the strings into a single-column DataFrame, name the column "date",
# then cast it from string to DateType.
df = spark.createDataFrame(date_list, "string").toDF("date")
df = df.withColumn("date", col("date").cast(DateType()))

_date_col = col("date")


def _int_field(pattern):
    """Format the "date" column with *pattern* and cast the result to int."""
    return date_format(_date_col, pattern).cast("int")


# Append the dimension attributes. The order of the calls fixes the column
# order, and flag_fim_de_semana must come after dia_da_semana because its
# CASE expression reads that column (1 for 'Sat'/'Sun', otherwise 0).
df = (
    df.withColumn("ano", _int_field("yyyy"))
    .withColumn("mes", _int_field("MM"))
    .withColumn("dia", _int_field("dd"))
    .withColumn("dia_da_semana", date_format(_date_col, "E"))
    .withColumn("dia_do_ano", _int_field("D"))
    .withColumn("semana_do_ano", _int_field("w"))
    .withColumn("quarter", expr("quarter(date)"))
    .withColumn(
        "flag_fim_de_semana",
        expr("CASE WHEN dia_da_semana IN ('Sat', 'Sun') THEN 1 ELSE 0 END"),
    )
)

In [0]:
# Persist the dimension as the Delta table default.dim_tempo; "overwrite"
# mode replaces whatever the table previously contained.
(
    df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("default.dim_tempo")
)