# Dim Calendário

In [0]:
from pyspark.sql.functions import col, expr, lit, date_format, date_add
from pyspark.sql.types import DateType
from datetime import datetime

# Inclusive date range covered by the calendar dimension.
data_inicio = datetime(2020, 1, 1)
data_fim = datetime(2030, 12, 31)
# +1 because both the first and the last day belong to the range.
numero_dias = (data_fim - data_inicio).days + 1

# One row per calendar day: spark.range yields the offsets 0..numero_dias-1,
# which are added (as whole days) to the start date.
# The start is passed as a datetime.date rather than a datetime so that Spark
# builds a DATE literal directly. A naive datetime becomes a TIMESTAMP literal
# whose implicit cast to DATE depends on the driver and session time zones and
# can shift the whole calendar by one day when they differ.
dim_calendario = (
    spark.range(0, numero_dias)
    .withColumn("data", date_add(lit(data_inicio.date()), col("id").cast("int")))
    .drop("id")
)

# Derived calendar attributes as (column name, expression) pairs, listed in the
# order in which they are appended to the dimension.
atributos_calendario = [
    ("ano", expr("year(data)")),
    ("mes", expr("month(data)")),
    ("dia", expr("day(data)")),
    ("dia_semana", expr("dayofweek(data)")),  # 1 = Sunday
    # Full-text weekday and month names.
    # NOTE(review): the language of these names is chosen by Spark, not by this
    # code - confirm it matches what report consumers expect.
    ("nome_dia", date_format("data", "EEEE")),
    ("nome_mes", date_format("data", "MMMM")),
    # Two-month period of the year (1-6).
    ("bimestre", expr("CASE WHEN month(data) <= 2 THEN 1 WHEN month(data) <= 4 THEN 2 WHEN month(data) <= 6 THEN 3 WHEN month(data) <= 8 THEN 4 WHEN month(data) <= 10 THEN 5 ELSE 6 END")),
    ("trimestre", expr("quarter(data)")),
    # Four-month period of the year (1-3).
    ("quadrimestre", expr("CASE WHEN month(data) <= 4 THEN 1 WHEN month(data) <= 8 THEN 2 ELSE 3 END")),
    # Half of the year (1-2).
    ("semestre", expr("CASE WHEN month(data) <= 6 THEN 1 ELSE 2 END")),
]

# withColumn replaces a column that already exists, so re-running this cell
# does not duplicate attributes.
for nome_coluna, expressao in atributos_calendario:
    dim_calendario = dim_calendario.withColumn(nome_coluna, expressao)

In [0]:
%sql

-- Create the target schema for the calendar dimension; IF NOT EXISTS keeps
-- the cell safe to re-run.
CREATE SCHEMA IF NOT EXISTS Cotacao

In [0]:
# Persist the calendar dimension as a managed Delta table.
(
    dim_calendario.write
    .format("delta")
    .mode("overwrite")  # replace the table contents on every run
    .saveAsTable("Cotacao.Dim_Calendario")
)

In [0]:
%sql
-- Preview the first five calendar days of the persisted dimension.
-- ORDER BY makes the sample deterministic; LIMIT alone returns arbitrary rows.
SELECT *
FROM Cotacao.Dim_Calendario
ORDER BY data
LIMIT 5