In [0]:
from pyspark.sql.connect.dataframe import DataFrame

from common import (
    read_sql_template,
    table_exists, 
    use_schema_and_create_if_not_exists,
)


In [0]:

# Catalog and schema that every table handled by this notebook lives in;
# they are combined with a table name as CATALOG.SCHEMA.<table_name>.
CATALOG = "precos_pmc"
SCHEMA = "gold"



In [0]:
# Prepare the Spark session for CATALOG.SCHEMA before any table is written.
# NOTE(review): presumably this selects the schema and creates it when it is
# missing, as the helper's name suggests — confirm against `common`.
use_schema_and_create_if_not_exists(spark, catalog=CATALOG, schema=SCHEMA)


In [0]:
def write_gold_snapshot_star_schema_delta_table_on_s3(
    spark,
    df: DataFrame,
    table_name: str
) -> bool:
    """Write ``df`` as the complete snapshot of ``CATALOG.SCHEMA.<table_name>``.

    When the table does not exist yet it is created as a Delta table
    partitioned by ``dt_ultima_atualizacao``. When it already exists, all of
    its rows are replaced by the rows of ``df`` in a single overwrite, so
    re-running the notebook yields the same table content.

    Args:
        spark: Active Spark session; forwarded to ``table_exists``.
        df: Rows that make up the new snapshot.
        table_name: Unqualified table name inside ``CATALOG.SCHEMA``.

    Returns:
        ``True`` when the snapshot was written. ``False`` when ``df`` is
        empty (nothing is written) or when the write raised an exception
        (the error is printed, not re-raised).

    Note:
        The emptiness check runs outside the ``try`` block, so an exception
        raised while evaluating ``df.isEmpty()`` propagates to the caller.
    """
    # Function-scope import: `lit` is not imported at the top of the notebook.
    from pyspark.sql.functions import lit

    if df.isEmpty():
        print(f"Nothing to insert in {table_name}")
        return False

    full_table_name = f"{CATALOG}.{SCHEMA}.{table_name}"

    try:
        if not table_exists(spark, CATALOG, SCHEMA, table_name):
            # First run for this table: create it from the DataFrame.
            print(f"Creating snapshot {full_table_name}")
            (
                df
                .writeTo(full_table_name)
                .using("delta")
                .partitionedBy("dt_ultima_atualizacao")
                .create()
            )
        else:
            # Idempotent refresh: swap every existing row for the new snapshot
            # in ONE write. A separate DELETE followed by an append would
            # leave the table empty if the append failed, and would expose an
            # empty table to readers between the two steps.
            print(f"Replacing snapshot in {full_table_name}")
            df.writeTo(full_table_name).overwrite(lit(True))

        print(f'Sucessful inserted into {full_table_name}')
        return True

    except Exception as e:
        # Best-effort contract: report the failure and signal it through the
        # return value instead of aborting the notebook.
        print(f'Error while writing {SCHEMA} delta ({table_name}): {e}')
        return False


In [0]:

# Gold tables to refresh, in write order: the fact table (f_precos) first,
# then the dimensions (d_produtos, d_empresas). Each table is built from the
# SQL template named silver_to_gold_<table>.sql.
GOLD_TABLES = ('f_precos', 'd_produtos', 'd_empresas')

# Per-table status returned by the writer. False means the query produced no
# rows or the write failed; the writer prints which of the two happened.
gold_write_results = {}

for gold_table in GOLD_TABLES:
    sql_query = read_sql_template(f'silver_to_gold_{gold_table}.sql')
    df_gold = spark.sql(sql_query)
    gold_write_results[gold_table] = write_gold_snapshot_star_schema_delta_table_on_s3(
        spark, df=df_gold, table_name=gold_table
    )

# Last expression of the cell: display the outcome of every table instead of
# silently discarding the return values.
gold_write_results

