In [0]:
# Fully qualified table names used by this notebook.
# Source table read by the transformation step.
bronze_table = "bronze.data_suicide.hdi_owid"
# Destination table written by the transformation step.
silver_table = "silver.data_suicide.hdi_owid"

In [0]:
from pyspark.sql import functions as F

In [0]:
# Read the raw HDI rows from the bronze table.
df_raw = spark.read.table(bronze_table)

# Build the silver frame: standardise column names, keep only rows that carry a
# real ISO3 country code, cast to the final types and fix the column order.
df_silver = (
    df_raw
    .withColumnRenamed("Entity", "country_name")
    .withColumnRenamed("Code", "country_code_iso3")
    .withColumnRenamed("Year", "year")
    .withColumnRenamed("human_development_index", "hdi")
    # Keep only codes shaped like ISO 3166-1 alpha-3 (exactly three uppercase
    # letters). `rlike` evaluates to NULL for NULL codes, and `filter` drops
    # NULL predicates, so rows without a code are still removed. Unlike a plain
    # `isNotNull()` check, this also removes empty strings and non-ISO codes.
    # NOTE(review): OWID is known to assign synthetic codes such as "OWID_WRL"
    # to aggregates like "World" -- confirm against the bronze data that
    # excluding them is the intended behaviour.
    .filter(F.col("country_code_iso3").rlike(r"^[A-Z]{3}$"))
    .withColumn("year", F.col("year").cast("int"))
    .withColumn("hdi", F.col("hdi").cast("double"))
    .select("country_code_iso3", "country_name", "year", "hdi")
)



In [0]:
# Replace the contents of the silver table with the freshly built frame.
# NOTE(review): "mergeSchema" is a schema-evolution option; if the goal is to
# let a full rebuild change column types or drop columns, "overwriteSchema"
# may be what is needed instead -- confirm the intent.
silver_writer = df_silver.write.mode("overwrite").option("mergeSchema", "true")
silver_writer.saveAsTable(silver_table)

In [0]:
# Table and column comments for the silver table.
# The table-level description records what the table holds and which bronze
# table it was derived from.
spark.sql(f"""
COMMENT ON TABLE {silver_table} IS
'HDI (Human Development Index) do Our World in Data: país-ano, ISO3 e valor do índice. Derivado de {bronze_table}.'
""")

# Column name -> description. Keeping these in one mapping avoids repeating the
# same SQL statement once per column.
column_comments = {
    "country_code_iso3": "Código ISO3 do país (OWID/ISO)",
    "country_name": "Nome do país (OWID)",
    "year": "Ano (Gregorian)",
    "hdi": "Human Development Index (0–1), conforme OWID/UNDP",
}

# ALTER TABLE ... ALTER COLUMN ... COMMENT is used instead of COMMENT ON COLUMN
# because the latter is only accepted by recent runtimes. No trailing ';' is
# sent: spark.sql() runs exactly one statement per call.
for column_name, comment_text in column_comments.items():
    spark.sql(
        f"ALTER TABLE {silver_table} ALTER COLUMN {column_name} COMMENT '{comment_text}'"
    )

print("Silver gravada em:", silver_table)

In [0]:
# Show the column names, types and comments of the silver table.
silver_schema = spark.sql(f"""DESCRIBE TABLE {silver_table}""")
display(silver_schema)

In [0]:
# Preview the rows now stored in the silver table.
silver_rows = spark.sql(f"""select * from {silver_table}""")
display(silver_rows)