In [0]:
# Remove any existing copy of the dimension table; IF EXISTS keeps this a
# no-op on the very first run, when the table has not been created yet.
drop_dim_networks_sql = "DROP TABLE IF EXISTS workspace.gold.dim_networks"
spark.sql(drop_dim_networks_sql)

In [0]:
# Declare the gold-layer network dimension with its five columns.
# IF NOT EXISTS makes the statement a no-op when the table is already present.
# NOTE(review): network_id is declared DOUBLE although it is an identifier;
# presumably this mirrors the float dtype pandas yields for nullable ids —
# confirm before changing, since downstream joins depend on this type.
create_dim_networks_sql = """
CREATE TABLE IF NOT EXISTS workspace.gold.dim_networks (
  network_id DOUBLE,
  network_name STRING,
  country_code STRING,
  country_name STRING,
  network_type STRING
)
"""
spark.sql(create_dim_networks_sql)


In [0]:
import pandas as pd

def _extract_networks(source, prefix, network_type):
    """Project one family of ``show_<prefix>_*`` columns onto the dimension layout.

    Args:
        source: pandas DataFrame holding the silver TVmaze rows.
        prefix: Column family to read (``"network"`` or ``"webchannel"``).
        network_type: Constant written to the ``network_type`` column.

    Returns:
        A new DataFrame with the columns ``network_id``, ``network_name``,
        ``country_code``, ``country_name`` and ``network_type``.
    """
    renames = {
        f"show_{prefix}_id": "network_id",
        f"show_{prefix}_name": "network_name",
        f"show_{prefix}_country_code": "country_code",
        f"show_{prefix}_country_name": "country_name",
    }
    # reindex (rather than plain column selection) keeps the tolerance for a
    # missing source column: it is created filled with NaN instead of raising.
    return (
        source.reindex(columns=list(renames))
        .rename(columns=renames)
        .assign(network_type=network_type)
    )


# Column layout of workspace.gold.dim_networks, expressed as a Spark DDL string.
DIM_NETWORKS_SCHEMA = (
    "network_id DOUBLE, network_name STRING, country_code STRING, "
    "country_name STRING, network_type STRING"
)

# The whole silver table is collected to the driver as a pandas DataFrame.
df = spark.table("workspace.silver.tvmaze").toPandas()

broadcast = _extract_networks(df, "network", "broadcast")
web = _extract_networks(df, "webchannel", "web")

dim = (
    pd.concat([broadcast, web], ignore_index=True)
    # Rows without an id carry no network (e.g. a show that only has the
    # other kind of channel), so they are discarded.
    .dropna(subset=["network_id"])
    # Broadcast and web ids are read from two separate source columns, so the
    # same numeric id may appear in both. Deduplicate per (id, type); keying on
    # the id alone would drop a web channel whenever a broadcast network
    # happens to share its id.
    # NOTE(review): consumers must join on (network_id, network_type).
    .drop_duplicates(subset=["network_id", "network_type"])
    .sort_values("network_name", ignore_index=True)
    # Guarantee the dtype matches the DOUBLE column of the explicit schema.
    .astype({"network_id": "float64"})
)

# Pass the schema explicitly so Spark does not have to infer column types from
# the data; inference has nothing to work with when `dim` is empty or when a
# column contains only nulls.
df_spark = spark.createDataFrame(dim, schema=DIM_NETWORKS_SCHEMA)


In [0]:

# Overwrite mode is used so the existing data is replaced completely.
(
    df_spark.write
    .format("delta")
    .option("mergeSchema", "true")
    .mode("overwrite")
    .saveAsTable("workspace.gold.dim_networks")
)