In [0]:
from pyspark.sql.functions import current_date

# Build the gold game dimension: read the distinct game attributes from the
# silver layer, resolve each game's publisher to its surrogate key, and upsert
# the result into dev_catalog.gold.dim_game keyed on GameID.

# Load the distinct games with all attributes from the source table.
games_df = spark.sql("""
    SELECT DISTINCT GameID, Publisher, Genre, Rating, Game_length
    FROM dev_catalog.silver.game
""")

# Load the publisher dimension table. The columns are aliased so they cannot
# clash with the game columns once the two frames are joined.
publishers_df = spark.sql("""
    SELECT Publisher as Publisher_src, sk_id as sk_id_publisher
    FROM dev_catalog.gold.dim_publisher
""")

# Resolve the publisher surrogate key by joining on the publisher name, then
# stamp the load dates and drop the name columns so that only sk_id_publisher
# remains as the reference to the publisher.
# NOTE(review): the inner join discards games whose Publisher has no matching
# row in dim_publisher (including NULL publishers) - confirm this is intended.
dim_game_df = (
    games_df.join(
        publishers_df,
        games_df.Publisher == publishers_df.Publisher_src,
        "inner",
    )
    .withColumn("insert_date", current_date())
    .withColumn("update_date", current_date())
    .drop("Publisher_src", "Publisher")
)

# The MERGE below needs at most one source row per GameID. DISTINCT above is
# evaluated over the whole attribute tuple and the publisher join can fan out,
# so neither guarantees that. With duplicates, unmatched keys would be inserted
# more than once and matched keys would make the MERGE fail, so stop early and
# report the offending keys instead.
# This check triggers one extra Spark job over the source data.
game_id_counts = dim_game_df.groupBy("GameID").count()
duplicate_game_ids = [
    row["GameID"]
    for row in game_id_counts.filter(game_id_counts["count"] > 1)
    .limit(10)
    .collect()
]
if duplicate_game_ids:
    raise ValueError(
        "dim_game source has more than one row for GameID(s) "
        f"{duplicate_game_ids} (showing at most 10); deduplicate "
        "dev_catalog.silver.game / dev_catalog.gold.dim_publisher before merging"
    )

# Save the resulting dimension table into dev_catalog.gold.dim_game with a
# merge based on GameID.
dim_game_df.createOrReplaceTempView("dim_game_temp")

# Matched rows get their attributes and update_date refreshed while keeping the
# original insert_date; unmatched rows are inserted with both dates set.
# GameID is not reassigned on update: the ON clause already guarantees that it
# equals the source value.
merge_query = """
    MERGE INTO dev_catalog.gold.dim_game AS target
    USING dim_game_temp AS source
    ON target.GameID = source.GameID
    WHEN MATCHED THEN
        UPDATE SET
            target.SK_ID_Publisher = source.sk_id_publisher,
            target.Genre = source.Genre,
            target.Rating = source.Rating,
            target.Game_length = source.Game_length,
            target.update_date = source.update_date
    WHEN NOT MATCHED THEN
        INSERT (GameID, SK_ID_Publisher, Genre, Rating, Game_length, insert_date, update_date)
        VALUES (source.GameID, source.sk_id_publisher, source.Genre, source.Rating, source.Game_length, source.insert_date, source.update_date)
"""

spark.sql(merge_query)

# Display the resulting dimension table
#display(dim_game_df)