In [0]:
from pyspark.sql.functions import current_timestamp, when, col

# Load the raw player-activity rows from the bronze layer.
df = spark.table("dev_catalog.bronze.player_activity")

# Build a GameID -> SK_ID lookup from dev_catalog.silver.game. The lookup
# columns get a "game_" prefix so they cannot collide with the activity
# table's own GameID column once the two frames are joined.
game_df = spark.table("dev_catalog.silver.game").select(
    col("GameID").alias("game_GameID"),
    col("SK_ID").alias("game_SK_ID"),
)

# Attach the surrogate key as SK_GameID (the original GameID column is kept).
# A left join preserves activity rows whose GameID has no match in the game
# table; those rows get a null SK_GameID.
df = (
    df.join(game_df, df.GameID == game_df.game_GameID, "left")
    .withColumn("SK_GameID", col("game_SK_ID"))
    # The prefixed lookup columns exist only to perform the join. Drop them so
    # they do not leak into the deduplication below or into the temp view that
    # feeds the MERGE. Both are fully determined by GameID / SK_GameID, so
    # removing them does not change which rows count as duplicates.
    .drop("game_GameID", "game_SK_ID")
)

# Deduplicate: remove rows that are identical in every column.
df = df.dropDuplicates()

# Replace null values with per-column defaults.
# NOTE(review): assumes PlayerID, Activity_Type and source_file are string
# columns and Level is numeric -- TODO confirm against the bronze schema.
# NOTE(review): PlayerID is the MERGE key further down, so every row with a
# null PlayerID ends up sharing the key 'unknown' -- confirm this is intended.
df = df.fillna({
    'PlayerID': 'unknown',
    'Activity_Type': 'unknown',
    'Level': 0,
    'source_file': 'unknown'
})

# Add insert and update date fields: the existing dt_insert column becomes
# insert_date, and update_date is stamped with the current timestamp.
# NOTE(review): withColumnRenamed is a no-op when dt_insert is absent, in
# which case insert_date would be missing downstream -- confirm the bronze
# table always provides dt_insert.
df = (
    df.withColumnRenamed("dt_insert", "insert_date")
    .withColumn("update_date", current_timestamp())
)

# Register the prepared DataFrame as a temporary view so the SQL MERGE below
# can reference it as its source.
# NOTE(review): nothing in this cell creates the target table; the MERGE
# presumably requires dev_catalog.silver.player_activity to exist already --
# confirm where it is created.
df.createOrReplaceTempView("temp_view_player_activity")

# Upsert into dev_catalog.silver.player_activity, matching on PlayerID:
#   * matched rows   -> every listed column is overwritten from the source;
#                       Insert_date is not in the UPDATE list, so the target's
#                       existing value is kept.
#   * unmatched rows -> inserted with all columns, including Insert_date.
# NOTE(review): the match condition uses PlayerID alone, and the source is
# only deduplicated on full rows. If the source can hold several rows per
# PlayerID (e.g. one per SessionID), a Delta Lake MERGE fails when multiple
# source rows match the same target row -- confirm the intended merge key.
merge_query = """
MERGE INTO `dev_catalog`.`silver`.`player_activity` AS target
USING `temp_view_player_activity` AS source
ON target.`PlayerID` = source.`PlayerID`
WHEN MATCHED THEN UPDATE SET
  target.`PlayerID` = source.`PlayerID`,
  target.`SK_GameID` = source.`SK_GameID`,
  target.`GameID` = source.`GameID`,
  target.`SessionID` = source.`SessionID`,
  target.`Activity_type` = source.`Activity_type`,
  target.`Level` = source.`Level`,
  target.`ExperiencePoints` = source.`ExperiencePoints`,
  target.`AchievementUnlocked` = source.`AchievementUnlocked`,
  target.`CurrencyEarned` = source.`CurrencyEarned`,
  target.`CurrencySpent` = source.`CurrencySpent`,
  target.`QuestCompleted` = source.`QuestCompleted`,
  target.`EnemiesDefeated` = source.`EnemiesDefeated`,
  target.`ItemsCollected` = source.`ItemsCollected`,
  target.`Deaths` = source.`Deaths`,
  target.`DistanceTravelled` = source.`DistanceTravelled`,
  target.`ChatMessagesSent` = source.`ChatMessagesSent`,
  target.`TeamEventsParticipated` = source.`TeamEventsParticipated`,
  target.`PlayMode` = source.`PlayMode`,
  target.`Source_file` = source.`Source_file`,
  target.`Update_date` = source.`Update_date`
WHEN NOT MATCHED THEN INSERT (
  target.`PlayerID`,
  target.`SK_GameID`,
  target.`GameID`,
  target.`SessionID`,
  target.`Activity_type`,
  target.`Level`,
  target.`ExperiencePoints`,
  target.`AchievementUnlocked`,
  target.`CurrencyEarned`,
  target.`CurrencySpent`,
  target.`QuestCompleted`,
  target.`EnemiesDefeated`,
  target.`ItemsCollected`,
  target.`Deaths`,
  target.`DistanceTravelled`,
  target.`ChatMessagesSent`,
  target.`TeamEventsParticipated`,
  target.`PlayMode`,
  target.`Source_file`,
  target.`Insert_date`,
  target.`Update_date`
)
VALUES (
  source.`PlayerID`,
  source.`SK_GameID`,
  source.`GameID`,
  source.`SessionID`,
  source.`Activity_type`,
  source.`Level`,
  source.`ExperiencePoints`,
  source.`AchievementUnlocked`,
  source.`CurrencyEarned`,
  source.`CurrencySpent`,
  source.`QuestCompleted`,
  source.`EnemiesDefeated`,
  source.`ItemsCollected`,
  source.`Deaths`,
  source.`DistanceTravelled`,
  source.`ChatMessagesSent`,
  source.`TeamEventsParticipated`,
  source.`PlayMode`,
  source.`Source_file`,
  source.`Insert_date`,
  source.`Update_date`
)
"""

# Run the MERGE against the catalog.
spark.sql(merge_query)