### **Transform Game Facts**

### Parameters

In [1]:
# Season to process; it selects which fact_games season path is read from silver below.
# NOTE(review): presumably overridden by a pipeline/notebook parameter at run time — confirm.
season = 2022

### Configuration

In [66]:
%run /utils/general_functions

In [67]:
# The loads below read from synfs:/{job_id}/mnt/... paths, so this presumably sets up
# those storage mounts first.
# NOTE(review): create_mounts is not defined in this notebook — presumably it comes from
# the %run of /utils/general_functions above; confirm what it mounts.
create_mounts()

### Load Data

In [68]:
# The mounted paths are scoped by the current job id (synfs:/<job_id>/mnt/...).
job_id = mssparkutils.env.getJobId()

# Silver inputs: the selected season of fact_games and the team dimension.
games_path = f'synfs:/{job_id}/mnt/silver/fact_games/season={season}'
teams_path = f'synfs:/{job_id}/mnt/silver/dim_team'

game_df = spark.read.load(games_path, format='delta')
team_df = spark.read.load(teams_path, format='delta')

### Transformations

In [69]:
from pyspark.sql.functions import col

# Join conditions: match each side of the game to a dim_team row flagged is_active.
home_team_match = (col('game.home_team_id') == col('team1.team_id')) & (col('team1.is_active') == True)
visitor_team_match = (col('game.visitor_team_id') == col('team2.team_id')) & (col('team2.is_active') == True)

# team_df is joined twice under different aliases (team1 = home, team2 = visitor).
# Both joins are inner, so a game without an active match on either side is excluded.
game_joined_df = (
    game_df.alias('game')
    .join(team_df.alias('team1'), on=home_team_match, how='inner')
    .join(team_df.alias('team2'), on=visitor_team_match, how='inner')
)

In [70]:
# Swap the team ids for the team_key values taken from the two dim_team aliases.
surrogate_key_cols = [
    col('team1.team_key').alias('home_team_key'),
    col('team2.team_key').alias('visitor_team_key'),
]
passthrough_cols = ['date', 'home_team_score', 'visitor_team_score', 'season', 'period', 'postseason']

# Project to the output columns, then drop exact duplicate rows.
game_final_df = game_joined_df.select('game_id', *surrogate_key_cols, *passthrough_cols).distinct()

In [71]:
from pyspark.sql.functions import when, lit

# Default substituted for NULL in each column; applied in this order.
# NOTE(review): the 'date' default is a string literal — if 'date' is not a string
# column, confirm the resulting column type is what the target table expects.
null_defaults = {
    'date': lit('1900-01-01 00:00:00.000'),
    'home_team_score': 0,
    'visitor_team_score': 0,
    'season': 0,
    'period': 0,
    'postseason': lit(False),
}

# Handle NULL values: keep the existing value when present, otherwise use the default.
for column_name, default_value in null_defaults.items():
    current_value = col(column_name)
    game_final_df = game_final_df.withColumn(
        column_name,
        when(current_value.isNull(), default_value).otherwise(current_value),
    )

### Merge Data

In [72]:
# Merge target: gold container / prize_picks_gold database / fact_games table.
container, database, table = 'gold', 'prize_picks_gold', 'fact_games'
file_format, partition_col = 'delta', 'season'

# Rows are matched on game_id.
# NOTE(review): the tgt/src aliases are presumably defined inside merge_data — confirm.
merge_condition = 'tgt.game_id = src.game_id'

merge_data(
    game_final_df,
    container,
    database,
    table,
    file_format,
    partition_col,
    merge_condition,
)

In [None]:
%%sql
-- Drops the prize_picks_silver.fact_games table if it exists (no error when it is absent).
-- NOTE(review): this notebook reads silver fact_games data earlier and merges it into gold;
-- confirm that dropping the silver table here is an intended cleanup, and whether the
-- underlying files are removed too (depends on managed vs. external table).
DROP TABLE IF EXISTS prize_picks_silver.fact_games;

In [73]:
# spark.sql(f'SELECT COUNT(*) FROM prize_picks_gold.fact_games').show()