### **Ingest Games JSON**
Process the `game_file*.json` files from the bronze container into a table in the silver container.

### Parameters

In [9]:
# Season to ingest. The value is interpolated into the bronze path
# (`season=<season>`) when the game files are read further down.
# NOTE(review): presumably overridden when the notebook is run with parameters — confirm.
season = 2016

### Configuration

In [10]:
%run /utils/general_functions

In [11]:
# create_mounts is not defined in this notebook; presumably it is provided by the
# %run of /utils/general_functions and mounts the storage containers that the
# synfs:/.../mnt/... read path relies on — TODO confirm.
create_mounts()

### Define Schema

In [12]:
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, BooleanType, FloatType, TimestampType

# Shape of the team object nested inside each game record
# (used for both the home and the visitor side).
team_schema = (
    StructType()
    .add('id', IntegerType(), False)
    .add('abbreviation', StringType(), True)
    .add('city', StringType(), True)
    .add('conference', StringType(), True)
    .add('division', StringType(), True)
    .add('full_name', StringType(), True)
    .add('name', StringType(), True)
)

# Shape of one game record; the two team columns reuse team_schema and are
# nullable by default, as no nullable flag is passed for them.
game_schema = (
    StructType()
    .add('id', IntegerType(), False)
    .add('date', TimestampType(), True)
    .add('home_team_score', IntegerType(), True)
    .add('visitor_team_score', IntegerType(), True)
    .add('season', IntegerType(), True)
    .add('period', IntegerType(), True)
    .add('status', StringType(), True)
    .add('time', StringType(), True)
    .add('postseason', BooleanType(), True)
    .add('home_team', team_schema)
    .add('visitor_team', team_schema)
)




### Read Game Files

In [13]:
# The synfs mount path is prefixed with the current job id, so look it up first.
job_id = mssparkutils.env.getJobId()

# Glob over every team_id folder of the selected season in the bronze games area.
games_glob = f'synfs:/{job_id}/mnt/bronze/games/season={season}/team_id=*/*.json'

# Read with the explicit schema rather than letting Spark infer one.
game_df = spark.read.json(games_glob, schema=game_schema)

### Transformations

In [14]:
from pyspark.sql.functions import col

# Rename the key to game_id, pull the team ids out of the nested team structs,
# then drop the structs plus the status/time columns.
# Note: this rebinds game_df, so re-running only this cell fails once the
# home_team/visitor_team columns are gone — re-run from the read cell instead.
game_df = (
    game_df
    .withColumnRenamed('id', 'game_id')
    .withColumn('home_team_id', col('home_team.id'))
    .withColumn('visitor_team_id', col('visitor_team.id'))
    .drop('home_team', 'visitor_team', 'status', 'time')
)

In [None]:
# Remove rows that are identical across every column (the read globs over all
# team_id folders, so the same record can be loaded more than once).
# NOTE(review): this compares whole rows, not just game_id; two rows sharing a
# game_id but differing in any other column would both survive — confirm that
# the merge on game_id tolerates that.
game_final_df = game_df.dropDuplicates()

### Write file as table

In [None]:
%%sql
-- Drops prize_picks_silver.fact_games (if present) before the write step.
-- NOTE(review): this runs unconditionally on every execution, whatever `season`
-- is set to, while the write step passes a merge condition — confirm the drop
-- is intended and not a development leftover.
DROP TABLE IF EXISTS prize_picks_silver.fact_games;

In [None]:
# Target location and format for the games table.
container, database, table = 'silver', 'prize_picks_silver', 'fact_games'
file_format, partition_col = 'delta', 'season'

# Join predicate handed to merge_data: target and source rows are paired on game_id.
merge_condition = 'tgt.game_id = src.game_id'

# merge_data is not defined in this notebook (presumably it comes from
# /utils/general_functions — confirm); arguments are passed positionally.
merge_data(
    game_final_df,
    container,
    database,
    table,
    file_format,
    partition_col,
    merge_condition,
)