In [0]:
# Notebook parameter: data-source identifier, exposed as a text widget with an
# empty default. The widget name is kept in one place so that the declaration
# and the lookup cannot drift apart.
_DATA_SOURCE_WIDGET = "param_data_source"
dbutils.widgets.text(_DATA_SOURCE_WIDGET, "")
var_data_source = dbutils.widgets.get(_DATA_SOURCE_WIDGET)

In [0]:
# Notebook parameter: the file date to process, exposed as a text widget.
# The value is based on the name of the subfolder in blob storage and
# defaults to "2021-03-28".
_FILE_DATE_WIDGET = "param_file_date"
dbutils.widgets.text(_FILE_DATE_WIDGET, "2021-03-28")
var_file_date = dbutils.widgets.get(_FILE_DATE_WIDGET)

In [0]:
# Create the Delta table that holds the calculated race results. The statement
# uses IF NOT EXISTS, so re-running this cell leaves an existing table as is.
# A plain (non-f) string is used because the statement interpolates nothing.
# NOTE(review): points and calculated_points are declared INT here - confirm
# that the upstream f1_processed.results.points values are integral.
create_table_sql = """
            CREATE TABLE IF NOT EXISTS f1_presentation.calculated_race_results
            (
            race_year INT, 
            team_name STRING,
            driver_id INT, 
            driver_name STRING,
            race_id INT, 
            position INT, 
            points INT, 
            calculated_points INT, 
            created_date TIMESTAMP, 
            updated_date TIMESTAMP
            )
            USING DELTA 
"""
spark.sql(create_table_sql)

In [0]:
# Build the temp view `race_result_updated` for the file date being processed.
#
# Purpose: find out who is the dominant driver as well as the dominant team;
# the rows produced here are later saved into a table for analysis.
#
# The view joins f1_processed.results with drivers, constructors and races,
# keeps only results with position <= 10 for the requested file date, and
# derives calculated_points = 11 - position (position 1 -> 10, position 10 -> 1).

from datetime import datetime

# var_file_date comes from a notebook widget and is spliced into the SQL text
# below, so reject anything that is not a plain YYYY-MM-DD date before it can
# reach the query (guards against malformed input and SQL injection).
try:
    datetime.strptime(var_file_date, "%Y-%m-%d")
except ValueError as exc:
    raise ValueError(
        f"param_file_date must be a date in YYYY-MM-DD format, got {var_file_date!r}"
    ) from exc

spark.sql(f"""
        CREATE OR REPLACE TEMP VIEW race_result_updated
        AS 
        (
        SELECT 
            races.race_year, constructors.name AS team_name, drivers.driver_id,
            drivers.name AS driver_name, races.race_id, results.position, results.points, 11-results.position as calculated_points
        FROM f1_processed.results
        JOIN f1_processed.drivers ON (results.driver_id = drivers.driver_id)
        JOIN f1_processed.constructors ON (results.constructor_id = constructors.constructor_id)
        JOIN f1_processed.races ON (results.race_id = races.race_id)
        WHERE results.position <= 10 -- only get the top 10
                AND results.file_date = '{var_file_date}'
        )
""")

In [0]:
# Upsert the rows of the `race_result_updated` temp view into the presentation
# table. Rows are matched on (driver_id, race_id):
#   * matched     -> refresh position, points and calculated_points, and stamp
#                    updated_date with the current timestamp;
#   * not matched -> insert the full row and stamp created_date (updated_date
#                    is not in the INSERT column list, so it is not set here).
# Source columns are qualified with `upd.` in both branches and
# current_timestamp() is written the same way in both, so the two clauses read
# consistently. A plain string is used because nothing is interpolated.
spark.sql("""
    MERGE INTO f1_presentation.calculated_race_results tgt
    USING race_result_updated upd
    ON (tgt.driver_id = upd.driver_id AND tgt.race_id = upd.race_id)
    WHEN MATCHED THEN
        UPDATE SET tgt.position = upd.position,
                   tgt.points = upd.points,
                   tgt.calculated_points = upd.calculated_points,
                   tgt.updated_date = current_timestamp()
    WHEN NOT MATCHED THEN
        INSERT (race_year, team_name, driver_id, driver_name, race_id, position, points, calculated_points, created_date)
        VALUES (upd.race_year, upd.team_name, upd.driver_id, upd.driver_name, upd.race_id, upd.position, upd.points, upd.calculated_points, current_timestamp())
""")

In [0]:
%sql
-- Sanity check: number of rows in the race_result_updated temp view,
-- i.e. the source rows selected for the file date of this run.
SELECT COUNT(1) FROM race_result_updated

In [0]:
%sql
-- Sanity check: total number of rows in the presentation table after the MERGE.
SELECT COUNT(1) FROM f1_presentation.calculated_race_results