In [0]:
from delta.tables import DeltaTable
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.sql.utils import AnalysisException


In [0]:
# Unity Catalog that holds every table referenced by this notebook.
catalog = "fuel"

# Fully qualified (catalog.schema.table) names of the silver-layer inputs.
silver_state_table = ".".join([catalog, "silver", "state"])
silver_brand_table = ".".join([catalog, "silver", "brand"])
silver_station_table = ".".join([catalog, "silver", "station"])
silver_fuel_table = ".".join([catalog, "silver", "fuel"])
silver_price_table = ".".join([catalog, "silver", "price"])

# Fully qualified name of the gold-layer output written by this notebook.
gold_station_latest_price_table = ".".join([catalog, "gold", "station_latest_price"])

In [0]:
# Build one row per (station, fuel) holding the most recent price together with
# the price it replaced, then decorate it with station / brand / state / fuel
# attributes from the silver dimension tables.
#
# Query stages:
#   ranked - every price row, numbered newest-first within its (station_id,
#            fuel_id) partition. Because the ordering is descending, lead()
#            returns the chronologically *previous* price and timestamp.
#   cte    - only the newest row per partition (rn = 1), with the derived
#            metrics computed from the columns produced by `ranked`.
#
# Changes compared with the single-CTE form:
#   * The window results are materialised in `ranked` before they are used, so
#     the metrics no longer depend on lateral column alias resolution inside a
#     single SELECT list.
#   * The divisor is wrapped in nullif(prev_price, 0): a zero previous price
#     now yields a NULL pct_change instead of a divide-by-zero error when ANSI
#     mode is enabled (with ANSI mode off the result was already NULL).
#   * The nested, parenthesised joins are written as a flat left-join chain.
#     Every ON condition is an equality on the left-hand table's key, so the
#     result is the same.
#
# NOTE(review): if two price rows of the same station/fuel share an identical
# changed_at, which of them receives rn = 1 is arbitrary - confirm the silver
# table guarantees uniqueness or add a tie-breaker column to the ordering.
latest_price_df = spark.sql(f"""
    with ranked as (
        select
            station_id,
            fuel_id,
            price,
            changed_at,
            row_number() over (partition by station_id, fuel_id order by changed_at desc) as rn,
            lead(price, 1) over (partition by station_id, fuel_id order by changed_at desc) as prev_price,
            lead(changed_at, 1) over (partition by station_id, fuel_id order by changed_at desc) as prev_changed_at
        from {silver_price_table}
    ),
    cte as (
        select
            station_id,
            fuel_id,
            price,
            prev_price,
            ((price / nullif(prev_price, 0)) - 1) * 100.0 as pct_change,
            extract(day from changed_at - prev_changed_at) as days_between_change,
            extract(day from now() - changed_at) as days_since_latest_change
        from ranked
        where rn = 1
    )
    select
        s.station_code,
        b.brand_name,
        st.state_code,
        f.fuel_code,
        cte.price,
        cte.prev_price,
        cte.pct_change,
        cte.days_between_change,
        cte.days_since_latest_change,
        s.longitude,
        s.latitude
    from cte
        left join {silver_station_table} s on cte.station_id = s.station_id
        left join {silver_brand_table} b on s.brand_id = b.brand_id
        left join {silver_state_table} st on s.state_id = st.state_id
        left join {silver_fuel_table} f on cte.fuel_id = f.fuel_id
""")

# latest_price_df.show(5)


In [0]:

# Publish latest_price_df to the gold table.
#
# Normal path: merge into the existing Delta table, matching rows on
# (station_code, fuel_code); matched rows are updated with all source columns
# and unmatched source rows are inserted.
# Fallback path: when an AnalysisException is raised anywhere inside the try
# block, write the DataFrame with saveAsTable in overwrite mode instead.
#
# NOTE(review): presumably the fallback exists for the first run, when the gold
# table has not been created yet. The handler also catches any other
# AnalysisException raised by the merge itself, in which case the whole table
# is overwritten - confirm that this is intended.
merge_condition = "t.station_code = s.station_code AND t.fuel_code = s.fuel_code"

try:
    delta_table = DeltaTable.forName(spark, gold_station_latest_price_table)

    upsert = delta_table.alias("t").merge(latest_price_df.alias("s"), merge_condition)
    upsert = upsert.whenMatchedUpdateAll().whenNotMatchedInsertAll()
    upsert.execute()
except AnalysisException:
    gold_writer = latest_price_df.write.format("delta").mode("overwrite")
    gold_writer.saveAsTable(gold_station_latest_price_table)