In [0]:
from pyspark.sql.functions import sum, desc, avg, col, round
from pyspark.sql import Window

In [0]:
# Silver-layer inputs for the wifi-connectivity-by-line report.

# Wifi location table; joined to the lookup table on wfi_sk.
fct_wifi_location_df = spark.read.table("mta_silver.fct_wifi_location")

# Lookup table carrying both wfi_sk and lin_sk, used to connect the other two tables.
lkp_wifi_line_df = spark.read.table("mta_silver.lkp_wifi_line")

# Line table; joined to the lookup table on lin_sk.
dim_line_df = spark.read.table("mta_silver.dim_line")

In [0]:
# Window for per-operator statistics: rows are grouped by their lin_operator value.
window_operator = Window.partitionBy("lin_operator")

# Window for whole-dataset statistics: no partition or ordering columns, and the
# row frame is unbounded in both directions, so every row's frame covers all rows.
# NOTE(review): a window without partitionBy is evaluated over the whole dataset
# as a single group - confirm the input stays small.
window_over_all = (
    Window
    .orderBy()
    .rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)
)


# Step 1 - total connectivity score per line.
# Wifi locations are linked to lines through the lookup table (wfi_sk, then
# lin_sk), and the scores are summed for every (lin_nk, lin_name, lin_operator)
# combination. `sum` is pyspark.sql.functions.sum (imported at the top of the
# notebook), not the Python builtin.
wifi_conn_by_line_t1_df = (
    fct_wifi_location_df
    .join(
        lkp_wifi_line_df,
        on=fct_wifi_location_df.wfi_sk == lkp_wifi_line_df.wfi_sk,
        how="inner",
    )
    .join(
        dim_line_df,
        on=lkp_wifi_line_df.lin_sk == dim_line_df.lin_sk,
        how="inner",
    )
    .select("lin_nk", "lin_name", "lin_operator", "wfi_connectivity_score")
    .groupBy("lin_nk", "lin_name", "lin_operator")
    .agg(sum("wfi_connectivity_score").alias("wfi_connectivity_score"))
)

# Step 2 - put two reference averages next to each line's total score, both
# rounded to 2 decimals (`round` is pyspark.sql.functions.round):
#   * avg_operator_conn_score: average of the per-line totals within the same
#     lin_operator (window_operator).
#   * avg_conn_score_per_line: average of the per-line totals across all rows
#     (window_over_all).
# Rows are then sorted by total score, highest first.
wifi_conn_by_line_t2_df = (
    wifi_conn_by_line_t1_df
    .withColumn(
        "avg_operator_conn_score",
        round(avg(col("wfi_connectivity_score")).over(window_operator), 2),
    )
    .withColumn(
        "avg_conn_score_per_line",
        round(avg(col("wfi_connectivity_score")).over(window_over_all), 2),
    )
    .orderBy(col("wfi_connectivity_score").desc())
)

In [0]:
# Step 3 - project the working columns onto the report's wcl_* column names.
# Each pair is (working column, published column); the published order follows
# the order of the pairs. Note that avg_conn_score_per_line is published as
# wcl_conn_score_per_line, i.e. without "avg" in its name.
wifi_conn_by_line_final_df = wifi_conn_by_line_t2_df.select(
    *[
        col(source_name).alias(report_name)
        for source_name, report_name in (
            ("lin_nk", "wcl_nk"),
            ("lin_name", "wcl_line"),
            ("lin_operator", "wcl_operator"),
            ("wfi_connectivity_score", "wcl_connectivity_score"),
            ("avg_operator_conn_score", "wcl_avg_operator_conn_score"),
            ("avg_conn_score_per_line", "wcl_conn_score_per_line"),
        )
    ]
)

In [0]:
# Publish the report as a Delta table; "overwrite" mode replaces whatever the
# table held before this run.
(
    wifi_conn_by_line_final_df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("mta_gold.rpt_wifi_connectivity_by_line")
)

In [0]:
%sql
-- Preview the published report, highest connectivity score first.
-- A query without ORDER BY returns rows in unspecified order, and the sort
-- applied to the DataFrame before it was written is not guaranteed to carry
-- through the table write and read. The ordering is therefore stated here,
-- with wcl_nk as a tiebreaker so equal scores display in a stable order.
SELECT *
FROM mta_gold.rpt_wifi_connectivity_by_line
ORDER BY wcl_connectivity_score DESC, wcl_nk;

In [0]:
# End the notebook run here, passing the string "Success" to dbutils.notebook.exit.
# NOTE(review): presumably a calling job/notebook checks this value - confirm
# before changing the string.
dbutils.notebook.exit("Success")