In [None]:
from delta.tables import *
from notebookutils import mssparkutils
from pyspark.sql.functions import row_number,max, lit, lower, regexp_replace
from pyspark.sql.window import Window
import requests
import os
# Enable Delta automatic schema evolution for MERGE: the whenMatchedUpdateAll /
# whenNotMatchedInsertAll merge later in this notebook relies on it when the
# source DataFrame carries columns the target table does not have yet.
spark.conf.set("spark.databricks.delta.schema.autoMerge.enabled","true") # needed for automatic schema evolution in merge

In [None]:
# Read the Delta table stored at Tables/focus and keep only the rows whose
# ServiceName is 'Microsoft.Fabric'; every dimension below is derived from it.
source_df = (
    DeltaTable.forPath(spark, "Tables/focus")
    .toDF()
    .where("ServiceName = 'Microsoft.Fabric'")
)

# DIM subscriptions

In [None]:
# Build or refresh the "subscriptions" dimension table from source_df.
#
# - First run (table missing): every distinct SubId gets a surrogate key
#   1..N from row_number() and the table is created.
# - Later runs (table present): SubIds already in the table keep their
#   surrogate key, unseen SubIds get keys continuing after the current
#   maximum, and the result is merged into the Delta table.
tableName = "subscriptions"
logicalKeyColumn = "SubId"              # business key derived from SubAccountId
technicalKeyColumn = "SubscriptionKey"  # surrogate key generated in this cell
# NOTE(review): the existence check names database 'FCA' explicitly, while the
# read (Tables/<name>) and write (saveAsTable) below use the default lakehouse
# -- confirm both always point at the same place.
tableAlreadyExists = spark._jsparkSession.catalog().tableExists('FCA', tableName)

# One row per subscription: SubId is SubAccountId with every "/subscriptions/"
# occurrence removed. dropDuplicates keeps an arbitrary row per SubId.
source_merge_df = (
    source_df.select("SubAccountId", "SubAccountName", "SubAccountType")
    .withColumn(logicalKeyColumn, regexp_replace("SubAccountId", "/subscriptions/", ""))
    .dropDuplicates([logicalKeyColumn])
)

# Surrogate keys are numbered in logical-key order. A window without
# partitionBy pulls all rows into a single partition; acceptable only while
# this dimension stays small.
window_spec = Window.orderBy(logicalKeyColumn)

if tableAlreadyExists:
    # Merge into the existing table.
    print(f"Merge Data for {tableName} table Started")

    target_table = DeltaTable.forPath(spark, f"Tables/{tableName}")
    target_df = target_table.toDF().select(logicalKeyColumn, technicalKeyColumn)

    # `max` is pyspark.sql.functions.max (imported at the top), not the builtin.
    # Over an empty table (or one with only NULL keys) the aggregate is NULL,
    # which arrives here as None; fall back to 0 so new keys start at 1
    # instead of the addition below producing unusable keys.
    max_key = target_df.agg(max(technicalKeyColumn)).collect()[0][0]
    if max_key is None:
        max_key = 0

    # Left join: rows whose SubId already exists pick up their surrogate key,
    # the rest come back with a NULL key and are treated as new.
    # NOTE(review): a NULL SubId never matches in the join, so such a row would
    # be re-inserted with a fresh key on every run -- confirm SubAccountId is
    # never NULL in the source.
    combined_df = source_merge_df.join(target_df, logicalKeyColumn, "leftouter")
    existingRows_df = combined_df.where(combined_df[technicalKeyColumn].isNotNull())
    newRows_df = combined_df.where(combined_df[technicalKeyColumn].isNull())
    newRows_df = newRows_df.withColumn(
        technicalKeyColumn, row_number().over(window_spec) + max_key
    )

    # unionByName instead of positional union: both frames share the same
    # columns, and matching by name keeps the keys aligned even if the column
    # order ever diverges.
    Src_Merge_df = existingRows_df.unionByName(newRows_df)

    # Existing keys match and are updated; freshly generated keys do not
    # match anything in the target and are inserted.
    merge = (target_table.alias("target")
        .merge(
            Src_Merge_df.alias("source"),
            f"target.{technicalKeyColumn} = source.{technicalKeyColumn}"
        )
        .whenMatchedUpdateAll()
        .whenNotMatchedInsertAll()
        )
    merge.execute()

    print(f"Merge Data for {tableName} Ended")

else:
    # First load: number all rows from 1 and create the table.
    print(f"Table {tableName} creation started")
    source_merge_df = source_merge_df.withColumn(technicalKeyColumn, row_number().over(window_spec))
    source_merge_df.write.mode("overwrite").option("mergeSchema", "true").format("delta").saveAsTable(tableName)
    print(f"Table {tableName} creation Ended")
