In [None]:
from delta.tables import *
# import datetime 
import time

# dt = str(datetime.date.today())
# Read the clock exactly once so every component describes the same instant.
# Formatting each part with its own time.strftime() call lets the clock tick
# between calls, which at an hour/day/month boundary can produce a mismatched
# combination (for example the previous day paired with the new hour).
run_started_at = time.localtime()
year, month, day = (time.strftime(fmt, run_started_at) for fmt in ("%Y", "%m", "%d"))
hour, minute, second = (time.strftime(fmt, run_started_at) for fmt in ("%H", "%M", "%S"))

# Column order of the seed tuples below; it matches the schema declared for
# bronze_watermark_table further down.
columns = [
    "domain_name",
    "schema_name",
    "table_name",
    "incremental_column",
    "incremental_column_value",
    "merge_key_column",
    "delta_lakehouse_path",
]

# One seed row per tracked source table. Every row starts with the watermark
# value 1900-01-01 and an empty lakehouse path.
values = [
    ("sales", "dbo", "products", "updated_at", "1900-01-01 00:00:00.000", "product_code", ""),
    ("sales", "dbo", "store_customers", "updated_at", "1900-01-01 00:00:00.000", "customer_id", ""),
    ("sales", "dbo", "store_orders", "updated_at", "1900-01-01 00:00:00.000", "orders_number", ""),
    ("supply", "dbo", "inventory", "updated_at", "1900-01-01 00:00:00.000", "product", ""),
]

# Create and seed the table only when it is absent, so re-running this cell
# does not append the seed rows a second time.
watermark_table_missing = not spark.catalog.tableExists("bronze_watermark_table")
if watermark_table_missing:
    (
        DeltaTable.createIfNotExists(spark)
        .tableName("bronze_watermark_table")
        .addColumn("domain_name", "STRING")
        .addColumn("schema_name", "STRING")
        .addColumn("table_name", "STRING")
        .addColumn("incremental_column", "STRING")
        .addColumn("incremental_column_value", "TIMESTAMP", comment="updated on each run")
        .addColumn("merge_key_column", "STRING")
        .addColumn("delta_lakehouse_path", "STRING", comment="updated on each run")
        .execute()
    )

    # The seed tuples carry the watermark as text; cast it so the frame matches
    # the TIMESTAMP column declared above before appending.
    seed_rows = spark.createDataFrame(values, columns)
    df = seed_rows.withColumn(
        "incremental_column_value",
        seed_rows["incremental_column_value"].cast("timestamp"),
    )
    df.write.format("delta").mode("append").saveAsTable("bronze_watermark_table")

In [None]:
def get_tables_list():
    """Display the watermark table and return its table/domain pairs.

    Reads ``bronze_watermark_table`` through the notebook's ``spark`` session,
    renders the full frame with ``display`` and then collects the
    ``table_name`` and ``domain_name`` columns to the driver.

    Returns:
        The collected rows, each exposing ``table_name`` and ``domain_name``.
    """
    watermark_df = spark.read.table("bronze_watermark_table")
    display(watermark_df)
    return watermark_df.select("table_name", "domain_name").collect()


In [None]:
def create_and_update_delta_source_path(_domain_name, _table, _year, _month, _day, _hour):
    """Ensure the hourly bronze folder exists and record it in the watermark table.

    The folder is ``bronze/<domain>/<table>/<year>/<month>/<day>/<hour>`` and is
    created under ``Files/`` when it does not exist yet. The folder (without the
    ``Files/`` prefix) is then written to ``delta_lakehouse_path`` of the
    matching ``bronze_watermark_table`` row.

    Args:
        _domain_name: Domain the table belongs to; part of the folder and of the
            row filter.
        _table: Table name; part of the folder and of the row filter.
        _year, _month, _day, _hour: Date/time components used as nested
            sub-folders, inserted into the path as given.

    Returns:
        None.

    Note:
        Failures while checking or creating the folder are printed and
        swallowed; the watermark row is updated regardless.
    """
    folder = f"bronze/{_domain_name}/{_table}/{_year}/{_month}/{_day}/{_hour}"
    path = f"Files/{folder}"

    try:
        if not mssparkutils.fs.exists(path):
            print(f"path does not exist, creating path: {path}")
            mssparkutils.fs.mkdirs(path)
        else:
            print(f"path: {path} already exists")
    except Exception as e:
        # Deliberately best-effort: report the failure with context and carry on.
        print(f"could not verify or create path {path}: {e}")

    # Filter on the domain as well as the table name: the folder is
    # domain-specific, so matching on table_name alone would overwrite the path
    # of a same-named table that belongs to another domain.
    # NOTE(review): the values are interpolated into the SQL text unescaped; they
    # appear to come from the watermark table itself -- confirm they can never
    # contain a single quote.
    query = (
        "UPDATE bronze_watermark_table "
        f"SET delta_lakehouse_path = '{folder}' "
        f"WHERE table_name = '{_table}' AND domain_name = '{_domain_name}'"
    )
    spark.sql(query)


In [None]:
# For every table listed in the watermark table, make sure the folder for the
# current run's year/month/day/hour exists and record it on the table's row.
tables_to_process = get_tables_list()
for row in tables_to_process:
    table, domain_name = row["table_name"], row["domain_name"]
    create_and_update_delta_source_path(
        _domain_name=domain_name,
        _table=table,
        _year=year,
        _month=month,
        _day=day,
        _hour=hour,
    )


In [None]:
%%sql

-- Show the lakehouse path currently recorded for each tracked table.
SELECT
    table_name,
    delta_lakehouse_path
FROM bronze_watermark_table
