In [None]:
%pip install FabricSync --quiet

In [None]:
from delta import DeltaTable
from pyspark.sql.functions import lit, col, max

from FabricSync.BQ.Model.Config import *
from FabricSync.BQ.Enum import *

### Configuration Set-Up

Set the values below so that they match the entries in your existing sync configuration.

1. project_id - GCP project id
2. dataset - GCP dataset id
3. table_name - Name of the table, view, or materialized view
4. watermark_column - Table column to use as the watermark
5. config_json_path - Path to configuration file

In [None]:
# User-supplied parameters: replace each placeholder before running the cells below.
# project_id, dataset and table_name are combined with the configuration's ID to
# locate the table's row in the sync_configuration metastore table.
project_id="<<GCP PROJECT ID>>"
dataset="<<<GCP DATASET ID>>"
table_name="<<<TABLE, VIEW, MATERIALIZED VIEW NAME>>>"
# Column whose maximum value in the destination table becomes the stored watermark;
# it is also written to sync_configuration.watermark_column.
watermark_column="<<WATERMARK COLUMN NAME>>>"

# Path of the user configuration JSON file; it is read and then overwritten
# with the updated table settings.
config_json_path = "<<<PATH TO CONFIGURATION FILE>>>"

In [None]:
# Switch an already-configured table to the WATERMARK load strategy with APPEND
# load type. Three places are kept in step:
#   1. sync_schedule       - max_watermark of the latest schedule (committed tables only)
#   2. sync_configuration  - load_strategy / load_type / watermark_column
#   3. the user configuration JSON file at config_json_path
config = ConfigDataset.from_json(config_json_path)

# Identifies this table's rows in the metastore; reused below as the Delta
# update condition for sync_configuration.
predicate = f"sync_id='{config.ID}' AND project_id='{project_id}' AND dataset='{dataset}' AND table_name='{table_name}'"

# first() returns None when nothing matches, so a single Spark job both checks
# for existence and fetches the row.
c = spark.table("sync_configuration").where(predicate).first()

if c is not None:
    print("Updating configuration...")

    #For committed tables, get the max watermark
    if c["sync_state"] == "COMMIT":
        print("Committed table setting watermark...")
        # Latest schedule entry for this table, ordered by completion time.
        s = spark.table("sync_schedule").where(predicate).orderBy(col("completed").desc()).first()

        if s is not None:
            # `max` here is pyspark.sql.functions.max (imported at the top of the
            # notebook), not the Python builtin.
            df = spark.table(f"{c['lakehouse']}.{c['lakehouse_table_name']}")
            w = df.agg(max(watermark_column).alias("watermark")).first()

            # A global aggregate always yields exactly one row, and its value is
            # NULL when the table is empty or the column holds only NULLs. Guard
            # on the value itself so the literal string "None" is never stored
            # as the watermark.
            if w is not None and w["watermark"] is not None:
                watermark = str(w["watermark"])
                print(f"Found max watermark: {watermark} ...")

                deltaTable = DeltaTable.forName(spark, "sync_schedule")
                deltaTable.update(
                    condition = f"sync_id='{config.ID}' AND schedule_id='{s['schedule_id']}'",
                    set = {
                        'max_watermark': lit(watermark)
                    }
                )
            else:
                print("No watermark value found, max_watermark left unchanged...")

    #Update sync_configuration metastore
    print("Updating sync_configuration...")
    deltaTable = DeltaTable.forName(spark, "sync_configuration")
    deltaTable.update(
        condition = predicate,
        set = {
            'load_strategy': lit(SyncLoadStrategy.WATERMARK) ,
            'load_type': lit(SyncLoadType.APPEND),
            'watermark_column': lit(watermark_column)
        }
    )

    #Update User Configuration File
    print("Updating user configuration...")
    table = next((table for table in config.Tables if table.TableName == table_name), None)

    if table:
        table.LoadStrategy = SyncLoadStrategy.WATERMARK
        table.LoadType = SyncLoadType.APPEND
        table.Keys = [ConfigTableColumn(column=watermark_column)]

        # Put the modified entry back into the table list before saving.
        config.Tables = [table if tbl.TableName == table.TableName else tbl for tbl in config.Tables]
        config.to_json(config_json_path)

        print("Finished...")
    else:
        # The metastore was updated, but the JSON file has no entry to change.
        print("Table not found in user configuration file...")
else:
    print("Configuration not found...")