In [0]:
import datetime
import json

from pyspark.sql import functions as F, types as T

# --- Notebook parameters -------------------------------------------------
# Widgets declare the job inputs; the defaults below apply when a run does
# not override them.
dbutils.widgets.text("dt", "2026-01-25")
dbutils.widgets.text("bucket", "")
dbutils.widgets.text("bronze_prefix", "raw/optio")
dbutils.widgets.text("silver_prefix", "silver/optio")
dbutils.widgets.dropdown("table", "chain_daily_state", ["chain_daily_state", "active_locks_daily"])

# Normalise the raw widget strings: surrounding whitespace is dropped and the
# prefixes lose any trailing "/" so the path f-strings never produce "//".
dt = dbutils.widgets.get("dt").strip()
bucket = dbutils.widgets.get("bucket").strip()
bronze_prefix = dbutils.widgets.get("bronze_prefix").strip().rstrip("/")
silver_prefix = dbutils.widgets.get("silver_prefix").strip().rstrip("/")
table = dbutils.widgets.get("table")

if not bucket:
    raise ValueError("bucket is required")

# dt is interpolated verbatim into S3 paths and into the Delta replaceWhere
# predicate (dt = '<dt>'), so only a real, zero-padded YYYY-MM-DD date is
# accepted. The strftime round-trip rejects forms strptime tolerates, such
# as "2026-1-5".
try:
    if datetime.datetime.strptime(dt, "%Y-%m-%d").strftime("%Y-%m-%d") != dt:
        raise ValueError("date is not zero-padded")
except ValueError as err:
    raise ValueError(
        f"dt must be a valid date formatted as YYYY-MM-DD, got: {dt!r}"
    ) from err

# Bronze location for this day and the manifest describing its dataset runs.
bronze_dt = f"s3://{bucket}/{bronze_prefix}/dt={dt}"
manifest_path = f"{bronze_dt}/_MANIFEST.json"



def read_manifest(path: str) -> dict:
    """Load and parse the JSON manifest stored at ``path``.

    The file is read with ``wholetext=True`` so that its entire content
    arrives as a single row. Reading line by line and taking the first row
    would hand ``json.loads`` only the first line, which breaks on any
    pretty-printed (multi-line) manifest.

    Args:
        path: Location of the manifest file, readable by ``spark.read.text``.

    Returns:
        The decoded JSON document.

    Raises:
        ValueError: If the manifest has no content, or (as
            ``json.JSONDecodeError``, a ``ValueError`` subclass) if the
            content is not valid JSON.
    """
    row = spark.read.text(path, wholetext=True).first()
    # first() yields None when no row was produced; also reject a file that
    # contains only whitespace, with a message that names the path.
    if row is None or not row["value"].strip():
        raise ValueError(f"Manifest is empty: {path}")
    return json.loads(row["value"])

# Parse the day's manifest once and expose the pieces used further down as
# module-level values.
manifest = read_manifest(path=manifest_path)
datasets = manifest["datasets"]  # entries are looked up by dataset name
snapshot_height = int(manifest["snapshot_height"])
manifest_created_at = manifest.get("created_at", None)  # ISO string

def dataset_run_path(name: str) -> str:
    """Return the bronze directory of the manifest-listed run for ``name``.

    Args:
        name: Dataset key to look up in the manifest's ``datasets`` mapping.

    Returns:
        ``<bronze_dt>/dataset=<name>/run_id=<run_id>`` for that dataset.

    Raises:
        ValueError: If the dataset has no (non-empty) manifest entry or its
            ``status`` is anything other than ``"SUCCESS"``.
    """
    entry = datasets.get(name)
    succeeded = bool(entry) and entry.get("status") == "SUCCESS"
    if not succeeded:
        raise ValueError(f"Dataset not SUCCESS in manifest: {name}")
    run_id = entry["run_id"]
    return f"{bronze_dt}/dataset={name}/run_id={run_id}"

def read_jsonl_gz(path: str):
    """Read all ``part-*.jsonl.gz`` files directly under ``path`` as JSON.

    Args:
        path: Directory containing the part files (no trailing slash).

    Returns:
        The DataFrame produced by ``spark.read.json`` for the matching files.
    """
    part_glob = f"{path}/part-*.jsonl.gz"
    return spark.read.json(part_glob)


def write_delta_partition(df, table_name: str):
    """Write ``df`` to the silver Delta location for ``table_name``.

    The write uses overwrite mode with a ``replaceWhere`` predicate of
    ``dt = '<dt>'`` (the notebook-level ``dt``) and partitions the output
    by the ``dt`` column.

    Args:
        df: DataFrame to persist; it must carry a ``dt`` column.
        table_name: Final path segment under the silver prefix.

    Returns:
        The S3 path the data was saved to.
    """
    out_path = f"s3://{bucket}/{silver_prefix}/{table_name}"

    writer = df.write.format("delta").mode("overwrite")
    # Limit the overwrite to rows belonging to the current dt.
    writer = writer.option("replaceWhere", f"dt = '{dt}'")
    writer = writer.partitionBy("dt")
    writer.save(out_path)

    return out_path


def build_active_locks_daily():
    """Build the active-locks DataFrame for the notebook's ``dt``.

    Reads the ``lockup_active_locks`` bronze run referenced by the manifest,
    casts and renames its columns, derives a deterministic ``lock_id`` and
    removes duplicate rows.

    Returns:
        A DataFrame with columns ``dt``, ``height``, ``snapshot_ts``,
        ``address``, ``unlock_date``, ``denom``, ``amount_uopt``,
        ``source_run_id`` and ``lock_id``.

    Raises:
        ValueError: If the manifest does not list ``lockup_active_locks``
            with status ``SUCCESS`` (raised by ``dataset_run_path``).
    """
    # Resolve the run path first: dataset_run_path validates the manifest
    # entry, so a missing or failed dataset surfaces as its descriptive
    # ValueError rather than as a bare KeyError from the lookup below.
    source_path = dataset_run_path("lockup_active_locks")
    run_id = datasets["lockup_active_locks"]["run_id"]
    locks = read_jsonl_gz(source_path)

    df = (locks.select(
            F.lit(dt).alias("dt"),
            F.col("snapshot_height").cast("bigint").alias("height"),
            F.col("extracted_at").cast("timestamp").alias("snapshot_ts"),
            F.col("address").cast("string").alias("address"),
            F.to_date("unlock_date").alias("unlock_date"),
            F.col("amount_denom").cast("string").alias("denom"),
            F.col("amount_uopt").cast(T.DecimalType(38,0)).alias("amount_uopt"),
            F.lit(run_id).alias("source_run_id"),
        )
        .withColumn(
            "lock_id",
            # SHA-256 over the "|"-joined identifying fields.
            # NOTE(review): concat_ws skips NULL inputs, so a row with a NULL
            # field hashes fewer components -- confirm these source columns
            # are never NULL if lock_id must be collision-free.
            F.sha2(
                F.concat_ws("|",
                    F.col("dt"),
                    F.col("height").cast("string"),
                    F.col("address"),
                    F.col("unlock_date").cast("string"),
                    F.col("denom"),
                    F.col("amount_uopt").cast("string"),
                ),
                256
            )
        )
        # Keep one row per (dt, lock_id).
        .dropDuplicates(["dt", "lock_id"])
    )
    return df


# Build the active-locks frame and persist it under the silver prefix.
# NOTE(review): the "table" widget value is not consulted in this cell; the
# active-locks table is always the one built -- confirm this is intended.
df = build_active_locks_daily()
path = write_delta_partition(df=df, table_name="silver_active_locks_daily")

# Report the destination, then show a 20-row sample and the row count per dt.
print(f"Wrote: {path}")
display(df.limit(20))
display(df.groupBy("dt").count())

