In [0]:
from pyspark.sql import functions as F, types as T

# --- Notebook parameters ------------------------------------------------------
# dt     : date partition to build, expected as YYYY-MM-DD (mandatory)
# target : one gold table to build, or "all" to build every table
# debug  : "true" switches on the extra diagnostic prints
dbutils.widgets.text("dt", "")
dbutils.widgets.dropdown(
    "target",
    "all",
    ["all", "supply_stake_lock_daily", "unlock_buckets_daily",
     "locked_holder_distribution_daily", "lock_calendar_daily"],
)
dbutils.widgets.dropdown("debug", "false", ["false", "true"])

dt = dbutils.widgets.get("dt").strip()
target = dbutils.widgets.get("target")
debug = dbutils.widgets.get("debug") == "true"

# An empty (or whitespace-only) dt means the caller never supplied one.
if dt == "":
    raise ValueError("dt is required (YYYY-MM-DD)")

# --- Warehouse locations ------------------------------------------------------
CATALOG = "optio_warehouse"
SILVER = CATALOG + ".silver"
GOLD = CATALOG + ".gold"

# --- Amount conventions -------------------------------------------------------
# uOPT amounts are handled as scale-0 decimals; UOPT_PER_OPT converts OPT to uOPT.
DEC_UOPT = T.DecimalType(38, 0)
UOPT_PER_OPT = 10 ** 6


In [0]:
def overwrite_dt_partition(df, full_table_name: str, dt: str):
    """Overwrite the ``dt`` slice of a Delta table with the contents of ``df``.

    The write uses Delta's ``replaceWhere`` option with the predicate
    ``dt = '<dt>'``. Because ``dt`` is spliced into that SQL predicate as
    text, it is validated first: anything that is not a real calendar date
    written exactly as ``YYYY-MM-DD`` is rejected before the writer is touched.
    Without this check a value containing a quote could widen the predicate
    beyond a single partition.

    Args:
        df: DataFrame holding the rows to store for ``dt``.
        full_table_name: Fully qualified name of the destination table.
        dt: Partition date, formatted as ``YYYY-MM-DD``.

    Raises:
        ValueError: If ``dt`` is not a string in ``YYYY-MM-DD`` form or does
            not denote a valid calendar date.
    """
    import datetime
    import re

    # ASCII digits only, so nothing but digits and dashes can reach the predicate.
    if not isinstance(dt, str) or re.fullmatch(r"[0-9]{4}-[0-9]{2}-[0-9]{2}", dt) is None:
        raise ValueError(f"dt must be formatted as YYYY-MM-DD, got {dt!r}")
    try:
        datetime.date(int(dt[0:4]), int(dt[5:7]), int(dt[8:10]))
    except ValueError as exc:
        raise ValueError(f"dt is not a valid calendar date: {dt!r}") from exc

    (df.write.format("delta")
       .mode("overwrite")
       .option("replaceWhere", f"dt = '{dt}'")
       .saveAsTable(full_table_name))

def silver_chain(dt: str):
    """Return the rows of ``silver_chain_daily_state`` whose ``dt`` equals ``dt``."""
    chain_state = spark.table(SILVER + ".silver_chain_daily_state")
    return chain_state.filter(F.col("dt") == dt)

def silver_locks(dt: str):
    """Return the rows of ``silver_active_locks_daily`` whose ``dt`` equals ``dt``."""
    active_locks = spark.table(SILVER + ".silver_active_locks_daily")
    return active_locks.filter(F.col("dt") == dt)


In [0]:
# validation helpers
def require_non_empty(df, label: str):
    """Raise ``ValueError`` unless ``df`` yields at least one row.

    Only one row is requested (``take(1)``). The error message reports the
    notebook-level ``dt``, not a value read from ``df``.

    Args:
        df: DataFrame to probe.
        label: Name used to prefix the error message.
    """
    first_rows = df.take(1)
    if len(first_rows) > 0:
        return
    raise ValueError(f"{label}: expected at least 1 row for dt={dt}, found 0")

def require_exactly_one_row(df, label: str):
    """Raise ``ValueError`` unless ``df`` yields exactly one row.

    At most two rows are fetched, which is enough to tell "none", "one" and
    "more than one" apart. Because the fetch is capped, the duplicate case is
    reported as "at least 2" rather than as an exact count. The error message
    reports the notebook-level ``dt``, not a value read from ``df``.

    Args:
        df: DataFrame to probe.
        label: Name used to prefix the error message.
    """
    rows = df.take(2)  # short-circuit: never more than two rows are pulled
    if len(rows) == 1:
        return
    # Two fetched rows only prove a lower bound, so do not claim an exact count.
    found = "0" if not rows else "at least 2"
    raise ValueError(f"{label}: expected exactly 1 row for dt={dt}, found {found}")

def require_no_nulls(df, cols: list, label: str):
    """Raise ``ValueError`` on the first listed column that contains a null.

    Columns are checked one at a time, in the order given. For each one a
    single offending row (its ``dt`` and the column itself) is fetched as a
    sample for the error message. The ``dt`` in the message text is the
    notebook-level ``dt``.

    Args:
        df: DataFrame to check; it must expose a ``dt`` column.
        cols: Names of the columns that must not contain nulls.
        label: Name used to prefix the error message.
    """
    for column_name in cols:
        null_rows = df.filter(F.col(column_name).isNull())
        sample = null_rows.select("dt", column_name).take(1)
        if not sample:
            continue
        raise ValueError(
            f"{label}: null found in required column '{column_name}' for dt={dt}. Sample={sample[0].asDict()}"
        )

def require_non_negative(df, cols: list, label: str):
    """Raise ``ValueError`` on the first listed column that holds a value below zero.

    Columns are checked one at a time, in the order given. For each one a
    single offending row (its ``dt`` and the column itself) is fetched as a
    sample for the error message. The ``dt`` in the message text is the
    notebook-level ``dt``.

    Args:
        df: DataFrame to check; it must expose a ``dt`` column.
        cols: Names of the columns that must not be negative.
        label: Name used to prefix the error message.
    """
    zero = F.lit(0)
    for column_name in cols:
        negative_rows = df.filter(F.col(column_name) < zero)
        sample = negative_rows.select("dt", column_name).take(1)
        if not sample:
            continue
        raise ValueError(
            f"{label}: negative value in '{column_name}' for dt={dt}. Sample={sample[0].asDict()}"
        )

def sum_decimal(df, col: str):
    """Return the sum of column ``col`` after casting it to ``DEC_UOPT``.

    Args:
        df: DataFrame to aggregate.
        col: Name of the column to sum.

    Returns:
        The aggregated total, or ``0`` when the aggregate comes back as null.
    """
    as_decimal = F.col(col).cast(DEC_UOPT)
    result_row = df.agg(F.sum(as_decimal).alias("s")).collect()[0]
    total = result_row["s"]
    if total is None:
        return 0
    return total

In [0]:
GOLD_SUPPLY = GOLD + ".gold_supply_stake_lock_daily"

def build_gold_supply_stake_lock_daily(dt: str):
    """Build the supply / staked / locked summary for ``dt`` from the silver chain state.

    The silver chain state must hold exactly one row for ``dt``. The output
    carries total supply, staked (bonded) and locked amounts cast to
    ``DEC_UOPT``, a liquid estimate computed as total supply minus staked,
    the source run ids and a ``computed_at`` timestamp.

    Raises:
        ValueError: If the silver input does not hold exactly one row, if the
            liquid estimate is negative, or if locked exceeds staked.
    """
    chain_state = silver_chain(dt)

    require_exactly_one_row(chain_state.select("dt"), "silver_chain_daily_state")

    # Cast once and reuse the expressions below.
    total_supply = F.col("total_supply_uopt").cast(DEC_UOPT)
    staked = F.col("bonded_uopt").cast(DEC_UOPT)
    locked = F.col("total_locked_uopt").cast(DEC_UOPT)

    out = chain_state.select(
        F.col("dt"),
        total_supply.alias("total_supply_uopt"),
        staked.alias("staked_uopt"),
        locked.alias("locked_uopt"),
        (total_supply - staked).alias("liquid_est_uopt"),
        F.col("source_run_ids").alias("source_run_ids"),
    ).withColumn("computed_at", F.current_timestamp())

    # Sanity check: the liquid estimate must never drop below zero.
    require_non_negative(out, ["liquid_est_uopt"], "gold_supply_stake_lock_daily")

    # Sanity check: the locked amount may not exceed the staked amount.
    over_locked = (
        out.filter(F.col("locked_uopt") > F.col("staked_uopt"))
        .select("dt", "locked_uopt", "staked_uopt")
        .take(1)
    )
    if over_locked:
        raise ValueError(f"Expected locked_uopt <= staked_uopt for dt={dt}. Sample={over_locked[0].asDict()}")

    return out


In [0]:
GOLD_BUCKETS = GOLD + ".gold_unlock_buckets_daily"

def build_gold_unlock_buckets_daily(dt: str):
    """Aggregate the active locks of ``dt`` into time-to-unlock buckets.

    Each lock is assigned to a bucket based on the number of days between
    ``dt`` and its unlock date (negative values are clamped to 0). Per bucket
    the output holds the locked amount, the number of locks, the number of
    distinct addresses and a ``computed_at`` timestamp.

    NOTE(review): a lock with a null ``unlock_date`` appears to end up with a
    null day count, which matches none of the conditions and therefore lands
    in the "24M" bucket - confirm that this is intended.
    """
    locks = silver_locks(dt).select(
        "dt",
        F.col("unlock_date").cast("date").alias("unlock_date"),
        F.col("amount_uopt").cast(DEC_UOPT).alias("amount_uopt"),
        "address",
    )

    if debug:
        print("silver_locks rows:", locks.count())

    # Clamp negative values to 0: already-unlockable locks count as immediate.
    raw_days = F.datediff(F.col("unlock_date"), F.to_date(F.lit(dt)))
    clamped_days = F.when(raw_days < 0, F.lit(0)).otherwise(raw_days)

    MAX_24M_DAYS = 730
    GRACE_DAYS = 7
    THRESH_24M = MAX_24M_DAYS - GRACE_DAYS  # 723

    days_left = F.col("days_to_unlock")
    bucket = F.when(days_left <= 7, F.lit("<1W"))
    for upper_days, bucket_label in ((30, "<1M"), (180, "<6M"), (365, "<12M"), (548, "<18M")):
        bucket = bucket.when(days_left <= upper_days, F.lit(bucket_label))
    # The 24M bucket takes everything with >= 723 days remaining (7-day grace).
    bucket = bucket.when(days_left < THRESH_24M, F.lit("<24M")).otherwise(F.lit("24M"))

    return (
        locks.withColumn("days_to_unlock", clamped_days)
        .withColumn("unlock_bucket", bucket)
        .groupBy("dt", "unlock_bucket")
        .agg(
            F.sum("amount_uopt").alias("locked_uopt"),
            F.count(F.lit(1)).alias("lock_count"),
            F.countDistinct("address").alias("wallet_count"),
        )
        .withColumn("computed_at", F.current_timestamp())
    )


In [0]:
GOLD_DIST = GOLD + ".gold_locked_holder_distribution_daily"

def build_gold_locked_holder_distribution_daily(dt: str):
    """Bucket addresses by their total locked amount on ``dt``.

    Lock amounts are first summed per address, then every address is placed
    in a holding bucket by comparing that total against thresholds expressed
    in uOPT. Per bucket the output holds the number of addresses, the summed
    locked amount and a ``computed_at`` timestamp.
    """
    locks = silver_locks(dt).select(
        "dt",
        "address",
        F.col("amount_uopt").cast(DEC_UOPT).alias("amount_uopt"),
    )

    per_wallet = locks.groupBy("dt", "address").agg(
        F.sum("amount_uopt").alias("total_locked_uopt")
    )

    # (exclusive upper bound in OPT, bucket label), smallest first.
    tiers = (
        (10_000, "<10k"),
        (100_000, "10k-100k"),
        (1_000_000, "100k-1M"),
        (10_000_000, "1M-10M"),
        (50_000_000, "10M-50M"),
        (100_000_000, "50M-100M"),
    )

    wallet_total = F.col("total_locked_uopt")
    bucket = None
    for opt_ceiling, tier_label in tiers:
        # Thresholds are compared in uOPT.
        below_ceiling = wallet_total < F.lit(opt_ceiling * UOPT_PER_OPT)
        if bucket is None:
            bucket = F.when(below_ceiling, F.lit(tier_label))
        else:
            bucket = bucket.when(below_ceiling, F.lit(tier_label))
    bucket = bucket.otherwise(F.lit("100M+"))

    return (
        per_wallet.withColumn("holding_bucket", bucket)
        .groupBy("dt", "holding_bucket")
        .agg(
            F.count(F.lit(1)).alias("wallet_count"),
            F.sum("total_locked_uopt").alias("total_locked_uopt"),
        )
        .withColumn("computed_at", F.current_timestamp())
    )


In [0]:
GOLD_CAL = GOLD + ".gold_lock_calendar_daily"

def build_gold_lock_calendar_daily(dt: str):
    """Aggregate the active locks of ``dt`` per unlock date.

    For each unlock date the output holds the day difference to ``dt`` (not
    clamped, so it can be negative), the amount unlocking, the number of
    distinct addresses, the number of locks and a ``computed_at`` timestamp.
    """
    locks = silver_locks(dt).select(
        "dt",
        F.col("unlock_date").cast("date").alias("unlock_date"),
        F.col("amount_uopt").cast(DEC_UOPT).alias("amount_uopt"),
        "address",
    )

    days_until_unlock = F.datediff(F.col("unlock_date"), F.to_date(F.lit(dt)))

    return (
        locks.withColumn("days_to_unlock", days_until_unlock)
        .groupBy("dt", "unlock_date", "days_to_unlock")
        .agg(
            F.sum("amount_uopt").alias("unlocking_uopt"),
            F.countDistinct("address").alias("wallet_count"),
            F.count(F.lit(1)).alias("lock_count"),
        )
        .withColumn("computed_at", F.current_timestamp())
    )


In [0]:
# validation functions

def validate_gold_supply(dt: str):
    """Validate the ``dt`` slice stored in the gold supply/stake/lock table.

    Checks, in order:
      1. exactly one row exists for ``dt``;
      2. none of the amount columns is null;
      3. none of the amount columns is negative;
      4. ``liquid_est_uopt <= total_supply_uopt``;
      5. ``locked_uopt <= staked_uopt``.

    Args:
        dt: Partition date to validate.

    Raises:
        ValueError: On the first check that fails. The message names the
            violated invariant, the ``dt`` and a sample row.
    """
    table_label = "gold_supply_stake_lock_daily"
    amount_cols = ["total_supply_uopt", "staked_uopt", "locked_uopt", "liquid_est_uopt"]

    g = spark.table(GOLD_SUPPLY).where(F.col("dt") == dt)

    require_exactly_one_row(g, table_label)
    require_no_nulls(g, amount_cols, table_label)
    require_non_negative(g, amount_cols, table_label)

    # Each pair states that the first column must not exceed the second.
    ordering_invariants = (
        ("liquid_est_uopt", "total_supply_uopt"),
        ("locked_uopt", "staked_uopt"),
    )
    for smaller, larger in ordering_invariants:
        bad = (g.where(F.col(smaller) > F.col(larger))
                .select("dt", smaller, larger)
                .take(1))
        if bad:
            # The original message was a plain string missing its f-prefix, so
            # the sample was never interpolated; it also gave no context.
            raise ValueError(
                f"{table_label}: expected {smaller} <= {larger} for dt={dt}. "
                f"Sample={bad[0].asDict()}"
            )


def validate_gold_unlock_buckets(dt: str):
    """Validate the ``dt`` slice stored in the gold unlock-buckets table.

    The slice must be non-empty, free of nulls and negatives in its bucket,
    amount and count columns, and its locked total must equal the total
    amount of the silver locks for the same ``dt``.

    Raises:
        ValueError: On the first check that fails.
    """
    table_label = "gold_unlock_buckets_daily"
    gold_rows = spark.table(GOLD_BUCKETS).filter(F.col("dt") == dt)

    require_non_empty(gold_rows, table_label)
    require_no_nulls(gold_rows, ["unlock_bucket", "locked_uopt", "lock_count", "wallet_count"], table_label)
    require_non_negative(gold_rows, ["locked_uopt", "lock_count", "wallet_count"], table_label)

    # Bucketing only regroups locks, so the grand total has to match silver.
    silver_total = sum_decimal(silver_locks(dt), "amount_uopt")
    gold_total = sum_decimal(gold_rows, "locked_uopt")
    if gold_total == silver_total:
        return

    raise ValueError(
        f"gold_unlock_buckets_daily: total mismatch for dt={dt}. "
        f"gold_locked_uopt={gold_total}, silver_locked_uopt={silver_total}"
    )

def validate_gold_holder_dist(dt: str):
    """Validate the ``dt`` slice stored in the gold holder-distribution table.

    The slice must be non-empty, free of nulls and negatives in its bucket,
    count and amount columns, and its locked total must equal the total
    amount of the silver locks for the same ``dt``.

    Raises:
        ValueError: On the first check that fails.
    """
    table_label = "gold_locked_holder_distribution_daily"
    gold_rows = spark.table(GOLD_DIST).filter(F.col("dt") == dt)

    require_non_empty(gold_rows, table_label)
    require_no_nulls(gold_rows, ["holding_bucket", "wallet_count", "total_locked_uopt"], table_label)
    require_non_negative(gold_rows, ["wallet_count", "total_locked_uopt"], table_label)

    # Grouping by holder size only regroups locks, so the grand total has to match silver.
    silver_total = sum_decimal(silver_locks(dt), "amount_uopt")
    gold_total = sum_decimal(gold_rows, "total_locked_uopt")
    if gold_total == silver_total:
        return

    raise ValueError(
        f"gold_locked_holder_distribution_daily: total mismatch for dt={dt}. "
        f"gold_total_locked_uopt={gold_total}, silver_locked_uopt={silver_total}"
    )

def validate_gold_calendar(dt: str):
    """Validate the ``dt`` slice stored in the gold lock-calendar table.

    The slice must be non-empty, free of nulls in its date, day-count, amount
    and count columns, free of negatives in its amount and count columns, and
    its unlocking total must equal the total amount of the silver locks for
    the same ``dt``. ``days_to_unlock`` is not checked for sign.

    Raises:
        ValueError: On the first check that fails.
    """
    table_label = "gold_lock_calendar_daily"
    gold_rows = spark.table(GOLD_CAL).filter(F.col("dt") == dt)

    require_non_empty(gold_rows, table_label)
    require_no_nulls(
        gold_rows,
        ["unlock_date", "days_to_unlock", "unlocking_uopt", "wallet_count", "lock_count"],
        table_label,
    )
    require_non_negative(gold_rows, ["unlocking_uopt", "wallet_count", "lock_count"], table_label)

    # Grouping by unlock date only regroups locks, so the grand total has to match silver.
    silver_total = sum_decimal(silver_locks(dt), "amount_uopt")
    gold_total = sum_decimal(gold_rows, "unlocking_uopt")
    if gold_total == silver_total:
        return

    raise ValueError(
        f"gold_lock_calendar_daily: total mismatch for dt={dt}. "
        f"gold_unlocking_uopt={gold_total}, silver_locked_uopt={silver_total}"
    )

In [0]:
# Executions and validations
#
# Every step is (target name, builder, destination table, validator). Steps run
# in the listed order; a step runs when the selected target is "all" or matches
# its own name. Each step builds the frame, overwrites the dt partition of the
# destination table and then validates what was stored.
_GOLD_STEPS = (
    ("supply_stake_lock_daily", build_gold_supply_stake_lock_daily, GOLD_SUPPLY, validate_gold_supply),
    ("unlock_buckets_daily", build_gold_unlock_buckets_daily, GOLD_BUCKETS, validate_gold_unlock_buckets),
    ("locked_holder_distribution_daily", build_gold_locked_holder_distribution_daily, GOLD_DIST, validate_gold_holder_dist),
    ("lock_calendar_daily", build_gold_lock_calendar_daily, GOLD_CAL, validate_gold_calendar),
)

for _step_name, _build_step, _step_table, _validate_step in _GOLD_STEPS:
    if target not in ("all", _step_name):
        continue
    df = _build_step(dt)
    overwrite_dt_partition(df, _step_table, dt)
    _validate_step(dt)


