In [None]:
# Install exact, pinned versions of the packages this notebook relies on
# (polars for the DataFrame, deltalake/pyarrow for the Delta table write).
# `-q` keeps pip output quiet; `%pip` targets the running kernel's environment.
%pip install -q pyarrow==21.0.0
%pip install -q deltalake==1.1.4
%pip install -q polars==1.33.1

# NOTE(review): exiting immediately after the installs presumably forces the
# session to restart so the freshly installed versions are the ones imported
# by the following cells — confirm this is the intended behavior on the target
# runtime, and that it does not abort a "Run all" before the later cells run.
import sys
sys.exit(0)

In [None]:
import yaml
import polars as pl

In [None]:
# Inline YAML manifest describing the TPC-H style model used by this notebook.
#
# Top-level sections:
#   concepts - named business concepts (part, supplier, customer, ...), each
#              with a free-text description.
#   keysets  - named key namespaces; each entry points at one concept and one
#              source_system.
#   frames   - one entry per source table; each carries `source`, `generate`,
#              `description` and a list of `hooks`. A hook ties a
#              `business_key_field` (a column name or a CONCAT(...) expression
#              for composed keys) to a `concept` and a `keyset`.
#
# The text is kept as a string and parsed later with yaml.safe_load. In the
# code visible in this notebook only the `frames` section is read afterwards;
# `concepts` and `keysets` are parsed but not otherwise used here.
# NOTE(review): nothing in the visible code checks that the concept/keyset
# names referenced by hooks exist in the sections above — verify downstream.
manifest_yaml = """
concepts:

  - name: part
    description: A part in the product catalog (p_part).

  - name: supplier
    description: A supplier company (s_supp).

  - name: customer
    description: A customer account (c_cust).

  - name: order
    description: Sales order header (o_order).

  - name: line
    description: Sales order line item (l_line).

  - name: partsupp
    description: Supplier’s offer for a specific part (ps_partsupp).

  - name: nation
    description: Nation for geo segmentation (n_nation).

  - name: region
    description: Region for geo segmentation (r_region).


keysets:

  - name: tpch:part:partkey
    concept: part
    source_system: tpch
    description: Native PART identifier from p_partkey

  - name: tpch:supplier:suppkey
    concept: supplier
    source_system: tpch
    description: Native SUPPLIER identifier from s_suppkey

  - name: tpch:customer:custkey
    concept: customer
    source_system: tpch
    description: Native CUSTOMER identifier from c_custkey

  - name: tpch:order:orderkey
    concept: order
    source_system: tpch
    description: Native ORDERS identifier from o_orderkey

  - name: tpch:line:orderkey:linenumber
    concept: line
    source_system: tpch
    description: Composed LINEITEM identifier from (l_orderkey, l_linenumber)

  - name: tpch:partsupp:partkey:suppkey
    concept: partsupp
    source_system: tpch
    description: Composed PARTSUPP identifier from (ps_partkey, ps_suppkey)

  - name: tpch:nation:nationkey
    concept: nation
    source_system: tpch
    description: Native NATION identifier from n_nationkey

  - name: tpch:region:regionkey
    concept: region
    source_system: tpch
    description: Native REGION identifier from r_regionkey


frames:

  - name: part
    source: tpch
    generate: True
    description: Catalog of parts.
    hooks:

      - name: hook__part
        concept: part
        keyset: tpch:part:partkey
        business_key_field: p_partkey

  - name: supplier
    source: tpch
    generate: True
    description: Supplier master data.
    hooks:

      - name: hook__supplier
        concept: supplier
        keyset: tpch:supplier:suppkey
        business_key_field: s_suppkey

      - name: hook__nation
        concept: nation
        keyset: tpch:nation:nationkey
        business_key_field: s_nationkey

  - name: customer
    source: tpch
    generate: True
    description: Customer master data.
    hooks:

      - name: hook__customer
        concept: customer
        keyset: tpch:customer:custkey
        business_key_field: c_custkey

      - name: hook__nation
        concept: nation
        keyset: tpch:nation:nationkey
        business_key_field: c_nationkey

  - name: orders
    source: tpch
    generate: True
    description: Orders header.
    hooks:

      - name: hook__order
        concept: order
        keyset: tpch:order:orderkey
        business_key_field: o_orderkey

      - name: hook__customer
        concept: customer
        keyset: tpch:customer:custkey
        business_key_field: o_custkey

  - name: lineitem
    source: tpch
    generate: True
    description: Order line facts.
    hooks:

      - name: hook__line
        concept: line
        keyset: tpch:line:orderkey:linenumber
        business_key_field: CONCAT(CAST(l_orderkey AS STRING), '|', CAST(l_linenumber AS STRING))

      - name: hook__order
        concept: order
        keyset: tpch:order:orderkey
        business_key_field: l_orderkey

      - name: hook__part
        concept: part
        keyset: tpch:part:partkey
        business_key_field: l_partkey

      - name: hook__supplier
        concept: supplier
        keyset: tpch:supplier:suppkey
        business_key_field: l_suppkey

  - name: partsupp
    source: tpch
    generate: True
    description: Supplier-part offers with costs and availability.
    hooks:

      - name: hook__partsupp
        concept: partsupp
        keyset: tpch:partsupp:partkey:suppkey
        business_key_field: CONCAT(CAST(ps_partkey AS STRING), '|', CAST(ps_suppkey AS STRING))

      - name: hook__part
        concept: part
        keyset: tpch:part:partkey
        business_key_field: ps_partkey

      - name: hook__supplier
        concept: supplier
        keyset: tpch:supplier:suppkey
        business_key_field: ps_suppkey

  - name: nation
    source: tpch
    generate: True
    description: Nation dimension.
    hooks:

      - name: hook__nation
        concept: nation
        keyset: tpch:nation:nationkey
        business_key_field: n_nationkey

      - name: hook__region
        concept: region
        keyset: tpch:region:regionkey
        business_key_field: n_regionkey

  - name: region
    source: tpch
    generate: True
    description: Region dimension.
    hooks:

      - name: hook__region
        concept: region
        keyset: tpch:region:regionkey
        business_key_field: r_regionkey
"""

In [None]:
# Parse the inline YAML manifest into plain Python containers.
manifest = yaml.safe_load(manifest_yaml)

# Resolve the ABFS root path from the lakehouse metadata exposed by
# notebookutils. `or {}` keeps a missing "properties" entry from surfacing as
# an opaque AttributeError on None.
lakehouse_properties = notebookutils.lakehouse.get().get("properties") or {}
abfs_path = lakehouse_properties.get("abfsPath")
if not abfs_path:
    # Without this guard a missing path would be interpolated into the target
    # below as the literal text "None", and the overwrite would be attempted
    # against an unintended location instead of failing loudly.
    raise RuntimeError(
        "notebookutils.lakehouse.get() returned no 'properties.abfsPath'; "
        "check the lakehouse attached to this notebook before writing the manifest."
    )

# One row per entry of the manifest's `frames` section; the nested `hooks`
# list of each frame is handed to polars as-is.
df = pl.DataFrame(manifest["frames"])

# Both the write mode and the schema mode are set to "overwrite", so each run
# replaces whatever was previously stored at the target.
df.write_delta(
    target=f"{abfs_path}/Tables/metadata/manifest",
    mode="overwrite",
    storage_options={"allow_unsafe_rename": "true"},
    delta_write_options={"schema_mode": "overwrite"},
)