**Manually attach the lakehouse where you want to create your tables by clicking "Add data items" in the left pane.**

In [None]:
# -----------------------------
# User configuration (edit these)
# WORKSPACE_NAME and SEMANTIC_MODEL_NAME are passed to connect_semantic_model()
# as workspace= and dataset= respectively.
WORKSPACE_NAME        = "workspacename"
SEMANTIC_MODEL_NAME   = "semantic model name"
# Qualifier interpolated in front of table names in the Spark SQL statements
# (SHOW TABLES IN ..., DESCRIBE TABLE EXTENDED <database>.<table>).
LAKEHOUSE_DATABASE    = "lakehousename.dbo"  # use just lakehouse name if there is no schema
# Lakehouse tables to process. An empty list means "discover every table with SHOW TABLES".
INCLUDE_TABLES         = []
#INCLUDE_TABLES        = ["table1"]  # [] = discover all tables; or e.g., ["dim_customer", "fact_sales"]


# Optional mappings (Semantic Model -> Lakehouse)
# TABLE_NAME_MAP: semantic model table name -> Lakehouse table name.
# Tables without an entry are looked up under their semantic model name unchanged.
TABLE_NAME_MAP = {
    # "Dim Customer": "dim_customer",
    # "Fact Sales":   "fact_sales"
}
# COLUMN_NAME_MAP: (semantic model table name, semantic model column name) -> Lakehouse column name.
# Columns without an entry are looked up under their semantic model name unchanged.
COLUMN_NAME_MAP = { 
    # ("Dim Customer","CustomerId"): "cust_id",
    # ("Dim Customer","Customer Name"): "cust_name"
}


In [1]:
%pip install semantic-link-labs --quiet

StatementMeta(, 4ea5aeea-e5f9-4ab0-8ff7-40e63d63e9f6, 7, Finished, Available, Finished)


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.



In [9]:
#imports & config
from pyspark.sql import functions as F
from pyspark.sql import types as T
import json

from sempy_labs.tom import connect_semantic_model


# -----------------------------
# Helper: resolve semantic model names
def map_table_name(sm_table: str) -> str:
    """Translate a semantic model table name into its Lakehouse table name.

    The name is looked up in TABLE_NAME_MAP; a name with no entry is
    returned unchanged.
    """
    if sm_table in TABLE_NAME_MAP:
        return TABLE_NAME_MAP[sm_table]
    return sm_table

def map_column_name(sm_table: str, sm_col: str) -> str:
    """Translate a semantic model column name into its Lakehouse column name.

    COLUMN_NAME_MAP is keyed by the (table name, column name) pair as they
    appear in the semantic model; a pair with no entry yields the column
    name unchanged.
    """
    lookup_key = (sm_table, sm_col)
    if lookup_key in COLUMN_NAME_MAP:
        return COLUMN_NAME_MAP[lookup_key]
    return sm_col


StatementMeta(, 4ea5aeea-e5f9-4ab0-8ff7-40e63d63e9f6, 16, Finished, Available, Finished)

In [11]:
# harvest Lakehouse comments (table + column)
def get_table_and_column_comments(database: str, table_name: str):
    """
    Read the table-level comment and the column comments of one Lakehouse table.

    Runs ``DESCRIBE TABLE EXTENDED {database}.{table_name}`` and parses the
    returned rows (col_name, data_type, comment) section by section, in order.

    Args:
      database:   qualifier placed in front of the table name (e.g. "lakehousename.dbo")
      table_name: name of the table to describe

    Returns:
      table_comment: str or ""
      col_comment_map: dict[col_name] = comment_str (only columns with a non-empty comment)
    """
    # The DESCRIBE output is small (one row per column plus a few property rows),
    # so it is collected once and parsed in Python. Row order matters here: it is
    # what separates real columns from the property rows that follow them.
    rows = spark.sql(f"DESCRIBE TABLE EXTENDED {database}.{table_name}").collect()

    def _text(value) -> str:
        # DESCRIBE may return NULL in any of the three fields
        return (value or "").strip()

    names = [_text(r["col_name"]) for r in rows]

    # The column listing comes first and ends at the first blank row or the first
    # "# ..." section header (e.g. "# Partition Information"). Stopping there keeps
    # the "# col_name | data_type | comment" header row of the partition section
    # from being recorded as a column comment.
    schema_end = next(
        (i for i, name in enumerate(names) if not name or name.startswith("#")),
        len(rows),
    )

    col_comment_map = {}
    for r in rows[:schema_end]:
        comment = _text(r["comment"])
        if comment and _text(r["data_type"]):
            col_comment_map[r["col_name"]] = comment

    # The table-level comment is a property row with col_name == 'Comment' whose
    # value sits in data_type. Only rows after the column listing are searched, so
    # a real column that happens to be named "Comment" is never mistaken for it.
    # Prefer the "# Detailed Table Information" section when the runtime emits it;
    # otherwise fall back to everything after the column listing.
    detail_marker = "# Detailed Table Information"
    if detail_marker in names:
        search_from = names.index(detail_marker) + 1
    else:
        search_from = schema_end

    table_comment = ""
    for r in rows[search_from:]:
        if r["col_name"] == "Comment":
            table_comment = _text(r["data_type"])
            break

    return table_comment, col_comment_map

# Discover tables in database (or use INCLUDE_TABLES)
if INCLUDE_TABLES:
    # An explicit list from the configuration cell takes precedence over discovery.
    lakehouse_tables = INCLUDE_TABLES
else:
    show_df = spark.sql(f"SHOW TABLES IN {LAKEHOUSE_DATABASE}")
    # SHOW TABLES also lists session-scoped temporary views (isTemporary = true).
    # They do not belong to the Lakehouse database, so describing them as
    # "<database>.<name>" later would fail; keep only persistent tables.
    lakehouse_tables = [r["tableName"] for r in show_df.collect() if not r["isTemporary"]]

print(f"Found {len(lakehouse_tables)} Lakehouse tables")
print(lakehouse_tables)



StatementMeta(, 4ea5aeea-e5f9-4ab0-8ff7-40e63d63e9f6, 18, Finished, Available, Finished)

Found 7 Lakehouse tables
['accident', 'adjuster', 'claim', 'driver_telemetry_data', 'policy', 'policyholder', 'vehicle']


In [None]:
# Log of every description written, as (kind, target, text) tuples where kind is
# "TABLE" or "COLUMN" and target is the table name or "<table>.<column>".
updates_applied = []

# Loop through semantic model tables and columns.
# The model is opened with readonly=False because descriptions are modified below.
with connect_semantic_model(dataset=SEMANTIC_MODEL_NAME, workspace=WORKSPACE_NAME, readonly=False) as tom:
    for t in tom.model.Tables:
        lh_table = map_table_name(t.Name)

        # Only process tables that exist in the Lakehouse table list
        if lh_table not in lakehouse_tables:
            continue

        # Fetch the table and column comments from the lakehouse
        table_comment, col_comment_map = get_table_and_column_comments(LAKEHOUSE_DATABASE, lh_table)

        # Update table description (left untouched when the Lakehouse table has no comment)
        if table_comment:
            t.Description = table_comment
            updates_applied.append(("TABLE", t.Name, table_comment))

        # Update column descriptions. This runs whether or not the table itself has
        # a comment, so column comments are not lost for uncommented tables.
        for c in t.Columns:
            lh_col = map_column_name(t.Name, c.Name)

            # col_comment_map only holds non-empty comments, so a missing key
            # means "nothing to apply" for this column.
            col_description = col_comment_map.get(lh_col)
            if col_description:
                c.Description = col_description
                updates_applied.append(("COLUMN", f"{t.Name}.{c.Name}", col_description))

# Each entry of updates_applied starts with (kind, target, ...):
# kind is "TABLE" or "COLUMN", target is the table name or "<table>.<column>".

# Names of the tables whose description was set
tables_updated = [target for kind, target, *_ in updates_applied if kind == "TABLE"]

print(f"Comments applied to these tables in the semantic model: '{SEMANTIC_MODEL_NAME}' : {tables_updated}")

# "<table>.<column>" identifiers of the columns whose description was set
columns_updated = [target for kind, target, *_ in updates_applied if kind == "COLUMN"]

print(f"Comments applied to these columns in the semantic model: '{SEMANTIC_MODEL_NAME}' : {columns_updated}")


