# Power BI Semantic Layer Generator (v9)

This notebook generates a second notebook that builds a semantic layer in Databricks from your Power BI dataset.
The generated notebook contains SQL base views, join views derived from the model's relationships, PySpark translations of simple DAX measures, documentation cells, and optional Delta materialization.

In [None]:
# Configura tus credenciales Power BI y opciones de generación
client_id = "TU_CLIENT_ID"
client_secret = "TU_CLIENT_SECRET"
tenant_id = "TU_TENANT_ID"
workspace_id = "TU_WORKSPACE_ID"
dataset_id = "TU_DATASET_ID"
semantic_model_name = "Ventas"
materialize = True

# Instala dependencias si estás en Databricks
%pip install msal requests nbformat


In [None]:

import msal, requests
# Acquire an app-only Power BI access token via the client-credentials flow.
authority = f"https://login.microsoftonline.com/{tenant_id}"
app = msal.ConfidentialClientApplication(client_id, authority=authority, client_credential=client_secret)
token = app.acquire_token_for_client(scopes=["https://analysis.windows.net/powerbi/api/.default"])

# On failure MSAL returns a dict without "access_token" (it carries "error" /
# "error_description" instead), so fail here with a readable message rather
# than with a bare KeyError.
if "access_token" not in token:
    raise RuntimeError(
        "No se pudo obtener el token de Power BI: "
        f"{token.get('error')} - {token.get('error_description')}"
    )

access_token = token["access_token"]
headers = {"Authorization": f"Bearer {access_token}"}


In [None]:

def _fetch_dataset_collection(resource, timeout=30):
    """Return the ``value`` list of a dataset sub-resource, or ``[]`` on failure.

    Args:
        resource: Last path segment of the dataset URL (e.g. ``"tables"``).
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The list stored under ``"value"`` in the JSON response. An empty list is
        returned (after printing a warning) when the API answers with an HTTP
        error, so metadata retrieval stays best-effort but is no longer silent.
    """
    url = f"https://api.powerbi.com/v1.0/myorg/groups/{workspace_id}/datasets/{dataset_id}/{resource}"
    response = requests.get(url, headers=headers, timeout=timeout)
    if not response.ok:
        print(f"⚠️ Power BI API '{resource}': HTTP {response.status_code} - {response.text[:200]}")
        return []
    return response.json().get("value", [])


# NOTE(review): confirm that all three endpoints are available for your dataset
# type; an unavailable endpoint now shows up as a warning plus an empty list.
tables = _fetch_dataset_collection("tables")
relationships = _fetch_dataset_collection("relationships")
measures = _fetch_dataset_collection("measures")


In [None]:

import re
from textwrap import dedent

import nbformat
from nbformat.v4 import new_notebook, new_markdown_cell, new_code_cell

# Numeric literal, and the literals accepted on the right-hand side of a FILTER
# comparison (a number or a double-quoted string). Anything else (dates, other
# measures, ...) would not be valid Python when pasted into the generated cell.
_DAX_NUMBER = r"-?\d+(?:\.\d+)?"
_DAX_LITERAL = rf'{_DAX_NUMBER}|"[^"]*"'

_F_IMPORT = "from pyspark.sql import functions as F"


def _column_ref(prefix):
    """Return a regex fragment matching ``Table[Column]`` or ``'Table'[Column]``.

    The fragment defines the named groups ``<prefix>_quoted``, ``<prefix>_bare``
    (the table name, with or without quotes) and ``<prefix>_col``.
    """
    return (
        rf"(?:'(?P<{prefix}_quoted>[^']+)'|(?P<{prefix}_bare>\w+))"
        rf"\s*\[(?P<{prefix}_col>[^\]]+)\]"
    )


# CALCULATE(SUM(T[col]), FILTER(T, T[cond] > literal))
_FILTERED_SUM = re.compile(
    rf"\s*CALCULATE\s*\(\s*SUM\s*\(\s*{_column_ref('agg')}\s*\)\s*,"
    rf"\s*FILTER\s*\(\s*(?:'[^']+'|\w+)\s*,\s*{_column_ref('cond')}"
    rf"\s*>\s*(?P<value>{_DAX_LITERAL})\s*\)\s*\)\s*",
    re.IGNORECASE,
)

# DISTINCTCOUNT(T[col]) / AVERAGE(T[col])
_SIMPLE_AGG = re.compile(
    rf"\s*(?P<fn>DISTINCTCOUNT|AVERAGE)\s*\(\s*{_column_ref('arg')}\s*\)\s*",
    re.IGNORECASE,
)

# DIVIDE(T[a], T[b]) or DIVIDE(SUM(T[a]), SUM(T[b])), optional numeric alternate result.
# The conditional groups close the SUM( parenthesis only when it was opened.
_DIVIDE = re.compile(
    rf"\s*DIVIDE\s*\(\s*(?P<num_sum>SUM\s*\(\s*)?{_column_ref('num')}\s*(?(num_sum)\)\s*),"
    rf"\s*(?P<den_sum>SUM\s*\(\s*)?{_column_ref('den')}\s*(?(den_sum)\)\s*)"
    rf"(?:,\s*(?P<alt>{_DAX_NUMBER})\s*)?\)\s*",
    re.IGNORECASE,
)


def _table_of(match, prefix):
    """Return the table name captured by the ``prefix`` column reference."""
    return match.group(f"{prefix}_quoted") or match.group(f"{prefix}_bare")


def _py_str(text):
    """Return ``text`` as a double-quoted Python string literal."""
    escaped = text.replace("\\", "\\\\").replace('"', '\\"')
    return f'"{escaped}"'


def _to_identifier(name):
    """Turn a measure name into a usable lower-case Python variable name."""
    ident = re.sub(r"\W+", "_", name.lower()).strip("_")
    if not ident.isidentifier():
        ident = f"m_{ident}"
    return ident


def _untranslatable(expr):
    """Return the fallback comment; every line of ``expr`` stays commented out."""
    return f"# ⚠️ No se pudo traducir automáticamente: {expr}".replace("\n", "\n# ")


def convert_dax_to_pyspark(name: str, expr: str, source_schema: str = "raw") -> str:
    """Translate a simple DAX measure into the source of a PySpark code cell.

    Supported shapes (keywords are matched case-insensitively; table and column
    names keep their original spelling):

    * ``CALCULATE(SUM(T[c]), FILTER(T, T[k] > literal))``
    * ``DISTINCTCOUNT(T[c])`` and ``AVERAGE(T[c])``
    * ``DIVIDE(T[a], T[b])`` / ``DIVIDE(SUM(T[a]), SUM(T[b]))`` with an optional
      numeric alternate result

    Any other expression, including supported functions nested inside something
    else, is not guessed at: a commented-out notice is returned instead of code
    that would silently compute the wrong thing.

    Args:
        name: Measure name; used for the result variable and the printed label.
        expr: DAX expression of the measure.
        source_schema: Schema that holds the source tables.

    Returns:
        Python source code for a notebook cell (never raises on odd input).
    """
    expr = expr or ""

    match = _FILTERED_SUM.fullmatch(expr)
    if match:
        table = _table_of(match, "agg")
        # The generated code filters the aggregated table, so the filter column
        # must live in that same table.
        if _table_of(match, "cond").lower() == table.lower():
            var = _to_identifier(name)
            return "\n".join([
                _F_IMPORT,
                f"df = spark.table({_py_str(source_schema + '.' + table)})",
                f"df_filtered = df.filter(F.col({_py_str(match.group('cond_col'))}) > {match.group('value')})",
                f"{var} = df_filtered.agg(F.sum({_py_str(match.group('agg_col'))})).collect()[0][0]",
                f"print({_py_str(name + ':')}, {var})",
            ])
        return _untranslatable(expr)

    match = _SIMPLE_AGG.fullmatch(expr)
    if match:
        load = f"df = spark.table({_py_str(source_schema + '.' + _table_of(match, 'arg'))})"
        column = _py_str(match.group("arg_col"))
        if match.group("fn").upper() == "DISTINCTCOUNT":
            return "\n".join([load, f"resultado = df.select({column}).distinct().count()"])
        # AVERAGE needs F, so the cell imports it itself instead of relying on
        # some earlier cell having done so.
        return "\n".join([_F_IMPORT, load, f"resultado = df.agg(F.avg({column})).collect()[0][0]"])

    match = _DIVIDE.fullmatch(expr)
    if match:
        table = _table_of(match, "num")
        # Both operands are aggregated over a single DataFrame.
        if _table_of(match, "den").lower() == table.lower():
            num = _py_str(match.group("num_col"))
            den = _py_str(match.group("den_col"))
            alternate = match.group("alt") or "None"
            return "\n".join([
                _F_IMPORT,
                f"df = spark.table({_py_str(source_schema + '.' + table)})",
                f'agg = df.agg(F.sum({num}).alias("num"), F.sum({den}).alias("den")).collect()[0]',
                # A falsy denominator covers both 0 and a NULL sum.
                f'resultado = agg["num"] / agg["den"] if agg["den"] else {alternate}',
            ])

    return _untranslatable(expr)

# Create the generated notebook, starting with its title cell.
nb2 = new_notebook()
nb2.cells.append(new_markdown_cell(f"# 📊 Capa Semántica Generada: {semantic_model_name}"))

# For every Power BI table: a base view over the raw table, followed by a
# commented-out example showing how a semantic rule could be added.
for t in tables:
    tname = t["name"]
    nb2.cells.append(new_markdown_cell(f"## Vista base: `{tname}`"))
    # The notebook carries no language metadata, so SQL cells need the %sql
    # magic to be executed as SQL instead of being parsed as Python.
    nb2.cells.append(new_code_cell(f"""%sql
CREATE OR REPLACE VIEW semantic.{tname} AS
SELECT * FROM raw.{tname};"""))
    nb2.cells.append(new_markdown_cell("### ✍️ Ejemplo de regla semántica"))
    nb2.cells.append(new_code_cell(f"""# from pyspark.sql import functions as F
# df = spark.table("semantic.{tname}")
# df = df.withColumn("BanderaAltaVenta", F.when(F.col("SalesAmount") > 1000, 1).otherwise(0))
# df.write.format("delta").mode("overwrite").saveAsTable("semantic.{tname}_ConReglas")
"""))

# One enriched (joined) view per model relationship, optionally materialized.
# Names are derived from BOTH tables and numbered on repetition, so several
# relationships starting at the same table no longer overwrite each other's
# view through CREATE OR REPLACE.
join_name_counts = {}
for r in relationships:
    f_table = r["fromTable"]
    f_col = r["fromColumn"]
    t_table = r["toTable"]
    t_col = r["toColumn"]

    join_name = f"{f_table}_{t_table}"
    join_name_counts[join_name] = join_name_counts.get(join_name, 0) + 1
    if join_name_counts[join_name] > 1:
        # Same pair of tables related more than once (e.g. several date keys).
        join_name = f"{join_name}_{join_name_counts[join_name]}"
    view = f"vw_{join_name}"

    nb2.cells.append(new_markdown_cell(f"## Vista enriquecida: `{view}` con JOIN `{f_col}` = `{t_col}`"))
    # NOTE(review): `f.*, d.*` yields duplicate column names when both tables
    # share a column name (often the join key); confirm the view can be created
    # for your schema or list the columns explicitly.
    # %sql is required because the cell would otherwise be parsed as Python.
    nb2.cells.append(new_code_cell(f"""%sql
CREATE OR REPLACE VIEW semantic.{view} AS
SELECT f.*, d.*
FROM raw.{f_table} f
LEFT JOIN raw.{t_table} d ON f.{f_col} = d.{t_col};"""))
    if materialize:
        nb2.cells.append(new_code_cell(f"""%sql
CREATE OR REPLACE TABLE semantic.{join_name}_Materializada AS
SELECT * FROM semantic.{view};"""))

# One documentation cell plus one code cell per DAX measure.
_TRANSLATABLE_FUNCTIONS = ("SUM(", "COUNT(", "AVERAGE(", "DISTINCTCOUNT(", "CALCULATE(", "DIVIDE(")

for m in measures:
    name = m["name"]
    # Tolerate a missing or null expression instead of failing on it.
    expr = m.get("expression") or ""
    nb2.cells.append(new_markdown_cell(f"### Medida: `{name}`\n**DAX original:** `{expr}`"))

    # Fallback cell: every line of a multi-line expression must stay commented,
    # otherwise the generated code cell is not valid Python.
    not_translated = f"# ⚠️ No se pudo traducir automáticamente: {expr}".replace("\n", "\n# ")

    expr_upper = expr.upper()
    cell_source = not_translated
    if any(fn in expr_upper for fn in _TRANSLATABLE_FUNCTIONS):
        try:
            cell_source = convert_dax_to_pyspark(name, expr)
        except (IndexError, ValueError):
            # An expression the converter cannot parse must not abort the
            # generation of the whole notebook; keep the fallback cell.
            cell_source = not_translated
    nb2.cells.append(new_code_cell(cell_source))

# Save the generated notebook to disk.
path = f"/mnt/data/Capa_Semantica_{semantic_model_name}_v9.ipynb"
# The notebook contains emoji and accented characters, so write it explicitly
# as UTF-8 rather than relying on the platform's default encoding.
with open(path, "w", encoding="utf-8") as notebook_file:
    nbformat.write(nb2, notebook_file)

print("✅ Notebook generado:", path)
