# 4. Run Benchmarks

Loop over the hard-coded parameter sets and trigger the `1.IngestData` notebook in each target workspace.
This notebook contains a %%configure cell with the run list (hard-coded from config/test_parameter_sets.yml).

Note: this runner requires a notebook-managed token (mssparkutils). There is no fallback to client credentials — it must run inside a Fabric workspace.

In [None]:
%%configure -f
{
  "conf": {
    "spark.notebook.parameters": "{\"runs\":[{\"name\":\"BFF-10k-LH-to-Delta-Full-Refresh\",\"dataset_name\":\"10k\",\"source\":\"lakehouse\",\"format\":\"delta\",\"update_strategy\":\"Full Refresh\"},{\"name\":\"BFF-10k-SQL-to-WH-Full-Compare\",\"dataset_name\":\"10k\",\"source\":\"sql\",\"format\":\"warehouse\",\"update_strategy\":\"Full Compare\"},{\"name\":\"BFF-1M-LH-to-WH-Increment\",\"dataset_name\":\"1m\",\"source\":\"lakehouse\",\"format\":\"warehouse\",\"update_strategy\":\"Full Refresh\"}]}"
  }
}


In [None]:
# Minimal runner: read the runs from spark.notebook.parameters and trigger each workspace's 1.IngestData notebook
import json, time
import requests

# Base URL for every Fabric REST call made by this runner.
API_BASE = "https://api.fabric.microsoft.com/v1"
# UPLOAD_TIMEOUT is the per-request timeout (seconds) used when triggering a run.
# NOTE(review): OAUTH_TIMEOUT, POLL_SLEEP and POLL_ATTEMPTS are not referenced in
# this cell; they are kept in case other cells rely on them.
OAUTH_TIMEOUT = 30
UPLOAD_TIMEOUT = 60
POLL_SLEEP = 3
POLL_ATTEMPTS = 20

# Read runs from the %%configure cell
try:
    params_raw = spark.conf.get('spark.notebook.parameters')
except Exception:
    # A missing key or a missing Spark session is reported uniformly below.
    params_raw = None
if not params_raw:
    raise SystemExit('spark.notebook.parameters not set. Ensure the %%configure cell is present and contains the runs list.')

# Fail with a readable message rather than a raw traceback when the configured
# value is not valid JSON.
try:
    params = json.loads(params_raw)
except json.JSONDecodeError as exc:
    raise SystemExit(f'spark.notebook.parameters is not valid JSON: {exc}') from exc

runs = params.get('runs', [])
if not runs:
    print('No runs found in spark.notebook.parameters.runs')

# Acquire AAD token via notebook-managed credentials (mssparkutils). No fallback.
try:
    from notebookutils import mssparkutils
    token = mssparkutils.credentials.getToken('https://api.fabric.microsoft.com/')
except Exception as e:
    # Surface the underlying error: an import failure and a credential failure
    # need different remedies, and the generic message alone hides which it was.
    raise SystemExit(
        'Failed to obtain notebook-managed token via mssparkutils. '
        'This notebook must be run inside a Fabric workspace. '
        f'Cause: {e!r}'
    ) from e

if not token:
    raise SystemExit('mssparkutils returned no token. This notebook must be run inside a Fabric workspace.')

# Shared request headers used by every helper below.
headers = {'Authorization': f'Bearer {token}', 'Content-Type': 'application/json'}

# Helper: find workspace id by displayName
def workspace_id_by_name(display_name):
    """Return the id of the workspace whose displayName equals *display_name*.

    Walks every page of the workspace listing: the Fabric list APIs return a
    ``continuationUri`` when more results are available, so reading only the
    first page could miss an existing workspace.

    Returns ``None`` when no workspace matches. Raises ``requests.HTTPError``
    (via ``raise_for_status``) on a non-success response.
    """
    next_url = f'{API_BASE}/workspaces'
    visited = set()
    # The visited set guards against a service response that repeats a URI.
    while next_url and next_url not in visited:
        visited.add(next_url)
        resp = requests.get(next_url, headers=headers, timeout=30)
        resp.raise_for_status()
        page = resp.json()
        for ws in page.get('value', []):
            if ws.get('displayName') == display_name:
                return ws.get('id')
        next_url = page.get('continuationUri')
    return None

# Helper: find item id for a notebook displayName in a workspace
def item_id_for_notebook(workspace_id, notebook_display):
    """Return the id of the Notebook item named *notebook_display* in a workspace.

    Only items whose ``type`` is ``'Notebook'`` are considered. Every page of
    the item listing is examined by following ``continuationUri``, so a
    notebook that falls outside the first page is still found.

    Returns ``None`` when no matching notebook exists. Raises
    ``requests.HTTPError`` (via ``raise_for_status``) on a non-success response.
    """
    next_url = f'{API_BASE}/workspaces/{workspace_id}/items'
    visited = set()
    # The visited set guards against a service response that repeats a URI.
    while next_url and next_url not in visited:
        visited.add(next_url)
        resp = requests.get(next_url, headers=headers, timeout=30)
        resp.raise_for_status()
        page = resp.json()
        for item in page.get('value', []):
            if item.get('type') == 'Notebook' and item.get('displayName') == notebook_display:
                return item.get('id')
        next_url = page.get('continuationUri')
    return None

# Minimal run: POST to /workspaces/{workspaceId}/items/{itemId}/run with parameters payload (best-effort)
def trigger_run(workspace_id, item_id, param_obj):
    """POST a run request for one item and report the raw outcome.

    *param_obj* is serialised to a JSON string and sent under the
    ``'parameters'`` key of the request body. No status check is performed;
    the caller receives ``(status_code, text_preview)`` where the preview is
    at most the first 2000 characters of the response body ('' when empty).
    """
    # NOTE(review): confirm this endpoint and payload shape against the Fabric
    # job scheduler API (run-on-demand item job) — the code is marked best-effort.
    endpoint = f'{API_BASE}/workspaces/{workspace_id}/items/{item_id}/run'
    body = {'parameters': json.dumps(param_obj)}
    response = requests.post(
        endpoint,
        headers=headers,
        json=body,
        timeout=UPLOAD_TIMEOUT,
    )
    preview = (response.text or '')[:2000]
    return response.status_code, preview

def _process_run(run_cfg):
    """Resolve the target workspace/notebook for one run config and trigger it.

    The run's ``name`` is used as the workspace displayName. Returns a result
    record (dict) describing what happened; HTTP errors from the helpers
    propagate to the caller.
    """
    ws_name = run_cfg.get('name')
    if not ws_name:
        return {'workspace': None, 'status': 'skipped_no_name', 'cfg': run_cfg}
    print('\n==> Processing run for workspace:', ws_name)
    ws_id = workspace_id_by_name(ws_name)
    if not ws_id:
        print('  workspace not found:', ws_name)
        return {'workspace': ws_name, 'status': 'workspace_not_found'}
    print('  workspace_id:', ws_id)
    item_id = item_id_for_notebook(ws_id, '1.IngestData')
    if not item_id:
        print('  1.IngestData notebook not found in workspace:', ws_name)
        return {'workspace': ws_name, 'workspace_id': ws_id, 'status': 'notebook_not_found'}
    print('  found item_id:', item_id)
    status_code, text = trigger_run(ws_id, item_id, run_cfg)
    print('  trigger run response:', status_code)
    return {
        'workspace': ws_name,
        'workspace_id': ws_id,
        'item_id': item_id,
        # Every other record carries a 'status'; give triggered runs one too.
        'status': 'triggered' if 200 <= status_code < 300 else 'trigger_failed',
        'status_code': status_code,
        'response_text_preview': text,
    }


results = []
for run_cfg in runs:
    try:
        record = _process_run(run_cfg)
    except (requests.RequestException, ValueError) as exc:
        # One failing workspace (HTTP error, timeout, unparsable body) must not
        # abort the remaining runs or lose the summary; record it and move on.
        print('  request failed:', exc)
        record = {'workspace': run_cfg.get('name'), 'status': 'request_error', 'error': str(exc)}
    results.append(record)
    if 'status_code' in record:
        # small pause between triggers
        time.sleep(1)

print('\nRun summary:')
print(json.dumps(results, indent=2))
