In [None]:
%%configure -f
{
  "conf": {
    "spark.notebook.parameters": "{\"DATASETS_PARAM\": [{\"name\": \"10k\", \"row_count\": 10000, \"change_fraction\": 0.01, \"new_fraction\": 0.005, \"delete_fraction\": 0.001, \"seed\": 42, \"description\": \"Small baseline dataset (10k rows)\"}, {\"name\": \"1m\", \"row_count\": 1000000, \"change_fraction\": 0.01, \"new_fraction\": 0.005, \"delete_fraction\": 0.001, \"seed\": 42, \"description\": \"Scale test dataset (1M rows)\"}], \"PUSH_TO_AZURE_SQL\": true, \"AZURE_SQL_SERVER\": \"benchmarking-bff\", \"AZURE_SQL_DB\": \"benchmarking\", \"AZURE_SQL_SCHEMA\": \"dbo\", \"distribution\": \"uniform\", \"seed\": 42}"
  },
  "defaultLakehouse": {
    "name": "DataSourceLakehouse"
  }
}


# 0. Generate Data
## Minimal parquet writer + Azure SQL push

Parameters are supplied via spark.notebook.parameters in the %%configure cell. Token-based Azure SQL pushes use mssparkutils.credentials.getToken so no secrets or connection string credentials are required.

In [None]:
# Minimal schema_config used by the generator
# How many synthetic columns to create per family (categorical, numeric, timestamp).
schema_config = dict(
    categorical_fields=3,
    numeric_fields=5,
    timestamp_fields=2,
)


In [None]:
# Unpack parameters from SparkConf (set via the %%configure -f cell).
# DATASETS_PARAM is mandatory (a missing key raises KeyError). Every other key is optional and
# falls back to the default written next to it below.
import json
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
conf_key = "spark.notebook.parameters"
conf_str = None
# Try the session-level runtime conf first, then the SparkContext conf.
try:
    conf_str = spark.conf.get(conf_key, None)
except Exception:
    conf_str = None
if not conf_str:
    try:
        conf_str = spark.sparkContext.getConf().get(conf_key, None)
    except Exception:
        conf_str = None
if not conf_str:
    raise RuntimeError("spark.notebook.parameters not found in SparkConf. Ensure the %%configure -f cell ran and the session was restarted.")

# Malformed JSON raises json.JSONDecodeError here on purpose.
params = json.loads(conf_str)

DATASETS_PARAM = params["DATASETS_PARAM"]
PUSH_TO_AZURE_SQL = params.get("PUSH_TO_AZURE_SQL", False)
AZURE_SQL_SERVER = params.get("AZURE_SQL_SERVER")
AZURE_SQL_DB = params.get("AZURE_SQL_DB")
AZURE_SQL_SCHEMA = params.get("AZURE_SQL_SCHEMA", "dbo")
distribution = params.get("distribution", "uniform")
seed = params.get("seed", 42)

# Ensure AZURE_SQL_SERVER is a fully-qualified host name for Azure SQL (add .database.windows.net if omitted)
if AZURE_SQL_SERVER:
    if not AZURE_SQL_SERVER.lower().endswith(".database.windows.net"):
        AZURE_SQL_SERVER = AZURE_SQL_SERVER.rstrip(".") + ".database.windows.net"

# Optional quick DNS/TCP check for visibility (non-fatal; logs a warning but doesn't stop the notebook).
# Only probe when a push is requested and a server is configured: probing a missing host would
# resolve to the local machine and report on something unrelated to Azure SQL.
if PUSH_TO_AZURE_SQL and AZURE_SQL_SERVER:
    import socket
    try:
        # create_connection resolves the name and opens the TCP connection; the context manager
        # closes the socket even if something fails afterwards.
        with socket.create_connection((AZURE_SQL_SERVER, 1433), timeout=5):
            pass
        print(f"AZURE_SQL_SERVER resolved and TCP 1433 OK: {AZURE_SQL_SERVER}")
    except Exception as _e:
        # Non-fatal: warn the user so they can fix parameters or DNS if needed
        print("AZURE_SQL_SERVER DNS/TCP check warning:", _e)
elif PUSH_TO_AZURE_SQL:
    print("AZURE_SQL_SERVER DNS/TCP check warning: PUSH_TO_AZURE_SQL is enabled but AZURE_SQL_SERVER is not set")

print(f"Loaded parameters: datasets={[d['name'] for d in DATASETS_PARAM]}, PUSH_TO_AZURE_SQL={PUSH_TO_AZURE_SQL}, AZURE_SQL_SERVER={AZURE_SQL_SERVER}, AZURE_SQL_DB={AZURE_SQL_DB}")


In [None]:
import os
import numpy as np
import pandas as pd
import random
import pytz
from datetime import datetime, timedelta
from pathlib import Path

print('Imports OK — proceeding with generation')


In [None]:
# Helper: ensure timestamp columns are timezone-naive UTC before writing/parquet or pushing to SQL
import datetime as _datetime
def ensure_naive_utc(df_in):
    """Normalize every ``ts_*`` column of ``df_in`` to timezone-naive UTC datetimes.

    The frame is modified in place and is also returned so the call can be chained.

    - tz-aware values are converted to UTC and then lose their tzinfo.
    - naive values are assumed to already be UTC and keep their wall-clock value.
    - columns that mix aware and naive values are handled by the same rules.
    - values that cannot be parsed as datetimes become NaT.

    Args:
        df_in: pandas DataFrame; only columns whose name starts with ``ts_`` are touched.

    Returns:
        The same DataFrame object that was passed in.
    """
    for col in list(df_in.columns):
        if isinstance(col, str) and col.startswith("ts_"):
            # utc=True converts aware values to UTC and localizes naive values as UTC in one pass,
            # so every input shape ends up as a single tz-aware UTC column.
            as_utc = pd.to_datetime(df_in[col], errors="coerce", utc=True)
            # Drop the tzinfo: the stored values are naive datetimes expressed in UTC.
            df_in[col] = as_utc.dt.tz_localize(None)
    return df_in

print('ensure_naive_utc helper ready')


In [None]:
def create_synthetic_schema(config):
    """Build the ``column name -> generator`` mapping described by ``config``.

    Each generator takes a row count ``n`` and returns ``n`` values. Columns are emitted in
    family order: ``cat_1..``, then ``num_1..``, then ``ts_1..``.

    Args:
        config: mapping with integer entries "categorical_fields", "numeric_fields" and
            "timestamp_fields".

    Returns:
        dict mapping column names to single-argument callables.
    """
    # Seed both RNGs from the notebook-level `seed` so repeated runs are reproducible.
    random.seed(seed)
    np.random.seed(seed)

    def _categorical(n, _choices=['A','B','C','D']):
        return np.random.choice(_choices, size=n)

    def _numeric(n):
        return np.random.uniform(0, 1000, size=n)

    # Naive timestamps at millisecond spacing; the dataframe builder localizes them to UTC later.
    start = pd.Timestamp("2023-01-01")

    def _timestamps(n):
        return pd.date_range(start, periods=n, freq="ms")

    families = (
        ("cat", "categorical_fields", _categorical),
        ("num", "numeric_fields", _numeric),
        ("ts", "timestamp_fields", _timestamps),
    )
    generators = {}
    for prefix, count_key, make_values in families:
        for position in range(1, config[count_key] + 1):
            generators[f"{prefix}_{position}"] = make_values
    return generators

schema = create_synthetic_schema(schema_config)


In [None]:
def generate_base_dataframe(row_count, schema, distribution, seed):
    """Create the base dataset with dtypes fixed at generation time.

    All parameters are required; the inputs are printed for traceability.

    Args:
        row_count: number of rows to generate.
        schema: mapping of column name to a generator callable taking the row count.
        distribution: "skewed" draws numeric columns from an exponential distribution,
            "null_injection" blanks roughly 5% of each numeric column, anything else uses the
            generator output unchanged.
        seed: value passed to ``np.random.seed`` before generation.

    Returns:
        DataFrame with the schema columns, an int64 ``id`` running from 1 to ``row_count`` and an
        ``update_type`` column set to "insert".
    """
    print(f"generate_base_dataframe called with row_count={row_count}, distribution={distribution}, seed={seed}")
    np.random.seed(seed)

    columns = {}
    for name, make_values in schema.items():
        is_numeric = "num" in name
        if is_numeric and distribution == "skewed":
            values = np.random.exponential(500, size=row_count)
        else:
            values = make_values(row_count)
            if is_numeric and distribution == "null_injection":
                # Draw the mask after the values so the random stream order stays fixed.
                values[np.random.rand(row_count) < 0.05] = np.nan
        columns[name] = values

    frame = pd.DataFrame(columns)
    frame["id"] = np.arange(1, row_count + 1).astype("int64")

    # One pass over the columns; the prefix decides which dtype rule applies.
    for name in frame.columns:
        if name.startswith("ts_"):
            stamps = pd.to_datetime(frame[name], errors='coerce')
            # Naive timestamps are tagged as UTC here; they are normalized to naive UTC before writing.
            if getattr(stamps.dt, 'tz', None) is None:
                stamps = stamps.dt.tz_localize(pytz.UTC)
            frame[name] = stamps
        elif name.startswith("num_"):
            # float64 with NaN marking missing values
            frame[name] = pd.to_numeric(frame[name], errors='coerce').astype("float64")
        elif name.startswith("cat_"):
            # pandas string dtype keeps NA semantics
            frame[name] = frame[name].astype("string")

    frame["update_type"] = "insert"
    return frame


In [None]:
def generate_updates(df, change_pct, new_pct, delete_pct, seed):
    """Build a shuffled batch of update, insert and delete rows derived from ``df``.

    All parameters are required; the inputs are printed for traceability.

    - Changed rows: a random sample of existing rows with ``num_1`` (when present) shifted by a
      random amount and ``update_type`` set to "update".
    - New rows: a random sample of existing rows given fresh ids ``len(df)+1 ..`` and
      ``update_type`` "insert".
    - Deleted rows: one row per deleted id with ``update_type`` "delete" and every other column NA.
      Because these rows hold ``pd.NA`` in object columns, payload columns of the combined frame
      may come back as object dtype.

    Args:
        df: base DataFrame; it is not modified. Must contain an ``id`` column when deletes are
            requested.
        change_pct, new_pct, delete_pct: fractions of ``len(df)``; each count is truncated with
            ``int()``, so small frames can produce zero rows for a category.
        seed: seeds NumPy and ``random`` and drives the final shuffle.

    Returns:
        DataFrame with the columns of ``df`` plus ``update_type`` (empty when every count is zero).
    """
    print(f"generate_updates called with rows={len(df)}, change_pct={change_pct}, new_pct={new_pct}, delete_pct={delete_pct}, seed={seed}")
    np.random.seed(seed)
    random.seed(seed)
    n = len(df)

    # Only non-empty pieces are collected. Empty placeholder frames are not needed for concat, and
    # building them from `df.columns + ["update_type"]` duplicated the update_type column whenever
    # the base frame already carried it, which broke the concat below.
    parts = []

    # changed rows
    changed_count = int(n * change_pct)
    if changed_count > 0:
        change_idx = np.random.choice(df.index, changed_count, replace=False)
        changed_df = df.loc[change_idx].copy()
        if "num_1" in changed_df.columns:
            changed_df["num_1"] = changed_df["num_1"] + np.random.uniform(1, 10, changed_count)
        changed_df["update_type"] = "update"
        parts.append(changed_df)

    # new rows (inserts)
    new_count = int(n * new_pct)
    if new_count > 0:
        # sample() without random_state draws from NumPy's global state, which was seeded above
        new_df = df.sample(new_count).copy()
        new_df["id"] = np.arange(n + 1, n + new_count + 1)
        new_df["update_type"] = "insert"
        parts.append(new_df)

    # deletes: same columns as df, only id and update_type populated; everything else NA
    delete_count = int(n * delete_pct)
    if delete_count > 0:
        delete_idx = np.random.choice(df.index, delete_count, replace=False)
        deleted_df = pd.DataFrame({c: [pd.NA] * delete_count for c in df.columns})
        # .values drops the source index so ids line up positionally with the fresh frame
        deleted_df["id"] = df.loc[delete_idx, "id"].values
        # assigns in place when df already has update_type, otherwise appends it as the last column
        deleted_df["update_type"] = "delete"
        parts.append(deleted_df)

    if not parts:
        # Nothing to emit: return an empty frame with the expected column layout.
        empty = df.iloc[:0].copy()
        if "update_type" not in empty.columns:
            empty["update_type"] = pd.Series(dtype="object")
        return empty

    # combine and shuffle
    updates = pd.concat(parts, ignore_index=True, sort=False)
    return updates.sample(frac=1, random_state=seed).reset_index(drop=True)


In [None]:
# Concise push helper that trusts generation-time schema enforcement
import struct
import time
import datetime
import pyodbc
import numpy as np
import pandas as pd
from notebookutils import mssparkutils

# Pre-connect attribute id passed to pyodbc via attrs_before to supply an access token.
SQL_COPT_SS_ACCESS_TOKEN = 1256

def _token_struct():
    """Fetch an access token for Azure SQL and pack it for the ODBC driver.

    The token comes from ``mssparkutils.credentials.getToken`` for the
    ``https://database.windows.net/`` audience.

    Returns:
        bytes: a 4-byte native-order length prefix followed by the token encoded as UTF-16-LE.
    """
    t = mssparkutils.credentials.getToken("https://database.windows.net/")
    # Same bytes as interleaving a NUL after every byte for an ASCII token, and also correct for
    # any non-ASCII character.
    exptoken = t.encode("utf-16-le")
    return struct.pack("=i", len(exptoken)) + exptoken

def _col_type_from_name(col):
    if col == "id":
        return "BIGINT"
    if col.startswith("num_"):
        return "FLOAT"
    if col.startswith("cat_"):
        return "NVARCHAR(100)"
    if col.startswith("ts_"):
        # Use DATETIME2 (no timezone) for broader client compatibility during fast ingestion
        return "DATETIME2"
    if col == "update_type":
        return "NVARCHAR(32)"
    return "NVARCHAR(MAX)"

def _py_val(v):
    if pd.isna(v):
        return None
    if isinstance(v, pd.Timestamp):
        # convert pandas Timestamp to timezone-naive UTC datetime for DATETIME2 column
        dt = v.to_pydatetime()
        if getattr(dt, 'tzinfo', None) is not None:
            # normalize to UTC then drop tzinfo
            dt = dt.astimezone(datetime.timezone.utc).replace(tzinfo=None)
        return dt
    if isinstance(v, (np.integer, np.int64, np.int32)):
        return int(v)
    if isinstance(v, (np.floating, np.float64, np.float32)):
        return float(v)
    if isinstance(v, (np.bool_,)):
        return bool(v)
    return v

def _pyodbc_conn_with_retry(server=None, database=None, timeout=120, retries=2, backoff=2):
    server = server or globals().get("AZURE_SQL_SERVER")
    database = database or globals().get("AZURE_SQL_DB")
    if not server or not database:
        raise RuntimeError("AZURE_SQL_SERVER and AZURE_SQL_DB must be set (or passed in)")
    conn_str = (
        "Driver={ODBC Driver 18 for SQL Server};"
        f"Server=tcp:{server},1433;"
        f"Database={database};"
        "Encrypt=yes;TrustServerCertificate=no;"
    )
    last_exc = None
    for attempt in range(1, retries + 1):
        try:
            return pyodbc.connect(conn_str, attrs_before={SQL_COPT_SS_ACCESS_TOKEN: _token_struct()}, timeout=timeout)
        except Exception as e:
            last_exc = e
            if attempt < retries:
                time.sleep(backoff * attempt)
            else:
                raise
    raise last_exc

def push_df_concise(df, table_name, schema_name="dbo", server=None, database=None):
    """Push a DataFrame to Azure SQL, replacing the target table.

    The table is dropped if it exists and recreated with column types derived from the column
    names (see ``_col_type_from_name``); every column is nullable. Rows are inserted in a single
    ``executemany`` call with ``fast_executemany`` enabled. The input frame is not modified.

    Args:
        df: DataFrame to upload; dtypes are assumed to be enforced by the generators.
        table_name: target table name (without schema).
        schema_name: target schema.
        server, database: forwarded to ``_pyodbc_conn_with_retry``.

    Returns:
        Number of rows inserted.
    """
    def _quote(identifier):
        # Bracket-quote an identifier, doubling any closing bracket inside it.
        return "[" + str(identifier).replace("]", "]]") + "]"

    df2 = df.copy()
    # minimal sanitization: empty-string -> NA
    for c in df2.columns:
        if df2[c].dtype == object:
            df2[c] = df2[c].replace("", pd.NA)

    # The same quoted column names are used for the DDL and the INSERT so the two always agree.
    quoted_cols = [_quote(c) for c in df2.columns]
    cols_ddl = [f"{quoted} {_col_type_from_name(c)} NULL" for quoted, c in zip(quoted_cols, df2.columns)]
    full_table = f"{_quote(schema_name)}.{_quote(table_name)}"
    # The name also appears inside an N'...' literal, where single quotes must be doubled.
    object_name_literal = full_table.replace("'", "''")
    create_sql = (
        f"IF OBJECT_ID(N'{object_name_literal}', 'U') IS NOT NULL DROP TABLE {full_table}; "
        f"CREATE TABLE {full_table} ({', '.join(cols_ddl)});"
    )
    placeholders = ", ".join("?" for _ in quoted_cols)
    insert_sql = f"INSERT INTO {full_table} ({', '.join(quoted_cols)}) VALUES ({placeholders})"
    # Convert values before opening the connection so a conversion error cannot strand one.
    records = [tuple(_py_val(v) for v in row) for row in df2.itertuples(index=False, name=None)]

    conn = _pyodbc_conn_with_retry(server=server, database=database)
    try:
        cur = conn.cursor()
        try:
            cur.execute(create_sql)
            conn.commit()
            # Skip the insert for an empty frame; the (empty) table has still been created.
            if records:
                cur.fast_executemany = True
                cur.executemany(insert_sql, records)
                conn.commit()
        finally:
            cur.close()
    finally:
        # Always release the connection, including when the DDL or the insert fails.
        conn.close()
    return len(records)


In [None]:
# Token-based fast uploader helper for large datasets (uses mssparkutils token + pyodbc fast_executemany)
import math
import time
import struct
import datetime as _datetime
import numpy as _np
import pandas as _pd

def upload_df_token_fast(df, table_name, schema_name="dbo", server=None, database=None, batch_size=5000, driver='ODBC Driver 18 for SQL Server'):
    """Upload a pandas DataFrame to Azure SQL using token-based pyodbc and batched fast_executemany.

    - Drops and recreates the target table to match df columns.
    - Uses mssparkutils.credentials.getToken for an AAD token and passes it via attrs_before.
    - Batches inserts to reduce memory and commit overhead; each batch is committed separately.
    - ts_* columns are normalized to timezone-naive UTC first; this rewrites those columns on the
      frame that was passed in.

    Args:
        df: DataFrame to upload.
        table_name: target table name (without schema).
        schema_name: target schema.
        server: host name; falls back to the global ``AZURE_SQL_SERVER`` when empty.
        database: database name; falls back to the global ``AZURE_SQL_DB`` when empty.
        batch_size: rows per ``executemany`` call; must be at least 1.
        driver: ODBC driver name used in the connection string.

    Returns:
        Number of rows inserted.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
        RuntimeError: if server/database are missing, or the database does not answer a probe
            query within 60 seconds.
    """
    # Validate arguments before importing or contacting anything external.
    batch_size = int(batch_size)
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    server = server or globals().get('AZURE_SQL_SERVER')
    database = database or globals().get('AZURE_SQL_DB')
    if not server or not database:
        raise RuntimeError('server and database must be provided or set in globals')

    # local imports to keep global namespace clean
    from notebookutils import mssparkutils
    import pyodbc

    # pyodbc pre-connect attribute id used to hand the access token to the driver
    sql_copt_ss_access_token = 1256

    # normalize timestamp columns to timezone-naive UTC (reuse ensure_naive_utc logic if present)
    try:
        df = ensure_naive_utc(df)
    except Exception:
        # Fallback when the helper is unavailable or fails: utc=True converts aware values to UTC
        # and treats naive values as UTC, then the tzinfo is dropped.
        for c in df.columns:
            if c.startswith('ts_'):
                df[c] = _pd.to_datetime(df[c], errors='coerce', utc=True).dt.tz_localize(None)

    def _quote(identifier):
        # Bracket-quote an identifier, doubling any closing bracket inside it.
        return '[' + str(identifier).replace(']', ']]') + ']'

    # build DDL
    def _col_type(col):
        if col == 'id':
            return 'BIGINT'
        if col.startswith('num_'):
            return 'FLOAT'
        if col.startswith('cat_'):
            return 'NVARCHAR(100)'
        if col.startswith('ts_'):
            return 'DATETIME2'
        if col == 'update_type':
            return 'NVARCHAR(32)'
        return 'NVARCHAR(MAX)'

    # The same quoted column names are used for the DDL and the INSERT so the two always agree.
    quoted_cols = [_quote(c) for c in df.columns]
    cols_ddl = ', '.join(f'{quoted} {_col_type(c)} NULL' for quoted, c in zip(quoted_cols, df.columns))
    display_table = f"{schema_name}.{table_name}"  # unquoted form, used only in progress messages
    full_table = f"{_quote(schema_name)}.{_quote(table_name)}"
    # The name also appears inside an N'...' literal, where single quotes must be doubled.
    object_name_literal = full_table.replace("'", "''")
    create_sql = (
        f"IF OBJECT_ID(N'{object_name_literal}', 'U') IS NOT NULL DROP TABLE {full_table}; "
        f"CREATE TABLE {full_table} ({cols_ddl});"
    )
    placeholders = ', '.join('?' for _ in quoted_cols)
    insert_sql = f"INSERT INTO {full_table} ({', '.join(quoted_cols)}) VALUES ({placeholders})"

    def _to_py(v):
        # Convert one cell into a plain Python value accepted by pyodbc.
        if _pd.isna(v):
            return None
        if isinstance(v, _pd.Timestamp):
            dt = v.to_pydatetime()
            if getattr(dt, 'tzinfo', None) is not None:
                dt = dt.astimezone(_datetime.timezone.utc).replace(tzinfo=None)
            return dt
        if isinstance(v, (_np.integer,)):
            return int(v)
        if isinstance(v, (_np.floating,)):
            return float(v)
        if isinstance(v, (_np.bool_,)):
            return bool(v)
        return v

    # get token and build token struct for pyodbc: 4-byte length prefix + UTF-16-LE token bytes
    t = mssparkutils.credentials.getToken('https://database.windows.net/')
    exptoken = t.encode('utf-16-le')
    token_struct = struct.pack('=i', len(exptoken)) + exptoken

    conn_str = (
        f"Driver={{{driver}}};"
        f"Server=tcp:{server},1433;"
        f"Database={database};"
        "Encrypt=yes;TrustServerCertificate=no;"
        "ConnectRetryCount=4;"
    )
    # open connection
    conn = pyodbc.connect(conn_str, attrs_before={sql_copt_ss_access_token: token_struct}, timeout=300)
    try:
        cur = conn.cursor()
        try:
            # makes sure Azure SQL Database is awake: probe until a row comes back or 60s elapse
            deadline = time.time() + 60
            last_error = None
            while True:
                try:
                    cur.execute("SELECT DB_NAME() AS db, SUSER_SNAME() AS user_name")
                    row = cur.fetchone()
                except Exception as probe_error:
                    last_error = probe_error
                    row = None
                if row:
                    print(f"    Verified SQL connection: db={row[0]}, user={row[1]}")
                    break
                if time.time() >= deadline:
                    # Chain the last probe failure so the underlying cause is visible.
                    raise RuntimeError("Timed out waiting for Azure SQL") from last_error
                # Pause after every unsuccessful probe so the loop never spins.
                time.sleep(5)

            # create/replace table
            cur.execute(create_sql)
            conn.commit()

            total = len(df)
            if total == 0:
                # The empty table has been created; there is nothing to insert.
                return 0

            cur.fast_executemany = True
            inserted = 0
            for i in range(0, total, batch_size):
                batch = df.iloc[i:i+batch_size]
                records = [tuple(_to_py(v) for v in row) for row in batch.itertuples(index=False, name=None)]
                cur.executemany(insert_sql, records)
                conn.commit()
                inserted += len(records)
                print(f"Uploaded {inserted}/{total} rows to {display_table} (batch finished)")
            return inserted
        finally:
            cur.close()
    finally:
        # Always release the connection, including on timeout or a failed batch.
        conn.close()

print('upload_df_token_fast helper loaded')


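## --- Save Loop (per-dataset paths and filenames) — strict inputs, best-effort lakehouse/SQL steps
This loop reads DATASETS_PARAM directly, so missing or malformed keys raise immediately. The local and controller parquet writes are strict. The lakehouse copy and the Azure SQL push are best-effort: a failure there is printed and the loop continues.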

In [None]:
from pathlib import Path
data_root = Path('data')

# For every configured dataset: generate base + updates, write local parquet files, copy them to
# the lakehouse (best-effort) and to the controller folder, then optionally push to Azure SQL.
for ds in DATASETS_PARAM:
    # Required keys: a missing or malformed value raises here on purpose.
    name = ds['name']
    row_count_ds = int(ds['row_count'])
    change_fraction = float(ds['change_fraction'])
    new_fraction = float(ds['new_fraction'])
    delete_fraction = float(ds['delete_fraction'])
    seed_ds = int(ds['seed'])

    print(f"\n=== Generating dataset {name}: rows={row_count_ds}")
    base_output_folder = f"/lakehouse/default/Files/{name}base/"
    updates_output_folder = f"/lakehouse/default/Files/{name}updates/"
    local_dir = data_root / name
    local_dir.mkdir(parents=True, exist_ok=True)

    base_df = generate_base_dataframe(row_count_ds, schema, distribution, seed_ds)
    # Normalize ts_* columns to timezone-naive UTC before writing/parquet or push
    base_df = ensure_naive_utc(base_df)
    base_file_local = local_dir / 'base.parquet'
    base_df.to_parquet(str(base_file_local), engine='pyarrow', index=False, coerce_timestamps='ms')
    print('  wrote local base:', base_file_local)

    updates_df = generate_updates(base_df, change_fraction, new_fraction, delete_fraction, seed_ds)
    updates_df = ensure_naive_utc(updates_df)
    updates_file_local = local_dir / 'updates.parquet'
    updates_df.to_parquet(str(updates_file_local), engine='pyarrow', index=False, coerce_timestamps='ms')
    print('  wrote local updates:', updates_file_local)

    # Best-effort lakehouse write using filesystem APIs — keep simple; don't attempt retries here.
    try:
        Path(base_output_folder).mkdir(parents=True, exist_ok=True)
        (Path(base_output_folder) / 'base.parquet').write_bytes(base_file_local.read_bytes())
        Path(updates_output_folder).mkdir(parents=True, exist_ok=True)
        (Path(updates_output_folder) / 'updates.parquet').write_bytes(updates_file_local.read_bytes())
        print('  wrote lakehouse copies ->', base_output_folder, updates_output_folder)
    except Exception as e:
        # Still non-fatal, but report the reason so a missing mount or permission problem is visible.
        print('  could not write to lakehouse path (skipped):', e)

    ctrl_dir = data_root / 'controller' / name
    ctrl_dir.mkdir(parents=True, exist_ok=True)
    (ctrl_dir / 'base.parquet').write_bytes(base_file_local.read_bytes())
    (ctrl_dir / 'updates.parquet').write_bytes(updates_file_local.read_bytes())
    print('  wrote controller copies ->', ctrl_dir)

    # Azure SQL push (token-based preferred). Keep behavior explicit: replace tables for current run.
    if PUSH_TO_AZURE_SQL:
        # Availability probe only: the import result itself is not used here.
        try:
            from notebookutils import mssparkutils
            has_token = True
        except Exception:
            has_token = False

        # A failed push is reported and the loop moves on to the next dataset.
        try:
            if has_token:
                print('  Using token-based fast upload for SQL (upload_df_token_fast)')
                upload_df_token_fast(base_df, f"base_{name}", schema_name=AZURE_SQL_SCHEMA, server=AZURE_SQL_SERVER, database=AZURE_SQL_DB, batch_size=5000)
                upload_df_token_fast(updates_df, f"updates_{name}", schema_name=AZURE_SQL_SCHEMA, server=AZURE_SQL_SERVER, database=AZURE_SQL_DB, batch_size=5000)
            else:
                print('  mssparkutils token not available; falling back to push_df_concise (pyodbc token attempt inside)')
                push_df_concise(base_df, f"base_{name}", schema_name=AZURE_SQL_SCHEMA)
                push_df_concise(updates_df, f"updates_{name}", schema_name=AZURE_SQL_SCHEMA)
        except Exception as e:
            print('  azure sql push failed (continuing):', e)

print('\nGeneration loop complete.')


## --- Metadata Logging
Log simple metadata about the files created. Kept minimal to avoid masking upstream errors.

In [None]:
def log_metadata(file_path, row_count_val, fmt, distribution_val, update_type):
    """Write a small JSON sidecar describing a generated data file.

    The sidecar sits next to the data file: ``name.parquet`` -> ``name.meta.json``. For a file
    without a ``.parquet`` suffix, ``.meta.json`` is appended to the full file name so the data
    file itself is never overwritten. Only the file name is rewritten; directory names that
    contain ``.parquet`` are left alone.

    Args:
        file_path: path of the data file (str or path-like).
        row_count_val: number of rows recorded in the sidecar.
        fmt: file format label, e.g. "parquet".
        distribution_val: distribution label recorded in the sidecar.
        update_type: label describing the file's role, e.g. "base".

    Returns:
        None. The sidecar path is printed.
    """
    import json
    from pathlib import Path

    data_path = Path(file_path)
    if data_path.suffix == '.parquet':
        meta_path = data_path.with_suffix('.meta.json')
    else:
        meta_path = data_path.with_name(data_path.name + '.meta.json')

    meta = {
        # str() keeps the entry JSON-serializable when a Path object is passed in
        "file": str(file_path),
        "rows": row_count_val,
        "format": fmt,
        "distribution": distribution_val,
        "update_type": update_type,
        "timestamp": pd.Timestamp.now().isoformat()
    }
    meta_file = str(meta_path)
    with open(meta_file, 'w') as f:
        json.dump(meta, f, indent=2)
    print(f"Metadata logged: {meta_file}")

# Write a JSON sidecar next to each local parquet file that exists for the configured datasets.
for ds in DATASETS_PARAM:
    name, row_count_ds = ds['name'], int(ds['row_count'])
    local_dir = data_root / name
    base_file_local, updates_file_local = local_dir / 'base.parquet', local_dir / 'updates.parquet'
    if base_file_local.exists():
        # The base row count is taken from the dataset parameters, not re-read from the file.
        log_metadata(str(base_file_local), row_count_ds, 'parquet', distribution, 'base')
    if not updates_file_local.exists():
        continue
    # The updates row count is measured by reading the file back.
    log_metadata(
        str(updates_file_local),
        len(pd.read_parquet(str(updates_file_local), engine='pyarrow')),
        'parquet',
        distribution,
        'batch_updates',
    )
