In [1]:
# --- AI Navigator Synthetic Benchmark Generator (Milestone 1–4 Support) ---
# Fully aligned with /api/benchmark, /api/sentiment/heatmap, and related endpoints
# All outputs saved to your Downloads/demo_data folder for easy access

import pandas as pd
import numpy as np
from scipy.stats import truncnorm
import json, os, datetime

# --- Setup ---
np.random.seed(42)

# Save directly to your Downloads folder
out_dir = os.path.join(os.path.expanduser("~"), "Downloads")
os.makedirs(out_dir, exist_ok=True)

# --- 1) Define Structure ---
dimensions = [
    "Strategy and Vision", "Data", "Technology", "Talent and Skills",
    "Organisation and Processes", "Innovation",
    "Adaptation & Adoption", "Ethics and Responsibility"
]
constructs_per_dim = 4

def bounded_normal(mean, sd, low=0, high=100, n=1):
    a, b = (low - mean) / sd, (high - mean) / sd
    return truncnorm(a, b, loc=mean, scale=sd).rvs(n)[0]

# --- 2) Capability Benchmark (flat table) ---
rows = []
for d in dimensions:
    for i in range(1, constructs_per_dim + 1):
        mean = np.random.uniform(55, 85)
        sd = np.random.uniform(5, 15)
        rows.append({
            "dimension": d,
            "construct": f"{d} Construct {i}",
            "benchmark_mean": round(mean, 1),
            "benchmark_min": round(max(0, mean - 2 * sd), 1),
            "benchmark_max": round(min(100, mean + 2 * sd), 1)
        })

cap_df = pd.DataFrame(rows)
cap_df.to_csv(f"{out_dir}/sample_capability.csv", index=False)

# --- 3) Capability Benchmark (API-friendly nested JSON) ---
api_payload = {
    "benchmark_name": "Synthetic OECD Benchmark",
    "benchmark_type": "synthetic",
    "created_at": datetime.date.today().strftime("%Y-%m-%d"),
    "data": [
        {
            "dimension": d,
            "constructs": cap_df[cap_df["dimension"] == d][
                ["construct", "benchmark_mean", "benchmark_min", "benchmark_max"]
            ].to_dict(orient="records")
        }
        for d in dimensions
    ]
}

with open(f"{out_dir}/benchmark.json", "w") as f:
    json.dump(api_payload, f, indent=2)

# --- 4) Sentiment Benchmark (25 zones for heatmap) ---
sentiment_df = pd.DataFrame({
    "area_id": range(1, 26),
    "area_name": [f"Sentiment Area {i}" for i in range(1, 26)],
    "sentiment_score": np.random.randint(40, 90, size=25),
    "benchmark_type": "synthetic"
})
sentiment_df.to_csv(f"{out_dir}/sample_sentiment.csv", index=False)

print("Synthetic benchmark files created in your Downloads/demo_data folder:")
print(os.listdir(out_dir))

Synthetic benchmark files created in your Downloads/demo_data folder:
['1120_Inf_A_Survey_on_Graph_Neural_Networks_for_Intrusion_Detection_Systems__Methods__Trends_and_Challenges.pdf', '16hr_week_plan.docx', '1738520817547.jpg', '1748712938403.jpg', '20250612 003 Navigating AI Adoption.pptx.pdf', '20250805 002 Kickstart with AI Beeckestijn - Update.docx', '20250820 AI Adoption Navigator Vision Roadmap and Pitch.docx', '20250905 - 100 - Sentiments Report Vitens (1).xlsx', '20250905 - 100 - Sentiments Report Vitens (2).xlsx', '20250905 - 100 - Sentiments Report Vitens.xlsx', '20250907 - 003 - Categories and Actions (sentiment scan).xlsx - EN.csv', '20250910 - Capability Report Vitens.xlsx', '2410.03688v1.pdf', '2nd Development Phase -  Pilot-Ready MVP.md', '6854893-Master Thesis-Barocsai.pdf', 'Adobe Scan 07 Jul 2025 (1)-1.pdf', 'AI_Adoption_Navigator_Scope_of_Work.docx', 'Aligning Graph Metrics with Cloud Deployment Characteristics.pdf', 'all_workloads_bigFlows.parquet', 'all_workloads_

In [3]:
# --- AI Navigator Synthetic Benchmark Generator (Milestones 2–4) ---
# Continues from the Milestone 1 notebook
# Saves all outputs to your Downloads/demo_data folder

import os, json, datetime
import numpy as np
import pandas as pd

# reuse existing dataframes (cap_df, sentiment_df)
out_dir = os.path.join(os.path.expanduser("~"), "Downloads")
os.makedirs(out_dir, exist_ok=True)

# -------------------------------------------------------------------
# Milestone 2 — Sentiment Flow & Visualization (support)
# -------------------------------------------------------------------

sentiment_preview = sentiment_df.to_dict(orient="records")
with open(f"{out_dir}/sentiment_preview.json", "w") as f:
    json.dump(sentiment_preview, f, indent=2)
print("Created: sentiment_preview.json")

# -------------------------------------------------------------------
# Milestone 3 — Capability Flow & Advanced Features (benchmarking logic)
# -------------------------------------------------------------------

# Dimension-level benchmark
dim_summary = (
    cap_df.groupby("dimension", as_index=False)["benchmark_mean"]
    .mean()
    .rename(columns={"benchmark_mean": "dimension_benchmark_mean"})
)
dim_summary.to_json(f"{out_dir}/dimension_benchmark.json", orient="records", indent=2)
print("Created: dimension_benchmark.json")

# Simulated gaps for demo verification
actual = cap_df.copy()
actual["actual_score"] = np.clip(
    actual["benchmark_mean"] + np.random.normal(0, 8, size=len(actual)), 0, 100
)
actual["gap"] = (actual["actual_score"] - actual["benchmark_mean"]).round(1)
actual[["dimension", "construct", "benchmark_mean", "actual_score", "gap"]].to_csv(
    f"{out_dir}/gap_examples.csv", index=False
)
print("Created: gap_examples.csv")

# Optional regional benchmark variants
def vary_benchmark(df, delta_mean):
    out = df.copy()
    out["benchmark_mean"] = np.clip(out["benchmark_mean"] + delta_mean, 0, 100)
    return out

def make_api_json_from_capdf(df):
    return {
        "benchmark_name": "Synthetic OECD Benchmark (Variant)",
        "benchmark_type": "synthetic",
        "created_at": datetime.date.today().strftime("%Y-%m-%d"),
        "data": [
            {
                "dimension": d,
                "constructs": df[df["dimension"] == d][
                    ["construct", "benchmark_mean", "benchmark_min", "benchmark_max"]
                ].to_dict(orient="records"),
            }
            for d in df["dimension"].unique()
        ],
    }

eu_df = vary_benchmark(cap_df, +2.0)
na_df = vary_benchmark(cap_df, -2.0)

with open(f"{out_dir}/benchmark_region_eu.json", "w") as f:
    json.dump(make_api_json_from_capdf(eu_df), f, indent=2)
with open(f"{out_dir}/benchmark_region_na.json", "w") as f:
    json.dump(make_api_json_from_capdf(na_df), f, indent=2)

print("Created: benchmark_region_eu.json, benchmark_region_na.json")

# -------------------------------------------------------------------
# Milestone 4 — Interventions, ROI & Finalization (support)
# -------------------------------------------------------------------

# Intervention rules (thresholds)
intervention_rules = [
    {"threshold": 60, "applies_to": "construct", "intervention": "Targeted Training"},
    {"threshold": 55, "applies_to": "construct", "intervention": "Data Quality Uplift"},
    {"threshold": 65, "applies_to": "dimension", "intervention": "Leadership Alignment"},
]
with open(f"{out_dir}/intervention_rules.json", "w") as f:
    json.dump(intervention_rules, f, indent=2)
print("Created: intervention_rules.json")

# ROI glimpse table
roi_df = cap_df[["dimension", "construct"]].copy()
roi_df["gain_low"] = np.random.randint(5, 10, size=len(roi_df))
roi_df["gain_high"] = roi_df["gain_low"] + np.random.randint(4, 9, size=len(roi_df))
roi_df.to_csv(f"{out_dir}/sample_roi.csv", index=False)
print("Created: sample_roi.csv")


Created: sentiment_preview.json
Created: dimension_benchmark.json
Created: gap_examples.csv
Created: benchmark_region_eu.json, benchmark_region_na.json
Created: intervention_rules.json
Created: sample_roi.csv
