In [5]:
# --- AI Navigator Synthetic Benchmark Generator (Milestone 1) ---
# Fully aligned with /api/benchmark and heatmap expectations

import pandas as pd
import numpy as np
from scipy.stats import truncnorm
import json, os, datetime

# Seed NumPy's global RNG so every synthetic draw below is repeatable.
np.random.seed(42)

# 1) Structure
# Capability dimensions, in the order they are written to the benchmark files.
dimensions = [
    "Strategy and Vision",
    "Data",
    "Technology",
    "Talent and Skills",
    "Organisation and Processes",
    "Innovation",
    "Adaptation & Adoption",
    "Ethics and Responsibility",
]

# Number of synthetic constructs generated under each dimension.
constructs_per_dim = 4

# Folder that receives the generated demo files; created if it is missing.
out_dir = "public/demo_data"
os.makedirs(out_dir, exist_ok=True)

def bounded_normal(mean, sd, low=0, high=100, n=1):
    """Draw value(s) from a normal distribution truncated to [low, high].

    Parameters
    ----------
    mean, sd : float
        Location and scale of the underlying (untruncated) normal.
        ``sd`` must be strictly positive.
    low, high : float
        Inclusive truncation bounds, expressed in the same units as ``mean``.
    n : int
        Number of samples to draw (at least 1).

    Returns
    -------
    A single sample when ``n == 1``; otherwise an array holding all ``n``
    samples (previously every sample after the first was silently dropped).

    Raises
    ------
    ValueError
        If ``sd`` is not positive or ``n`` is smaller than 1.
    """
    if sd <= 0:
        raise ValueError("sd must be positive")
    if n < 1:
        raise ValueError("n must be at least 1")

    # truncnorm takes its bounds in standard-deviation units relative to loc.
    a, b = (low - mean) / sd, (high - mean) / sd
    samples = truncnorm(a, b, loc=mean, scale=sd).rvs(n)
    return samples[0] if n == 1 else samples

# 2) Capability benchmark (flat table)
def _synthetic_construct_row(dimension, index):
    """Build one benchmark record for construct number `index` of `dimension`.

    Draws the mean first and the spread second from NumPy's global RNG, then
    derives a mean +/- 2*sd band limited to the 0-100 scale.
    """
    centre = np.random.uniform(55, 85)
    spread = np.random.uniform(5, 15)
    return {
        "dimension": dimension,
        "construct": f"{dimension} Construct {index}",
        "benchmark_mean": round(centre, 1),
        "benchmark_min": round(max(0, centre - 2*spread), 1),
        "benchmark_max": round(min(100, centre + 2*spread), 1),
    }


# One record per (dimension, construct) pair, dimension-major order.
rows = [
    _synthetic_construct_row(dim_name, construct_no)
    for dim_name in dimensions
    for construct_no in range(1, constructs_per_dim + 1)
]
cap_df = pd.DataFrame(rows)
cap_df.to_csv(f"{out_dir}/sample_capability.csv", index=False)

# 3) Capability benchmark (API-friendly nested JSON)
# Columns copied into every construct record of the nested payload.
construct_columns = ["construct", "benchmark_mean", "benchmark_min", "benchmark_max"]

# One entry per dimension, each carrying the list of its construct records.
dimension_entries = []
for dim_name in dimensions:
    dim_rows = cap_df.loc[cap_df["dimension"] == dim_name, construct_columns]
    dimension_entries.append({
        "dimension": dim_name,
        "constructs": dim_rows.to_dict(orient="records"),
    })

api_payload = {
    "benchmark_name": "Synthetic OECD Benchmark",
    "benchmark_type": "synthetic",
    # ISO date (YYYY-MM-DD) of the day the notebook is run.
    "created_at": datetime.date.today().isoformat(),
    "data": dimension_entries,
}

with open(f"{out_dir}/benchmark.json", "w") as f:
    json.dump(api_payload, f, indent=2)

# 4) Sentiment benchmark (25 zones)
area_ids = list(range(1, 26))

sentiment_df = pd.DataFrame({
    "area_id": area_ids,
    "area_name": [f"Sentiment Area {area_id}" for area_id in area_ids],
    # randint excludes the upper bound, so scores fall in 40..89.
    "sentiment_score": np.random.randint(40, 90, size=len(area_ids)),
    # Scalar value is broadcast to every row.
    "benchmark_type": "synthetic",
})
sentiment_df.to_csv(f"{out_dir}/sample_sentiment.csv", index=False)

In [6]:
# Milestone 2 — Sentiment Flow & Visualization (support)

# Same rows as the sentiment CSV, exported as a JSON list of records.
sentiment_preview = sentiment_df.to_dict(orient="records")
preview_text = json.dumps(sentiment_preview, indent=2)
with open("public/demo_data/sentiment_preview.json", "w") as f:
    f.write(preview_text)
print("Created: sentiment_preview.json")

Created: sentiment_preview.json


In [7]:
# Milestone 3 — Capability Flow & Advanced Features (benchmarking logic)

# Average the construct-level benchmark means within each dimension.
# groupby sorts its keys, so rows come out ordered by dimension name.
dim_summary = cap_df.groupby("dimension", as_index=False).agg(
    dimension_benchmark_mean=("benchmark_mean", "mean")
)
dim_summary.to_json(
    "public/demo_data/dimension_benchmark.json",
    orient="records",
    indent=2,
)
print("Created: dimension_benchmark.json")

Created: dimension_benchmark.json


In [8]:
# Simulate a user's actual scores to verify gap = actual - benchmark_mean
actual = cap_df.copy()

# Perturb each benchmark mean with N(0, 8) noise and keep it on the 0-100 scale.
noise = np.random.normal(0, 8, size=len(actual))
# Round the score to 1 decimal BEFORE deriving the gap, so the exported
# columns satisfy gap == actual_score - benchmark_mean at the precision that
# is written to the file (previously only the gap was rounded).
actual["actual_score"] = np.clip(actual["benchmark_mean"] + noise, 0, 100).round(1)
# The extra round only strips float subtraction noise (e.g. 3.0999999999).
actual["gap"] = (actual["actual_score"] - actual["benchmark_mean"]).round(1)

actual[["dimension","construct","benchmark_mean","actual_score","gap"]].to_csv(
    "public/demo_data/gap_examples.csv", index=False
)
print("Created: gap_examples.csv")

Created: gap_examples.csv


In [9]:
# (Optional) Two regional benchmarks for demo switches
# public/demo_data/benchmark_region_eu.json
# public/demo_data/benchmark_region_na.json

def vary_benchmark(df, delta_mean, decimals=1):
    """Return a copy of `df` whose ``benchmark_mean`` is shifted by `delta_mean`.

    The shifted mean is limited to the 0-100 scale and rounded to `decimals`
    places. Without the rounding, float addition leaks representation noise
    into the exported JSON (e.g. 65.9 - 2.0 evaluates to 63.900000000000006).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a numeric ``benchmark_mean`` column. It is not modified.
    delta_mean : float
        Amount added to every benchmark mean (may be negative).
    decimals : int
        Decimal places kept in the shifted mean (default 1).

    Returns
    -------
    pandas.DataFrame
        A copy of `df`; only ``benchmark_mean`` differs from the input.
    """
    out = df.copy()
    shifted = (out["benchmark_mean"] + delta_mean).clip(0, 100)
    out["benchmark_mean"] = shifted.round(decimals)
    return out

def make_api_json_from_capdf(df, benchmark_name="Synthetic OECD Benchmark (Variant)"):
    """Convert a flat capability table into the nested benchmark payload.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``dimension``, ``construct``, ``benchmark_mean``,
        ``benchmark_min`` and ``benchmark_max``.
    benchmark_name : str
        Value stored under ``"benchmark_name"``. The default is the original
        fixed label; pass a distinct name so that several variants can be
        told apart by name.

    Returns
    -------
    dict
        Keys ``benchmark_name``, ``benchmark_type``, ``created_at`` (the date
        of the call as YYYY-MM-DD) and ``data``: one entry per dimension, in
        order of first appearance in `df`, each with its construct records.
    """
    construct_columns = ["construct", "benchmark_mean", "benchmark_min", "benchmark_max"]
    return {
        "benchmark_name": benchmark_name,
        "benchmark_type": "synthetic",
        "created_at": datetime.date.today().strftime("%Y-%m-%d"),
        "data": [
            {
                "dimension": d,
                "constructs": df.loc[df["dimension"] == d, construct_columns]
                                .to_dict(orient="records"),
            }
            # unique() keeps first-appearance order rather than sorting.
            for d in df["dimension"].unique()
        ],
    }

# Regional variants: the benchmark mean is moved 2 points up (EU) or down (NA).
eu_df = vary_benchmark(cap_df, +2.0)
na_df = vary_benchmark(cap_df, -2.0)

# Write each variant in the same nested layout, EU first, then NA.
regional_exports = (
    ("public/demo_data/benchmark_region_eu.json", eu_df),
    ("public/demo_data/benchmark_region_na.json", na_df),
)
for region_path, region_df in regional_exports:
    with open(region_path, "w") as f:
        json.dump(make_api_json_from_capdf(region_df), f, indent=2)

print("Created: benchmark_region_eu.json, benchmark_region_na.json")

Created: benchmark_region_eu.json, benchmark_region_na.json


In [10]:
# Milestone 4 — Interventions, ROI & Finalization

# Intervention rules (thresholds for triggers)
# Each spec is (threshold, level the rule applies to, intervention label).
rule_specs = [
    (60, "construct", "Targeted Training"),
    (55, "construct", "Data Quality Uplift"),
    (65, "dimension", "Leadership Alignment"),
]
intervention_rules = [
    {"threshold": threshold, "applies_to": level, "intervention": label}
    for threshold, level, label in rule_specs
]

with open("public/demo_data/intervention_rules.json", "w") as f:
    json.dump(intervention_rules, f, indent=2)
print("Created: intervention_rules.json")

Created: intervention_rules.json


In [None]:
# ROI glimpse table (synthetic ranges)

