
# Detection Taxonomy — Reproducible Simulations (Python 3.10 compatible)

This notebook is **self-contained** apart from two local modules, and has been tested on Python **3.10**.
It dynamically imports those two provided modules:

- `TaxonomyNodeClass.py`
- `PerformanceGraphsClass.py`

and then reproduces simulation illustrations for the paper:
continuity (Thm 1), conditioning effects (Thm 2 & Prop 1), precision lift via conditioning (Thm 3),
and a coordinate-descent equilibrium search (Thm 4).

> Dependencies: `numpy`, `pandas`, `matplotlib` (and optionally `networkx`, but it's **not required**).


In [None]:

# --- Robust dynamic import of local modules (Python 3.10 friendly) ---
import importlib.util, sys, os, traceback

def import_from_file(module_name: str, candidates):
    """
    Import a module from the first candidate file that loads successfully.

    Parameters:
        module_name: name under which the module is registered in sys.modules.
        candidates: iterable of file paths (str, bytes or os.PathLike), tried
            in order. Entries that are not existing files are skipped.

    Returns:
        The imported module object.

    Raises:
        ImportError: if no candidate could be imported. The message lists every
            path that was tried plus the last load error, which is also chained
            as ``__cause__``.
    """
    # Normalise once up front: this accepts generators and pathlib.Path entries
    # and guarantees the error message below can always be joined as text.
    paths = [os.fsdecode(p) for p in candidates]
    # Remember any module already registered under this name so a failed
    # attempt does not evict it.
    previous = sys.modules.get(module_name)
    last_err = None
    for path in paths:
        if not os.path.isfile(path):
            continue
        try:
            spec = importlib.util.spec_from_file_location(module_name, path)
            if spec is None or spec.loader is None:
                raise ImportError(f"No loader available for {path}")
            module = importlib.util.module_from_spec(spec)
            # Insert into sys.modules BEFORE exec to satisfy dataclass forward refs on 3.10
            sys.modules[module_name] = module
            spec.loader.exec_module(module)
            return module
        except Exception as e:
            # Deliberately broad: executing arbitrary module code can raise
            # anything, and the remaining candidates should still be tried.
            last_err = e
            if previous is not None:
                sys.modules[module_name] = previous
            else:
                sys.modules.pop(module_name, None)
    # If we get here, show a helpful message
    msg = f"Failed to import {module_name}. Tried paths:\n  " + "\n  ".join(paths)
    if last_err is not None:
        msg += f"\nLast error:\n{''.join(traceback.format_exception_only(type(last_err), last_err)).strip()}"
    raise ImportError(msg) from last_err

# Search locations for each module, in priority order: the working directory,
# /mnt/data (common in hosted settings), then the parent of the working directory.
CWD = os.getcwd()
CANDIDATES_TAX, CANDIDATES_PG = (
    [
        os.path.join(CWD, filename),
        hosted_path,
        os.path.join(CWD, "..", filename),
    ]
    for filename, hosted_path in (
        ("TaxonomyNodeClass.py", "/mnt/data/TaxonomyNodeClass.py"),
        ("PerformanceGraphsClass.py", "/mnt/data/PerformanceGraphsClass.py"),
    )
)

# Load both modules first, then pull out the two classes the notebook uses.
TaxonomyNodeClass = import_from_file("TaxonomyNodeClass", CANDIDATES_TAX)
PerformanceGraphsClass = import_from_file("PerformanceGraphsClass", CANDIDATES_PG)

TaxonomyNode = TaxonomyNodeClass.TaxonomyNode
TaxonomyBayesianNetwork = PerformanceGraphsClass.TaxonomyBayesianNetwork

# Report which files were actually picked up.
print(
    "✅ Imported modules:",
    TaxonomyNodeClass.__file__,
    "|",
    PerformanceGraphsClass.__file__,
)


In [None]:

# --- Build taxonomy identical to paper's structure ---
def build_taxonomy():
    """
    Assemble the example taxonomy tree and return its ROOT node.

    Layout (child <- parent):
        A, B, H, I <- ROOT;  C, F <- A;  D <- B;  E, G <- C;  J <- I
    """
    root = TaxonomyNode("ROOT")
    nodes = {"ROOT": root}
    # (child, parent) pairs listed in creation order, so every parent already
    # exists by the time one of its children is built.
    layout = (
        ("A", "ROOT"), ("B", "ROOT"), ("H", "ROOT"), ("I", "ROOT"),
        ("C", "A"), ("F", "A"),
        ("D", "B"),
        ("E", "C"), ("G", "C"),
        ("J", "I"),
    )
    for child_name, parent_name in layout:
        parent = nodes[parent_name]
        child = TaxonomyNode(child_name, parent=parent)
        # IMPORTANT: TaxonomyNode doesn't auto-append children; do it explicitly.
        parent.children.append(child)
        nodes[child_name] = child
    return root

# Build the example tree once and wrap it in the network object.
root = build_taxonomy()
# `net` is shared, mutable notebook state: the cells below overwrite entries of
# `net.thresholds` in place and read metrics back from this same object.
net = TaxonomyBayesianNetwork(taxonomy_root=root)

# Helper: map short names to canonical path names used by network
def pname(short):
    """
    Translate a short node label (e.g. "E") into its full slash-separated
    path from the root (e.g. "ROOT/A/C/E").

    Raises KeyError for a label that is not part of the example taxonomy.
    """
    short_to_path = (
        ("ROOT", "ROOT"),
        ("A", "ROOT/A"), ("B", "ROOT/B"), ("H", "ROOT/H"), ("I", "ROOT/I"),
        ("C", "ROOT/A/C"), ("F", "ROOT/A/F"),
        ("D", "ROOT/B/D"),
        ("E", "ROOT/A/C/E"), ("G", "ROOT/A/C/G"),
        ("J", "ROOT/I/J"),
    )
    lookup = dict(short_to_path)
    return lookup[short]

# Signed DAG edges as (parent, child, sign):
#   "+" requires the parent to have fired, "-" requires it NOT to have fired.
edges = [
    (pname(parent), pname(child), sign)
    for parent, child, sign in (
        ("A", "C", "+"),
        ("C", "E", "+"),
        ("C", "G", "+"),
        ("B", "D", "+"),
        ("D", "E", "-"),  # a negative dependency example
    )
]
net.set_dag_edges(edges)

# Start every node from the same mid-range threshold (valid range is [0, 1]).
for name in net.node_lookup.keys():
    net.thresholds[name] = 0.5

# Bare expression: lists the node names the network knows about as cell output.
sorted(net.node_lookup.keys())


In [None]:

import numpy as np, pandas as pd

def evaluate_table(network, use_dag=True, N=30000, seed=1337):
    """
    Evaluate the network and return its per-node metrics as a DataFrame.

    Parameters:
        network: object exposing ``_evaluate_performance(use_dag=..., N=...)``
            that returns a mapping of node name -> mapping of metric -> value.
        use_dag: forwarded unchanged to ``_evaluate_performance``.
        N: forwarded unchanged to ``_evaluate_performance``.
        seed: value used to seed the global ``random`` and NumPy generators
            before evaluating; pass ``None`` to leave the RNG state untouched.

    Returns:
        DataFrame with one row per node (sorted by node name) and one column
        per metric.
    """
    if seed is not None:
        # Previously `seed` was accepted but ignored. Seeding the global
        # generators is a best-effort fix.
        # NOTE(review): assumes the network samples from the stdlib `random`
        # module or NumPy's legacy global RNG — confirm against
        # PerformanceGraphsClass; a private Generator would need its own seed.
        import random
        random.seed(seed)
        np.random.seed(seed)
    perf = network._evaluate_performance(use_dag=use_dag, N=N)
    return pd.DataFrame(perf).T.loc[sorted(perf)]

# Reference metrics for the default thresholds with DAG conditioning enabled.
# NOTE(review): whether this table is repeatable depends on evaluate_table
# actually honouring `seed` — verify.
baseline = evaluate_table(net, use_dag=True, N=30000, seed=1337)
baseline  # bare expression: displayed as the cell output when run in a notebook


In [None]:

# --- Theorem 1: continuity of Precision(C|A) in (λ_A, λ_C) ---
import numpy as np
import matplotlib.pyplot as plt

# Sweep a 25x25 grid of (λ_A, λ_C) and record the precision reported for C at
# every grid point; rows of Z follow λ_A, columns follow λ_C.
lamA = np.linspace(0.0, 1.0, 25)
lamC = np.linspace(0.0, 1.0, 25)
Z = np.zeros((len(lamA), len(lamC)))

# Snapshot the thresholds this sweep overwrites. Without restoring them the
# shared `net` keeps the last grid point (λ_A = λ_C = 1.0) and later cells
# silently run with those values.
_swept_keys = (pname("A"), pname("C"))
_saved_thresholds = {key: net.thresholds[key] for key in _swept_keys}
try:
    for i, la in enumerate(lamA):
        for j, lc in enumerate(lamC):
            net.thresholds.update({pname("A"): float(la), pname("C"): float(lc)})
            perf = net._evaluate_performance(use_dag=True, N=20000)
            Z[i, j] = perf[pname("C")]["precision"]
finally:
    # Restore even if an evaluation fails part-way through the grid.
    net.thresholds.update(_saved_thresholds)

plt.figure()
# origin="lower" plus the extent puts λ_C on the x-axis and λ_A on the y-axis,
# matching Z's (row = λ_A, column = λ_C) layout.
plt.imshow(Z, origin="lower",
           extent=[lamC.min(), lamC.max(), lamA.min(), lamA.max()], aspect="auto")
plt.xlabel("λ_C")
plt.ylabel("λ_A")
plt.title("Precision(C|A) across (λ_A, λ_C) — continuity illustration")
plt.colorbar()
plt.show()


In [None]:

# --- Theorem 2: precision of conditioned detection vs parent precision (+ edge A→C) ---
# Sweep λ_A over 31 values and pair the precision reported for A (x) with the
# precision reported for C (y) at each step.
xs, ys = [], []

# Snapshot λ_A so the shared `net` is not left at the final sweep value (1.0)
# for the cells that follow.
_swept_keys = (pname("A"),)
_saved_thresholds = {key: net.thresholds[key] for key in _swept_keys}
try:
    for la in np.linspace(0.0, 1.0, 31):
        net.thresholds.update({pname("A"): float(la)})
        perf = net._evaluate_performance(use_dag=True, N=20000)
        xs.append(perf[pname("A")]["precision"])
        ys.append(perf[pname("C")]["precision"])
finally:
    # Restore even if an evaluation fails part-way through the sweep.
    net.thresholds.update(_saved_thresholds)

plt.figure()
plt.plot(xs, ys, marker=".")
plt.xlabel("Precision(A)")
plt.ylabel("Precision(C|A)")
plt.title("Precision trend with conditioning (A→C, +)")
plt.show()


In [None]:

# --- Theorem 3: conditioning increases precision of deeper node ---
# Sweep λ_C with λ_E pinned at 0.6 and compare the precision reported for E
# with the DAG disabled (use_dag=False) and enabled (use_dag=True).
lam = np.linspace(0.0, 1.0, 31)
prec_uncond, prec_cond = [], []

# Snapshot the thresholds this sweep overwrites so the shared `net` is not left
# at λ_C = 1.0 / λ_E = 0.6 for the cells that follow.
_swept_keys = (pname("C"), pname("E"))
_saved_thresholds = {key: net.thresholds[key] for key in _swept_keys}
try:
    for lc in lam:
        net.thresholds.update({pname("C"): float(lc), pname("E"): 0.6})
        p_un = net._evaluate_performance(use_dag=False, N=20000)
        p_co = net._evaluate_performance(use_dag=True, N=20000)
        prec_uncond.append(p_un[pname("E")]["precision"])
        prec_cond.append(p_co[pname("E")]["precision"])
finally:
    # Restore even if an evaluation fails part-way through the sweep.
    net.thresholds.update(_saved_thresholds)

import matplotlib.pyplot as plt
plt.figure()
plt.plot(lam, prec_uncond, label="Unconditioned E")
plt.plot(lam, prec_cond, label="Conditioned E (via DAG)")
plt.xlabel("λ_C")
plt.ylabel("Precision(E)")
plt.title("Conditioning raises Precision (E | C)")
plt.legend()
plt.show()


In [None]:

# --- Proposition 1: recall(C|A) vs recall(A) ---
# Sweep λ_A over 31 values and pair the recall reported for A (x) with the
# recall reported for C (y) at each step.
xs, ys = [], []

# Snapshot λ_A so the shared `net` is not left at the final sweep value (1.0)
# for the cells that follow.
_swept_keys = (pname("A"),)
_saved_thresholds = {key: net.thresholds[key] for key in _swept_keys}
try:
    for la in np.linspace(0.0, 1.0, 31):
        net.thresholds.update({pname("A"): float(la)})
        perf = net._evaluate_performance(use_dag=True, N=20000)
        xs.append(perf[pname("A")]["recall"])
        ys.append(perf[pname("C")]["recall"])
finally:
    # Restore even if an evaluation fails part-way through the sweep.
    net.thresholds.update(_saved_thresholds)

import matplotlib.pyplot as plt
plt.figure()
plt.plot(xs, ys, marker=".")
plt.xlabel("Recall(A)")
plt.ylabel("Recall(C|A)")
plt.title("Recall trend with conditioning (A→C, +)")
plt.show()


In [None]:

# --- Theorem 4: Coordinate-descent equilibrium on (Precision+Recall)^2 ---
# Random starting point for the search: one threshold per node, drawn from a
# dedicated generator seeded with 2025 (independent of NumPy's global RNG).
# NOTE(review): the draws are assigned in node_lookup iteration order, so the
# starting point is only repeatable if that order is stable — confirm.
rng = np.random.default_rng(2025)
for name in net.node_lookup.keys():
    # rng.random() yields a value in [0, 1); store it as a plain Python float.
    net.thresholds[name] = float(rng.random())

def evaluate_table(network, use_dag=True, N=40000, seed=2025):
    """
    Evaluate the network and return its per-node metrics as a DataFrame.

    Parameters:
        network: object exposing ``_evaluate_performance(use_dag=..., N=...)``
            that returns a mapping of node name -> mapping of metric -> value.
        use_dag: forwarded unchanged to ``_evaluate_performance``.
        N: forwarded unchanged to ``_evaluate_performance``.
        seed: value used to seed the global ``random`` and NumPy generators
            before evaluating; pass ``None`` to leave the RNG state untouched.

    Returns:
        DataFrame with one row per node (sorted by node name) and one column
        per metric.
    """
    if seed is not None:
        # Previously `seed` was accepted but ignored. Seeding the global
        # generators is a best-effort fix.
        # NOTE(review): assumes the network samples from the stdlib `random`
        # module or NumPy's legacy global RNG — confirm against
        # PerformanceGraphsClass; a private Generator would need its own seed.
        import random
        random.seed(seed)
        np.random.seed(seed)
    perf = network._evaluate_performance(use_dag=use_dag, N=N)
    import pandas as pd
    return pd.DataFrame(perf).T.loc[sorted(perf)]

# Metrics at the starting thresholds.
before = evaluate_table(net, use_dag=True, N=40000)

# Run the search and keep the thresholds it returns.
# NOTE(review): the "after" table only differs from "before" if
# equilibrium_search also writes its result into net.thresholds — confirm.
eq_thresholds = net.equilibrium_search(
    max_iters=30,
    steps=50,
    tol=1e-3,
    verbose=True,
    use_dag=True,
    N=40000,
)

# Metrics once the search has finished.
after = evaluate_table(net, use_dag=True, N=40000)

import pandas as pd

# One column per quantity; pandas aligns the Series on node name.
summary_columns = {
    "λ_eq": pd.Series(eq_thresholds),
    "precision_before": before["precision"],
    "recall_before": before["recall"],
    "precision_after": after["precision"],
    "recall_after": after["recall"],
}
summary = pd.DataFrame(summary_columns)
# Present the rows in sorted node-name order.
summary = summary.loc[sorted(net.node_lookup.keys())]

summary
