In [1]:
#!/usr/bin/env python3
# star_network_identify.py
# ---------------------------------------------------------------
"""
Identify star-like network topologies (A/B/C) from time-series produced by a
high-precision coupled map lattice (CML). The workflow is:

1) Simulate node dynamics on directed star graphs with high numerical precision.
2) Compute node-wise error strengths ‖x_{t+1} - 2 x_t‖ (wrapped on the circle).
3) Use a 2-component Gaussian Mixture Model (GMM) to separate hubs vs. leaves.
4) With two segments (from B/C candidates), compare hubs' residual variances to
   decide whether a segment came from type-B (both hubs fully connected) or
   type-C (leaves split across two hubs).

Conventions
-----------
- Adjacency A is binary and directed. A[j, i] = 1 means a directed edge j → i,
  i.e., node j contributes to the coupling term of node i.
- Trajectories are stored as a NumPy array of shape (N, T_seg), with N nodes
  and T_seg time steps after discarding transients.
"""

from __future__ import annotations
from decimal import Decimal, getcontext
import numpy as np
import mpmath as mp
from typing import Callable, Tuple
from sklearn.mixture import GaussianMixture

# ---------- Global High Precision Settings ----------
getcontext().prec = 200
mp.mp.dps = getcontext().prec
TWOPI = mp.mpf('6.283185307179586476925286766559')
SIGMA_H2 = 0.5  # ∫ h^2(0,y) dm(y)  (kept for reference; not used directly)

# ------------------------------------------------------------------
#  I. High-Precision Network System (unchanged)
# ------------------------------------------------------------------
class GraphSystemDecimal:
    """
    Coupled map lattice on a directed graph with high-precision arithmetic.

    Each node evolves via a local map (default: doubling map x ↦ 2x mod 1),
    plus diffusive sinusoidal coupling from its in-neighbors.

    Parameters
    ----------
    A : np.ndarray
        Directed adjacency matrix of shape (N, N). A[j, i] = 1 indicates j → i.
    alpha : str, optional
        Coupling strength as a decimal string (for exact Decimal parsing).
    local_map : Callable[[Decimal], Decimal], optional
        Local map f(x). Defaults to doubling map `(2*x) % 1`.
    coupling_fn : Callable[[Decimal, Decimal], Decimal], optional
        Pairwise coupling c(x_s, x_t) from source s to target t. If None,
        uses a sinusoidal diffusive term `-sin(2π x_s) + sin(2π x_t)`.
    seed : int, optional
        Random seed for initial conditions.

    Attributes
    ----------
    N : int
        Number of nodes.
    Delta : float
        Maximum out-degree (max column sum) used for normalization.
    x : list[Decimal]
        Current node states.
    t : int
        Current time step.

    Notes
    -----
    - High precision is enforced via Python's Decimal and mpmath.
    - The coupling increment at node i is normalized by Delta.
    """

    def __init__(self, A: np.ndarray, alpha: str = '0.25',
                 local_map: Callable[[Decimal], Decimal] | None = None,
                 coupling_fn: Callable[[Decimal, Decimal], Decimal] | None = None,
                 seed: int = 0):
        self.A = np.asarray(A, dtype=float)
        self.N = self.A.shape[0]
        self.Delta = self.A.sum(axis=0).max()
        self.alpha = Decimal(alpha)
        self.local_map = local_map or (lambda x: (Decimal(2) * x) % 1)
        self.coupling = coupling_fn or self._default_coupling
        self.rng = np.random.default_rng(seed)
        self.reset()

    @staticmethod
    def _default_coupling(xs: Decimal, xt: Decimal) -> Decimal:
        """
        Default sinusoidal diffusive coupling.

        Parameters
        ----------
        xs : Decimal
            Source node state.
        xt : Decimal
            Target node state.

        Returns
        -------
        Decimal
            c(xs, xt) = -sin(2π xs) + sin(2π xt), as a Decimal.
        """
        v = -mp.sin(TWOPI * mp.mpf(str(xs))) + mp.sin(TWOPI * mp.mpf(str(xt)))
        return Decimal(str(v))

    def _coupling_term(self):
        """
        Compute normalized coupling increment for each node.

        Returns
        -------
        list[Decimal]
            A list of length N with the coupling increment for each node,
            normalized by the maximum out-degree Δ.

        Notes
        -----
        The increment for node i is the sum over j with A[j, i] = 1 of
        c(x_j, x_i), divided by Δ to keep scales comparable across graphs.
        """
        incr = [Decimal(0)] * self.N
        for j in range(self.N):
            if self.A[j].sum() == 0:
                continue  # node j has no outgoing edges
            for i in range(self.N):
                if self.A[j, i]:
                    incr[i] += self.coupling(self.x[j], self.x[i])
        d = Decimal(str(self.Delta))
        return [v / d for v in incr]

    def step(self):
        """
        Advance the system by one time step.

        Returns
        -------
        list[Decimal]
            The updated state vector x_{t+1} (length N) as Decimals.
        """
        xn = [self.local_map(x) for x in self.x]  # local map update
        coup = self._coupling_term()              # diffusive coupling
        xn = [(xi + self.alpha * ci) % 1 for xi, ci in zip(xn, coup)]
        self.x = xn
        self.t += 1
        return xn

    def reset(self):
        """
        Reset the system to a fresh random initial condition.

        Notes
        -----
        States are sampled i.i.d. ~ Uniform(0, 1) and stored as Decimal.
        """
        self.x = [Decimal(str(v)) for v in self.rng.random(self.N)]
        self.t = 0

    def run(self, T: int, discard: int = 0):
        """
        Simulate for T time steps and return the trajectory after discarding transients.

        Parameters
        ----------
        T : int
            Total number of steps to simulate.
        discard : int, optional
            Number of initial steps to discard as transients.

        Returns
        -------
        np.ndarray
            Array of shape (N, max(0, T - discard)) with float64 views of states.
        """
        traj = np.zeros((self.N, max(0, T - discard)))
        for k in range(T):
            xt = self.step()
            if k >= discard:
                traj[:, k - discard] = [float(v) for v in xt]
        return traj


# ------------------------------------------------------------------
#  II. Star Graph Generators (three variants)
# ------------------------------------------------------------------

def graph_A(N: int):
    """
    Create a star graph with a single hub (node N-1) pointed to by all leaves.

    Parameters
    ----------
    N : int
        Number of nodes.

    Returns
    -------
    np.ndarray
        Adjacency matrix A of shape (N, N) with A[j, N-1] = 1 for j = 0..N-2.
    """
    A = np.zeros((N, N))
    A[np.arange(N - 1), N - 1] = 1
    return A


def graph_B(N: int):
    """
    Create a star-like graph with two hubs (nodes N-2 and N-1).
    Every leaf connects to both hubs.

    Parameters
    ----------
    N : int
        Number of nodes.

    Returns
    -------
    np.ndarray
        Adjacency matrix A with A[leaf, N-1] = A[leaf, N-2] = 1 for all leaves.
    """
    A = np.zeros((N, N))
    leaves = np.arange(N - 2)
    A[leaves, N - 1] = 1
    A[leaves, N - 2] = 1
    return A


def graph_C(N: int):
    """
    Create a two-hub graph where leaves are split into two halves;
    each half connects to exactly one of the hubs.

    Parameters
    ----------
    N : int
        Number of nodes.

    Returns
    -------
    np.ndarray
        Adjacency matrix A with two hubs (N-2, N-1) and disjoint leaf sets.
    """
    A = np.zeros((N, N))
    half = N // 2
    A[np.arange(half - 1), N - 2] = 1
    A[np.arange(half - 1, N - 2), N - 1] = 1
    return A


# ------------------------------------------------------------------
#  III. GMM-based Hub Detection & Core Statistics
# ------------------------------------------------------------------

def moddiff(u):
    """
    Wrap a real array onto the interval (-0.5, 0.5] using modulo-1 arithmetic.

    Parameters
    ----------
    u : array_like
        Input values (can be scalar or array).

    Returns
    -------
    np.ndarray or float
        Wrapped values with the same shape as input.
    """
    return ((u + 0.5) % 1) - 0.5


def compute_strength(traj):
    """
    Compute node-wise mean error strength ‖x_{t+1} - 2 x_t‖ (wrapped).

    Parameters
    ----------
    traj : np.ndarray
        Trajectory of shape (N, T), after discarding transients.

    Returns
    -------
    np.ndarray
        Vector of length N, where entry i is the mean absolute wrapped error
        for node i across time.
    """
    x, x1 = traj[:, :-1], traj[:, 1:]
    return np.abs(moddiff(x1 - 2 * x)).mean(axis=1)


def gmm_hubs(S, seed=0):
    """
    Use a 2-component Gaussian Mixture Model to separate hubs (larger error
    strength) from leaves.

    Parameters
    ----------
    S : np.ndarray
        Mean error strengths for N nodes; shape (N,).
    seed : int, optional
        Random seed for GMM initialization.

    Returns
    -------
    np.ndarray
        Boolean mask of shape (N,), where True indicates a hub (the component
        with the larger mean).
    """
    g = GaussianMixture(2, random_state=seed).fit(S.reshape(-1, 1))
    return g.predict(S.reshape(-1, 1)) == np.argmax(g.means_)


def beta_var(x: np.ndarray) -> float:
    """
    Estimate the variance of residuals in
    y_t = x_{t+1} - 2 x_t + β sin(2π x_t), via least squares for β.

    Parameters
    ----------
    x : np.ndarray
        Single-node time series of shape (T,).

    Returns
    -------
    float
        Variance of residuals y_t + β sin(2π x_t).
    """
    y = moddiff(x[1:] - 2 * x[:-1])
    s = -np.sin(2 * np.pi * x[:-1])
    beta = -(y @ s) / (s @ s)
    resid = y + beta * s
    return resid.var()


# ------------------------------------------------------------------
#  IV-a  Single Segment → Classify A vs (B/C)
# ------------------------------------------------------------------

def classify_A_and_BC(traj: np.ndarray, N: int) -> str:
    """
    Classify a single segment as 'A_N' (exactly one hub) or 'B_N and C_N' (two hubs).

    Parameters
    ----------
    traj : np.ndarray
        Trajectory array of shape (N, T).
    N : int
        Number of nodes (kept for signature compatibility; not used).

    Returns
    -------
    str
        'A_N' if exactly one hub is detected; otherwise 'B_N and C_N'.
    """
    S = compute_strength(traj)
    hubs = np.where(gmm_hubs(S))[0]
    if hubs.size == 1:
        return "A_N"
    return "B_N and C_N"


# ------------------------------------------------------------------
#  IV-b  Compute "mean hub variance" for one segment
# ------------------------------------------------------------------

def average_hub_variance(traj: np.ndarray) -> float:
    """
    Compute the mean β-residual variance over the two hubs of a B/C star graph.

    Parameters
    ----------
    traj : np.ndarray
        Trajectory array of shape (N, T).

    Returns
    -------
    float
        The average of beta_var over the two hubs.

    Raises
    ------
    RuntimeError
        If the segment does not appear to have exactly two hubs.
    """
    S = compute_strength(traj)
    hubs = np.where(gmm_hubs(S))[0]
    if hubs.size != 2:
        raise RuntimeError("This segment does not correspond to a B/C graph (number of hubs ≠ 2)")
    return float(np.mean([beta_var(traj[i]) for i in hubs]))

# ------------------------------------------------------------------
#  IV-c **Key addition**: Two segments → compare variances → classify B vs C
# ------------------------------------------------------------------
def classify_B_vs_C(traj_first: np.ndarray, traj_second: np.ndarray) -> Tuple[str, float, float]:
    """
    Distinguish type-B vs. type-C using hub residual variances from two segments.

    Parameters
    ----------
    traj_first : np.ndarray
        First trajectory, shape (N, T), from a B/C candidate graph.
    traj_second : np.ndarray
        Second trajectory, shape (N, T), from a B/C candidate graph.

    Returns
    -------
    tuple[str, float, float]
        (label, var_first, var_second)
        - label: 'first_is_B' if var_first < var_second, else 'first_is_C'
        - var_first: mean hub variance of the first segment
        - var_second: mean hub variance of the second segment

    Notes
    -----
    Lower hub variance ⇒ higher in-degree ⇒ type-B.
    """
    var1 = average_hub_variance(traj_first)
    var2 = average_hub_variance(traj_second)
    if var1 < var2:  # smaller variance ⇒ larger in-degree ⇒ B graph
        return "first_is_B", var1, var2
    else:
        return "first_is_C", var1, var2

# ------------------------------------------------------------------
#  V. Demo
# ------------------------------------------------------------------
if __name__ == "__main__":
    N, T, discard = 10, 6000, 600
    alpha = '0.25'

    # 1) Demonstrate single-segment classification A/B/C
    for gname, maker in [("A_N", graph_A), ("B_N", graph_B), ("C_N", graph_C)]:
        traj = GraphSystemDecimal(maker(N), alpha=alpha, seed=hash(gname) % 2**32).run(T, discard)
        print(f"{gname}  → classify_ABC → {classify_A_and_BC(traj, N)}")

    # 2) Demonstrate variance comparison to distinguish B / C
    trajB = GraphSystemDecimal(graph_B(N), alpha=alpha, seed=1).run(T, discard)
    trajC = GraphSystemDecimal(graph_C(N), alpha=alpha, seed=2).run(T, discard)

    res, v_first, v_second = classify_B_vs_C(trajB, trajC)  # B comes first
    print("\nComparing two sequences:", res)
    print(f"  Mean hub variance of the first segment  = {v_first:.6e}")
    print(f"  Mean hub variance of the second segment = {v_second:.6e}")


A_N  → classify_ABC → A_N
B_N  → classify_ABC → B_N and C_N
C_N  → classify_ABC → B_N and C_N

Comparing two sequences: first_is_B
  Mean hub variance of the first segment  = 3.902977e-03
  Mean hub variance of the second segment = 7.846020e-03


In [6]:
# ======== New or Replacement Section Begins =================================
# I. General Logistic Map (Decimal version, co-existing with original 2 x mod 1)
def logistic_map_decimal(x: Decimal) -> Decimal:
    """
    Logistic map in Decimal precision: f(x) = 4 x (1 - x)  (mod 1).

    Parameters
    ----------
    x : Decimal
        State value in [0, 1).

    Returns
    -------
    Decimal
        f(x) mapped back to [0, 1) using modulo-1, to match the original design.

    Notes
    -----
    Keeping the modulo-1 ensures consistency with the doubling-map implementation.
    """
    # “% 1” keeps the same format as in the original implementation
    return (Decimal(4) * x * (Decimal(1) - x)) % 1


# II. Example interface for an optional coupling function
def coupling_sin_diff(xs: Decimal, xt: Decimal) -> Decimal:
    """
    Sinusoidal diffusive coupling in Decimal precision:
    c(xs, xt) = -sin(2π xs) + sin(2π xt).

    Parameters
    ----------
    xs : Decimal
        Source node state.
    xt : Decimal
        Target node state.

    Returns
    -------
    Decimal
        Coupling contribution from xs to xt.
    """
    v = -mp.sin(TWOPI * mp.mpf(str(xs))) + mp.sin(TWOPI * mp.mpf(str(xt)))
    return Decimal(str(v))


# III. --- Modify compute_strength / beta_var so they depend on local_map ---
def compute_strength(
    traj: np.ndarray,
    local_map_vec: Callable[[np.ndarray], np.ndarray]
) -> np.ndarray:
    """
    Compute node-wise mean absolute innovation relative to the local map:
    S_i = ⟨|Δ_i|⟩, where Δ_i(t) = x_{t+1,i} − f(x_{t,i}) wrapped by modulo-1.

    Parameters
    ----------
    traj : np.ndarray
        Trajectory array of shape (N, T) for N nodes and T time steps
        (after any transient discard).
    local_map_vec : Callable[[np.ndarray], np.ndarray]
        Vectorized local map f applied elementwise to traj[:, :-1].
        It must accept an array of shape (N, T-1) and return the same shape.

    Returns
    -------
    np.ndarray
        Strength vector S of shape (N,), one entry per node.
    """
    x, x1 = traj[:, :-1], traj[:, 1:]
    Delta = moddiff(x1 - local_map_vec(x))
    return np.abs(Delta).mean(axis=1)


def beta_var(
    traj_i: np.ndarray,
    local_map_vec: Callable[[np.ndarray], np.ndarray],
    I_h_vec: Callable[[np.ndarray], np.ndarray]
) -> float:
    """
    Estimate β by least squares and return the residual variance for one node.

    Model
    -----
    y_t = x_{t+1} − f(x_t) + β · I_h(x_t),
    where I_h(x) = ∫ h(x, y) dm(y).
    All differences are wrapped via modulo-1 to stay on the circle.

    Parameters
    ----------
    traj_i : np.ndarray
        Single-node series of shape (T,).
    local_map_vec : Callable[[np.ndarray], np.ndarray]
        Vectorized local map f for arrays of shape (T-1,) → (T-1,).
    I_h_vec : Callable[[np.ndarray], np.ndarray]
        Vectorized function I_h for arrays of shape (T-1,) → (T-1,).

    Returns
    -------
    float
        Variance of residuals y + β · I_h(x).

    Notes
    -----
    β is obtained by minimizing ‖y + β s‖² with s = I_h(x), yielding
    β* = −(yᵀ s)/(sᵀ s).
    """
    x     = traj_i[:-1]
    y     = moddiff(traj_i[1:] - local_map_vec(x))
    s     = I_h_vec(x)
    beta  = -(y @ s) / (s @ s)
    resid = y + beta * s
    return resid.var()


# IV. --- Vectorized utilities, isolated from the Decimal system -------------
# logistic_vec: elementwise version of f(x) = 4x(1-x) acting on ndarray inputs.
logistic_vec = np.vectorize(lambda u: 4.0 * u * (1.0 - u))          # f(x)

# Ih_vec: elementwise version of I_h(x) = ∫ h(x, y) dm(y); here chosen as -sin(2πx).
Ih_vec       = np.vectorize(lambda u: -np.sin(2 * np.pi * u))       # ∫ h dm


# V. --- Adapted classification functions -----------------------------------
def classify_A_and_BC(traj: np.ndarray) -> str:
    """
    Classify a single segment as 'A_N' (one hub) or 'B_N and C_N' (two hubs),
    using the logistic local map and sinusoidal integral I_h by default.

    Parameters
    ----------
    traj : np.ndarray
        Trajectory array of shape (N, T).

    Returns
    -------
    str
        'A_N' if exactly one hub is detected by GMM; otherwise 'B_N and C_N'.
    """
    S    = compute_strength(traj, logistic_vec)
    hubs = np.where(gmm_hubs(S))[0]
    return "A_N" if hubs.size == 1 else "B_N and C_N"


def average_hub_variance(traj: np.ndarray) -> float:
    """
    Compute the mean residual variance across the two detected hubs
    for a B/C star graph, using the logistic local map and I_h.

    Parameters
    ----------
    traj : np.ndarray
        Trajectory array of shape (N, T).

    Returns
    -------
    float
        Average of beta_var over the two hubs.

    Raises
    ------
    RuntimeError
        If the number of detected hubs is not exactly two.
    """
    S    = compute_strength(traj, logistic_vec)
    hubs = np.where(gmm_hubs(S))[0]
    if hubs.size != 2:
        raise RuntimeError("Data are not from a B/C graph (number of hubs ≠ 2)")
    vars_ = [beta_var(traj[i], logistic_vec, Ih_vec) for i in hubs]
    return float(np.mean(vars_))


# VI. ------------------------ Demo -----------------------------------------
if __name__ == "__main__":
    """
    Demo: run the system under the logistic map and sinusoidal coupling,
    then classify A/B/C for single segments and distinguish B vs C by variance.
    """
    N, T, discard = 10, 6000, 600
    alpha = '0.25'

    local_map = logistic_map_decimal
    coupling  = coupling_sin_diff

    # Demonstrate single-segment classification for A/B/C
    for gname, maker in [("A_N", graph_A),
                         ("B_N", graph_B),
                         ("C_N", graph_C)]:
        gs   = GraphSystemDecimal(maker(N), alpha=alpha,
                                  local_map=local_map,
                                  coupling_fn=coupling,
                                  seed=hash(gname) % 2**32)
        traj = gs.run(T, discard)
        print(f"{gname}  → classify_ABC → {classify_A_and_BC(traj)}")

    # Compare two sequences to distinguish between B and C
    trajB = GraphSystemDecimal(graph_B(N), alpha=alpha,
                               local_map=local_map,
                               coupling_fn=coupling,
                               seed=1).run(T, discard)
    trajC = GraphSystemDecimal(graph_C(N), alpha=alpha,
                               local_map=local_map,
                               coupling_fn=coupling,
                               seed=2).run(T, discard)

    res, v_first, v_second = classify_B_vs_C(trajB, trajC)
    print("\nCompare the two sequences:", res)
    print(f"  Mean hub variance for the first sequence  = {v_first:.6e}")
    print(f"  Mean hub variance for the second sequence = {v_second:.6e}")
# ======== New or Replacement Section Ends ===================================


A_N  → classify_ABC → A_N
B_N  → classify_ABC → B_N and C_N
C_N  → classify_ABC → B_N and C_N

Compare the two sequences: first_is_B
  Mean hub variance for the first sequence  = 2.936251e-03
  Mean hub variance for the second sequence = 6.113247e-03


In [7]:
# ---------- 1. Theoretical variance function ---------------------
def theoretical_hub_variance(N: int,
                             graph_type: str,
                             sigma_h2: float = 0.3898615457,
                             alpha: float = 0.25) -> float:
    """
    Compute the closed-form (idealized) variance of hub nodes for type-B or type-C stars.

    The model assumes a diffusive coupling with per-step innovation variance
    proportional to σ_h^2 and a normalization by the maximum out-degree Δ.
    For star variants:
      - B: every leaf connects to both hubs → L = N - 2, Δ = N - 2
      - C: leaves split evenly across the two hubs → L = N//2 - 1, Δ = N//2 - 1

    The returned variance is:
        Var_hub = (alpha**2) * sigma_h2 * L / (Delta**2)

    Parameters
    ----------
    N : int
        Number of nodes in the graph.
    graph_type : str
        'B' or 'C' for the corresponding star topology.
    sigma_h2 : float, optional
        The integral of squared kernel (e.g., σ_h^2 = ∫ h^2 dm) used by the theory.
    alpha : float, optional
        Coupling strength.

    Returns
    -------
    float
        Theoretical hub variance for the specified graph type.

    Raises
    ------
    ValueError
        If `graph_type` is not 'B' or 'C'.
    """
    if graph_type == "B":
        L = N - 2
        Delta = N - 2
    elif graph_type == "C":
        L = N // 2 - 1
        Delta = N // 2 - 1
    else:
        raise ValueError("graph_type must be 'B' or 'C'")
    return (alpha ** 2) * sigma_h2 * L / (Delta ** 2)


# ---------- 2. Single-sequence B/C classification -------------------
def classify_single_BC_theory(traj: np.ndarray, N: int) -> tuple[str, dict]:
    """
    Classify a single N×T trajectory as 'B_N' or 'C_N' by matching empirical vs. theoretical hub variances.

    Procedure
    ---------
    a) Detect hubs with a 2-component GMM on node strength S_i = ⟨|Δ_i|⟩ where
       Δ_i(t) = x_{t+1,i} − f(x_{t,i}), using the logistic local map f.
    b) Compute empirical hub variance V_hat as the mean of β-residual variances
       across the two hubs (see `average_hub_variance`).
    c) Compute theoretical hub variances V_B_th, V_C_th via `theoretical_hub_variance`.
    d) Pick the label minimizing |log V_hat − log V_th|.

    Parameters
    ----------
    traj : np.ndarray
        Trajectory array of shape (N, T), after any transient discard.
    N : int
        Number of nodes (used in the theoretical formulas).

    Returns
    -------
    tuple[str, dict]
        - label : {'B_N', 'C_N'}
            Predicted graph type for the given trajectory.
        - info : dict
            Debugging payload with keys:
              * 'V_hat'  : float, empirical mean hub variance
              * 'V_B_th' : float, theoretical hub variance for type-B
              * 'V_C_th' : float, theoretical hub variance for type-C
              * 'd_B'    : float, |log(V_hat) - log(V_B_th)|
              * 'd_C'    : float, |log(V_hat) - log(V_C_th)|
              * 'hubs'   : list[int], indices of detected hubs

    Raises
    ------
    RuntimeError
        If the detected number of hubs is not exactly two.

    Notes
    -----
    - This function relies on the global `logistic_vec`, `gmm_hubs`, and
      `average_hub_variance` utilities defined elsewhere.
    - Assumes V_hat, V_B_th, V_C_th > 0 so that logarithms are defined.
    """
    # a) Locate hubs
    S = compute_strength(traj, logistic_vec)          # Use a different mapper? Modify this call.
    hubs = np.where(gmm_hubs(S))[0]
    if hubs.size != 2:
        raise RuntimeError("Number of hubs in trajectory ≠ 2; not a B/C graph")

    # b) Empirical average variance of hubs
    V_hat = average_hub_variance(traj)

    # c) Theoretical values
    V_B_th = theoretical_hub_variance(N, "B")
    V_C_th = theoretical_hub_variance(N, "C")

    # d) Log-distance between empirical and theoretical variances
    d_B = abs(np.log(V_hat) - np.log(V_B_th))
    d_C = abs(np.log(V_hat) - np.log(V_C_th))

    label = "B_N" if d_B < d_C else "C_N"
    info = {
        "V_hat": V_hat,
        "V_B_th": V_B_th,
        "V_C_th": V_C_th,
        "d_B": d_B,
        "d_C": d_C,
        "hubs": hubs.tolist(),
    }
    return label, info


# ---------- 3. Demo --------------------------------
if __name__ == "__main__":
    """
    Demo
    ----
    Generate trajectories from type-B and type-C stars under the logistic map
    and sinusoidal diffusive coupling, then classify each single sequence using
    the theory-matching approach above.
    """
    N, T, discard = 50, 8000, 800
    alpha = 0.25
    local_map = logistic_map_decimal
    coupling  = coupling_sin_diff

    # Simulate B and C star graphs
    trajB = (GraphSystemDecimal(graph_B(N),
                                alpha=alpha,
                                local_map=local_map,
                                coupling_fn=coupling,
                                seed=1)
             .run(T, discard))
    trajC = (GraphSystemDecimal(graph_C(N),
                                alpha=alpha,
                                local_map=local_map,
                                coupling_fn=coupling,
                                seed=2)
             .run(T, discard))

    # Classify each trajectory independently
    resB, infoB = classify_single_BC_theory(trajB, N)
    resC, infoC = classify_single_BC_theory(trajC, N)

    print("Trajectory B → classified as:", resB, "| debug:", infoB)
    print("Trajectory C → classified as:", resC, "| debug:", infoC)


Trajectory B → classified as: B_N | debug: {'V_hat': 0.0005113619541740942, 'V_B_th': 0.0005076322209635416, 'V_C_th': 0.0010152644419270831, 'd_B': 0.0073204537525510815, 'd_C': 0.6858267268073943, 'hubs': [48, 49]}
Trajectory C → classified as: C_N | debug: {'V_hat': 0.0010205339499985673, 'V_B_th': 0.0005076322209635416, 'V_C_th': 0.0010152644419270831, 'd_B': 0.6983240387998242, 'd_C': 0.00517685823987879, 'hubs': [48, 49]}


In [2]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
B/C star-network classifier via alpha^2-consistency and hub empirical law.

This module provides:
  1) A high-precision coupled-map simulator on a directed graph with states on
     the one-dimensional torus T = R/Z (represented by [0, 1)).
  2) Star-graph generators for types A, B, and C.
  3) Utilities to detect hubs from data (2-component GMM on per-node errors).
  4) Segment-level statistics for hubs:
       - residual variance V (from a least-squares regression y ~ m_h(x)),
       - empirical expectation K_hat = <K(x_t)> using the hub time series,
     where m_h(x) = ∫ h(x,y) dm(y) and K(x) = Var_y[h(x,y)] are known in closed form.
  5) A parameter-free B-vs-C classifier that uses two time segments with a
     common (unknown) coupling strength α and decides which segment is B and
     which is C by comparing the consistency of the implied α^2 under the
     two topological hypotheses.

Default model (matches the math write-up):
  - Local map: f(x) = 2 x (mod 1).
  - Coupling  : h(x, y) = 2 sin(x) sin(y).  (x, y are angles in radians.)
  - Invariant measure for leaves: Lebesgue on the circle.
  - Normalization: the coupling term at node i is divided by Δ = max in-degree.

Dependencies:
  numpy, mpmath, scikit-learn (GaussianMixture).
"""

from __future__ import annotations

from decimal import Decimal, getcontext
from typing import Callable, Tuple, Dict, Any

import mpmath as mp
import numpy as np
from sklearn.mixture import GaussianMixture


# ---------------------------------------------------------------------
# Global precision for Decimal and mpmath (used in the simulator)
# ---------------------------------------------------------------------
getcontext().prec = 200     # Decimal precision
mp.mp.dps = getcontext().prec


# ---------------------------------------------------------------------
# High-precision network simulator
# ---------------------------------------------------------------------
class GraphSystemDecimal:
    """
    Coupled-map lattice on a directed graph with high-precision arithmetic.

    State space: one-dimensional torus T = R/Z (represented by [0, 1)).
    Update rule for node n (mod 1):
        x_n(t+1) = f(x_n(t)) + (alpha / Delta) * sum_{j: A[j, n]=1} h(x_n(t), x_j(t)),
    where Delta = max in-degree over all nodes.

    Parameters
    ----------
    A : np.ndarray (N x N)
        Directed adjacency matrix. A[j, i] = 1 means edge j -> i (j contributes to i).
    alpha : str
        Coupling strength as a string, parsed into Decimal for precision.
    local_map : Callable[[Decimal], Decimal], optional
        Local map f acting on Decimal in [0,1). Default: doubling map (2*x) % 1.
    coupling_fn : Callable[[Decimal, Decimal], Decimal], optional
        Pairwise coupling h(xs, xt) from source xs to target xt. Default: 2 sin xs sin xt.
    seed : int
        RNG seed for i.i.d. Uniform(0,1) initialization of the states.

    Attributes
    ----------
    N : int
        Number of nodes.
    Delta : float
        Maximum in-degree (max column sum of A), used for normalization.
    x : list[Decimal]
        Current state vector.
    """

    def __init__(
        self,
        A: np.ndarray,
        alpha: str = "0.25",
        local_map: Callable[[Decimal], Decimal] | None = None,
        coupling_fn: Callable[[Decimal, Decimal], Decimal] | None = None,
        seed: int = 0,
    ):
        self.A = np.asarray(A, dtype=float)
        self.N = self.A.shape[0]
        # Max in-degree (column sum) for normalization
        self.Delta = self.A.sum(axis=0).max()
        self.alpha = Decimal(alpha)
        # Default local map: doubling map on the circle
        self.local_map = local_map or (lambda z: (Decimal(2) * z) % 1)
        # Default coupling: h(xs, xt) = 2 sin xs sin xt
        self.coupling = coupling_fn or coupling_sin_sin
        self.rng = np.random.default_rng(seed)
        self.reset()

    def _coupling_term(self) -> list[Decimal]:
        """
        Compute the normalized coupling increment for each node.

        Returns
        -------
        list[Decimal]
            For each node i, the quantity (1/Delta) * sum_{j} A[j, i] * h(x_j, x_i).
        """
        incr = [Decimal(0)] * self.N
        for j in range(self.N):
            if self.A[j].sum() == 0:
                continue  # node j has no outgoing edges
            for i in range(self.N):
                if self.A[j, i]:
                    incr[i] += self.coupling(self.x[j], self.x[i])
        d = Decimal(str(self.Delta))
        return [v / d for v in incr]

    def step(self) -> list[Decimal]:
        """
        Advance the system by one step: x <- f(x) + alpha * coupling (mod 1).

        Returns
        -------
        list[Decimal]
            The updated state vector.
        """
        xn = [self.local_map(x) for x in self.x]  # apply f
        coup = self._coupling_term()
        xn = [(xi + self.alpha * ci) % 1 for xi, ci in zip(xn, coup)]
        self.x = xn
        return xn

    def reset(self) -> None:
        """Reset states to i.i.d. Uniform(0,1) in Decimal precision."""
        self.x = [Decimal(str(v)) for v in self.rng.random(self.N)]

    def run(self, T: int, discard: int = 0) -> np.ndarray:
        """
        Simulate for T steps and return the trajectory after discarding a transient.

        Parameters
        ----------
        T : int
            Total number of simulation steps.
        discard : int
            Number of initial steps to discard as transient.

        Returns
        -------
        np.ndarray
            Array of shape (N, max(0, T - discard)) with float64 snapshots of states.
        """
        traj = np.zeros((self.N, max(0, T - discard)))
        for k in range(T):
            xt = self.step()
            if k >= discard:
                traj[:, k - discard] = [float(v) for v in xt]
        return traj


# ---------------------------------------------------------------------
# Star graph generators (A: single hub; B: two hubs, all leaves to both;
# C: two hubs, leaves split into two disjoint halves)
# ---------------------------------------------------------------------
def graph_A(N: int) -> np.ndarray:
    """Star with one hub at index N-1 (all leaves point to the hub)."""
    A = np.zeros((N, N))
    A[np.arange(N - 1), N - 1] = 1
    return A


def graph_B(N: int) -> np.ndarray:
    """Two hubs at indices N-2 and N-1; every leaf connects to both hubs."""
    A = np.zeros((N, N))
    leaves = np.arange(N - 2)
    A[leaves, N - 1] = 1
    A[leaves, N - 2] = 1
    return A


def graph_C(N: int) -> np.ndarray:
    """
    Two hubs at indices N-2 and N-1; the leaves (0..N-3) are split evenly:
    first half -> hub N-2, second half -> hub N-1.
    """
    A = np.zeros((N, N))
    L = N - 2       # number of leaves
    half = L // 2
    first = np.arange(0, half)
    second = np.arange(half, L)
    A[first, N - 2] = 1
    A[second, N - 1] = 1
    return A


# ---------------------------------------------------------------------
# Model-specific h, its integral m_h, and the variance kernel K
#   h(x, y) = 2 sin x sin y   (angles in radians)
#   m_h(x)  = ∫ h(x, y) dm(y) = C1 sin x
#   K(x)    = Var_y[h(x, y)]  = CK sin^2 x
# ---------------------------------------------------------------------
def coupling_sin_sin(xs: Decimal, xt: Decimal) -> Decimal:
    """High-precision coupling: h(xs, xt) = 2 sin(xs) sin(xt)."""
    v = 2.0 * float(mp.sin(mp.mpf(str(xs)))) * float(mp.sin(mp.mpf(str(xt))))
    return Decimal(str(v))


C1 = 2.0 * (1.0 - np.cos(1.0))                       # coefficient in m_h(x)
CK = (2.0 - np.sin(2.0)) - C1**2                     # coefficient in K(x)


def doubling_vec(u: np.ndarray) -> np.ndarray:
    """Vectorized local map f(x) = 2x (mod 1) acting elementwise."""
    return (2.0 * u) % 1.0


def Ih_vec(u: np.ndarray) -> np.ndarray:
    """Vectorized m_h(x) = C1 * sin(x)."""
    return C1 * np.sin(u)


def K_vec(u: np.ndarray) -> np.ndarray:
    """Vectorized K(x) = CK * sin(x)^2."""
    return CK * np.sin(u) ** 2


# ---------------------------------------------------------------------
# Utilities: modular difference, node-wise strength, and hub detection
# ---------------------------------------------------------------------
def moddiff(u: np.ndarray) -> np.ndarray:
    """
    Wrap values into (-0.5, 0.5] by subtracting the nearest integer.
    Useful for measuring errors on the circle.
    """
    return ((u + 0.5) % 1.0) - 0.5


def compute_strength(traj: np.ndarray) -> np.ndarray:
    """
    Per-node mean absolute innovation relative to the local map:
      S_i = < |x_{t+1,i} - f(x_{t,i})|_{mod 1} >_t.

    Parameters
    ----------
    traj : np.ndarray (N x T)

    Returns
    -------
    np.ndarray (N,)
        Mean wrapped absolute error for each node.
    """
    x, x1 = traj[:, :-1], traj[:, 1:]
    Delta = moddiff(x1 - doubling_vec(x))
    return np.abs(Delta).mean(axis=1)


def gmm_hubs(S: np.ndarray, seed: int = 0) -> np.ndarray:
    """
    Identify hubs by a 2-component Gaussian Mixture Model (GMM) fitted to S.
    The component with the larger mean is labeled as hubs.

    Parameters
    ----------
    S : np.ndarray (N,)
        Node-wise strengths.
    seed : int

    Returns
    -------
    np.ndarray (N,) of bool
        True for nodes classified as hubs.
    """
    g = GaussianMixture(2, random_state=seed).fit(S.reshape(-1, 1))
    return g.predict(S.reshape(-1, 1)) == np.argmax(g.means_)


# ---------------------------------------------------------------------
# Residual variance at a single hub:
#   y_t = x_{t+1} - f(x_t)   (wrapped)
#   s_t = m_h(x_t)
#   beta = argmin_b || y + b * s ||_2^2  =  -(y·s)/(s·s)
#   residual r_t = y_t + beta s_t
#   V = Var_t(r_t)
# ---------------------------------------------------------------------
def resid_var_one(traj_i: np.ndarray, eps: float = 1e-12) -> float:
    """
    Residual variance for a single node time series.

    Parameters
    ----------
    traj_i : np.ndarray (T,)
        Time series of a single hub.
    eps : float
        Threshold to guard against division by zero in LS.

    Returns
    -------
    float
        Variance of residuals r_t = y_t + beta s_t.
    """
    x = traj_i[:-1]
    y = moddiff(traj_i[1:] - doubling_vec(x))
    s = Ih_vec(x)
    denom = float(s @ s)
    beta = 0.0 if denom < eps else -(y @ s) / denom
    resid = y + beta * s
    return float(np.var(resid))


# ---------------------------------------------------------------------
# Segment-level statistics for hubs:
#   V_hat = mean residual variance across the two hubs,
#   K_hat = mean of K(x_t) across the two hubs (i.e. empirical E K(x))
# ---------------------------------------------------------------------
def hub_stats_segment(traj: np.ndarray, seed: int = 0) -> Tuple[float, float, np.ndarray]:
    """
    Compute (V_hat, K_hat) for a single B/C candidate segment.

    Parameters
    ----------
    traj : np.ndarray (N x T)
        Segment trajectory.
    seed : int
        RNG seed used by GMM.

    Returns
    -------
    (V_hat, K_hat, hubs)
        V_hat : float
            Mean residual variance across the two hubs in the segment.
        K_hat : float
            Mean of K(x_t) across the two hubs (empirical expectation).
        hubs : np.ndarray (size 2)
            Indices of the two hubs.
    """
    S = compute_strength(traj)
    hubs = np.where(gmm_hubs(S, seed=seed))[0]
    if hubs.size != 2:
        raise RuntimeError("This segment is not type B/C (number of hubs ≠ 2).")
    V_list, K_list = [], []
    for i in hubs:
        xi = traj[i]
        V_list.append(resid_var_one(xi))
        K_list.append(float(np.mean(K_vec(xi[:-1]))))  # empirical E[K(x)] along the hub
    V_hat = float(np.mean(V_list))
    K_hat = float(np.mean(K_list))
    return V_hat, K_hat, hubs


# ---------------------------------------------------------------------
# Single-segment coarse classification: A vs (B/C)
# ---------------------------------------------------------------------
def classify_A_and_BC(traj: np.ndarray, N: int) -> str:
    """
    Decide whether a single segment corresponds to A (one hub) or B/C (two hubs).

    Returns
    -------
    str : "A_N" or "B_N and C_N"
    """
    S = compute_strength(traj)
    hubs = np.where(gmm_hubs(S))[0]
    return "A_N" if hubs.size == 1 else "B_N and C_N"


# ---------------------------------------------------------------------
# Two-segment B vs C classifier via alpha^2-consistency
#   For each segment s (s=1,2):
#      - compute V_s and K_s from the hubs,
#      - form S_{s,B} = F_B K_s, S_{s,C} = F_C K_s,
#   Then compare the two hypotheses: (seg1=B, seg2=C) vs (seg1=C, seg2=B)
#   by the log-mismatch of the implied alpha^2.
# ---------------------------------------------------------------------
def classify_B_vs_C_two_segments(
    traj1: np.ndarray, traj2: np.ndarray, N: int, seed: int = 0
) -> Dict[str, Any]:
    """
    Classify which of the two segments is B and which is C under the assumption
    that both segments share the same (unknown) coupling strength alpha.

    Parameters
    ----------
    traj1, traj2 : np.ndarray (N x T)
        Two B/C candidate segments.
    N : int
        Number of nodes in the graph.
    seed : int
        RNG seeds used by the GMM calls.

    Returns
    -------
    dict
        A dictionary with the decision and useful diagnostics:
          - 'label'      : 'first_is_B' or 'first_is_C'
          - 'alpha2_BC'  : (alpha^2 estimate if seg1=B, seg2=C)
          - 'alpha2_CB'  : (alpha^2 estimate if seg1=C, seg2=B)
          - 'D_BC','D_CB': log-mismatches under the two hypotheses
          - 'V1','K1','V2','K2'
          - 'hubs1','hubs2'
          - 'fac_B','fac_C' (the topology factors used)
    """
    V1, K1, hubs1 = hub_stats_segment(traj1, seed=seed)
    V2, K2, hubs2 = hub_stats_segment(traj2, seed=seed + 1)

    # Topology factors d/Delta^2 (choose the convention you prefer).
    # Here we use: B → 1/(N-2), C → 1/(N/2 - 1).
    fac_B = 1.0 / (N - 2)
    fac_C = 1.0 / (N // 2 - 1)

    # Theoretical scalings without alpha^2
    S1B, S1C = fac_B * K1, fac_C * K1
    S2B, S2C = fac_B * K2, fac_C * K2

    # Alpha^2 estimates under the two global assignments
    a2_1_B, a2_2_C = V1 / S1B, V2 / S2C  # hypothesis: (seg1=B, seg2=C)
    a2_1_C, a2_2_B = V1 / S1C, V2 / S2B  # hypothesis: (seg1=C, seg2=B)

    # Log-mismatch of alpha^2 under each hypothesis
    D_BC = abs(np.log(a2_1_B) - np.log(a2_2_C))
    D_CB = abs(np.log(a2_1_C) - np.log(a2_2_B))

    label = "first_is_B" if D_BC < D_CB else "first_is_C"

    return {
        "label": label,
        "alpha2_BC": (a2_1_B, a2_2_C),
        "alpha2_CB": (a2_1_C, a2_2_B),
        "D_BC": D_BC,
        "D_CB": D_CB,
        "V1": V1,
        "K1": K1,
        "V2": V2,
        "K2": K2,
        "hubs1": hubs1,
        "hubs2": hubs2,
        "fac_B": fac_B,
        "fac_C": fac_C,
    }


# ---------------------------------------------------------------------
# Demonstration
# ---------------------------------------------------------------------
if __name__ == "__main__":
    N, T, discard = 50, 8000, 800
    alpha = "0.25"

    # 1) Single-segment A vs (B/C)
    for gname, maker in [("A_N", graph_A), ("B_N", graph_B), ("C_N", graph_C)]:
        traj = GraphSystemDecimal(maker(N), alpha=alpha, seed=hash(gname) % 2**32).run(
            T, discard
        )
        print(f"{gname}  → classify_ABC → {classify_A_and_BC(traj, N)}")

    # 2) Two segments: B vs C (alpha unknown but identical across segments)
    trajB = GraphSystemDecimal(graph_B(N), alpha=alpha, seed=1).run(T, discard)
    trajC = GraphSystemDecimal(graph_C(N), alpha=alpha, seed=2).run(T, discard)

    out = classify_B_vs_C_two_segments(trajB, trajC, N)
    print("\nB/C decision:", out["label"])
    print(
        f"D_BC={out['D_BC']:.3e} (seg1=B, seg2=C) | "
        f"D_CB={out['D_CB']:.3e} (seg1=C, seg2=B)"
    )
    print(f"alpha^2 under (B,C) = {out['alpha2_BC']}")
    print(f"alpha^2 under (C,B) = {out['alpha2_CB']}")


A_N  → classify_ABC → A_N
B_N  → classify_ABC → B_N and C_N
C_N  → classify_ABC → B_N and C_N

B/C decision: first_is_B
D_BC=2.339e-02 | D_CB=1.404e+00
S1B=1.172166e-03, S1C=2.393173e-03 | V1=7.505085e-05
S2B=1.170900e-03, S2C=2.390587e-03 | V2=1.495241e-04


In [3]:
# -*- coding: utf-8 -*-
# star_network_identify.py  (revised B/C decision via α²-consistency + hub empirical law)
from __future__ import annotations
import numpy as np
import mpmath as mp
from decimal import Decimal, getcontext
from typing import Callable, Tuple
from sklearn.mixture import GaussianMixture

# ---------------- High precision core (unchanged except optional new coupling) ----------
getcontext().prec = 200
mp.mp.dps = getcontext().prec

class GraphSystemDecimal:
    def __init__(self, A: np.ndarray, alpha: str = '0.25',
                 local_map: Callable[[Decimal], Decimal] | None = None,
                 coupling_fn: Callable[[Decimal, Decimal], Decimal] | None = None,
                 seed: int = 0):
        self.A = np.asarray(A, dtype=float)
        self.N = self.A.shape[0]
        self.Delta = self.A.sum(axis=0).max()
        self.alpha = Decimal(alpha)
        self.local_map = local_map or (lambda x: (Decimal(2) * x) % 1)   # doubling
        self.coupling = coupling_fn or coupling_sin_sin                   # NEW default
        self.rng = np.random.default_rng(seed)
        self.reset()

    def _coupling_term(self):
        incr = [Decimal(0)] * self.N
        for j in range(self.N):
            if self.A[j].sum() == 0:
                continue
            for i in range(self.N):
                if self.A[j, i]:
                    incr[i] += self.coupling(self.x[j], self.x[i])
        d = Decimal(str(self.Delta))
        return [v / d for v in incr]

    def step(self):
        xn = [(Decimal(2) * x) % 1 for x in self.x]  # f(x)=2x mod 1
        coup = self._coupling_term()
        xn = [(xi + self.alpha * ci) % 1 for xi, ci in zip(xn, coup)]
        self.x = xn
        return xn

    def reset(self):
        self.x = [Decimal(str(v)) for v in self.rng.random(self.N)]

    def run(self, T: int, discard: int = 0):
        traj = np.zeros((self.N, max(0, T - discard)))
        for k in range(T):
            xt = self.step()
            if k >= discard:
                traj[:, k - discard] = [float(v) for v in xt]
        return traj

# ---------------- Graph generators (fix split bug in C) ---------------------
def graph_A(N: int):
    A = np.zeros((N, N))
    A[np.arange(N - 1), N - 1] = 1
    return A

def graph_B(N: int):
    A = np.zeros((N, N))
    leaves = np.arange(N - 2)
    A[leaves, N - 1] = 1
    A[leaves, N - 2] = 1
    return A

def graph_C(N: int):
    # leaves: 0..N-3 ; split evenly
    A = np.zeros((N, N))
    L = N - 2
    half = L // 2
    first = np.arange(0, half)
    second = np.arange(half, L)
    A[first,  N - 2] = 1
    A[second, N - 1] = 1
    return A

# ---------------- Model-specific h, its integral and variance kernel --------
# h(x,y) = 2 sin x sin y  (x,y ∈ [0,1) identified with the circle)
def coupling_sin_sin(xs: Decimal, xt: Decimal) -> Decimal:
    v = 2.0 * float(mp.sin(mp.mpf(str(xs)))) * float(mp.sin(mp.mpf(str(xt))))
    return Decimal(str(v))

C1 = 2.0 * (1.0 - np.cos(1.0))                      # m_h(x) = C1 * sin x
CK = (2.0 - np.sin(2.0)) - C1**2                     # K(x) = CK*sin^2 x

def doubling_vec(u: np.ndarray) -> np.ndarray:
    return (2.0 * u) % 1.0

def Ih_vec(u: np.ndarray) -> np.ndarray:
    return C1 * np.sin(u)

def K_vec(u: np.ndarray) -> np.ndarray:
    return CK * np.sin(u) ** 2

# ---------------- Utilities: wrap, strength, GMM hubs -----------------------
def moddiff(u):
    return ((u + 0.5) % 1.0) - 0.5

def compute_strength(traj: np.ndarray) -> np.ndarray:
    x, x1 = traj[:, :-1], traj[:, 1:]
    Delta = moddiff(x1 - doubling_vec(x))
    return np.abs(Delta).mean(axis=1)

def gmm_hubs(S, seed=0):
    g = GaussianMixture(2, random_state=seed).fit(S.reshape(-1, 1))
    return g.predict(S.reshape(-1, 1)) == np.argmax(g.means_)

# ---------------- Residual variance at one hub (regress on m_h) ------------
def resid_var_one(traj_i: np.ndarray, eps: float = 1e-12) -> float:
    x = traj_i[:-1]
    y = moddiff(traj_i[1:] - doubling_vec(x))
    s = Ih_vec(x)
    denom = float(s @ s)
    beta = 0.0 if denom < eps else -(y @ s) / denom
    resid = y + beta * s
    return float(np.var(resid))

# -------- Segment-level stats: use hub empirical law for K ------------------
def hub_stats_segment(traj: np.ndarray, seed: int = 0) -> Tuple[float, float, np.ndarray]:
    S    = compute_strength(traj)
    hubs = np.where(gmm_hubs(S, seed=seed))[0]
    if hubs.size != 2:
        raise RuntimeError("segment is not B/C type (number of hubs ≠ 2)")
    V_list, K_list = [], []
    for i in hubs:
        xi = traj[i]
        V_list.append(resid_var_one(xi))
        K_list.append(float(np.mean(K_vec(xi[:-1]))))  # <-- 经验分布进入 K 的期望
    V_hat = float(np.mean(V_list))
    K_hat = float(np.mean(K_list))
    return V_hat, K_hat, hubs

# ---------------- A vs (B/C) (保持不变) -------------------------------------
def classify_A_and_BC(traj: np.ndarray, N: int) -> str:
    S = compute_strength(traj)
    hubs = np.where(gmm_hubs(S))[0]
    return "A_N" if hubs.size == 1 else "B_N and C_N"

# ---------------- B vs C：α²一致性（两段输入） -----------------------------
def classify_B_vs_C_two_segments(traj1: np.ndarray, traj2: np.ndarray, N: int, seed: int = 0):
    V1, K1, hubs1 = hub_stats_segment(traj1, seed=seed)
    V2, K2, hubs2 = hub_stats_segment(traj2, seed=seed+1)

    fac_B = 1.0 / (N - 2)          # d/Δ^2 for B
    fac_C = 1.0 / (N // 2 - 1)     # d/Δ^2 for C

    a2_1_B = V1 / (fac_B * K1)
    a2_1_C = V1 / (fac_C * K1)
    a2_2_B = V2 / (fac_B * K2)
    a2_2_C = V2 / (fac_C * K2)

    D_BC = abs(np.log(a2_1_B) - np.log(a2_2_C))   # seg1=B, seg2=C
    D_CB = abs(np.log(a2_1_C) - np.log(a2_2_B))   # seg1=C, seg2=B
    label = "first_is_B" if D_BC < D_CB else "first_is_C"

    return {
        "label": label,
        "alpha2_BC": (a2_1_B, a2_2_C),
        "alpha2_CB": (a2_1_C, a2_2_B),
        "D_BC": D_BC, "D_CB": D_CB,
        "V1": V1, "K1": K1, "V2": V2, "K2": K2,
        "hubs1": hubs1, "hubs2": hubs2,
        "fac_B": fac_B, "fac_C": fac_C
    }

# ---------------- Demo ------------------------------------------------------
if __name__ == "__main__":
    N, T, discard = 50, 8000, 800
    alpha = '0.25'

    # 单段：A vs (B/C)
    for gname, maker in [("A_N", graph_A), ("B_N", graph_B), ("C_N", graph_C)]:
        traj = GraphSystemDecimal(maker(N), alpha=alpha, seed=hash(gname) % 2**32).run(T, discard)
        print(f"{gname}  → classify_ABC → {classify_A_and_BC(traj, N)}")

    # 两段：B vs C（α未知但一致）
    trajB = GraphSystemDecimal(graph_B(N), alpha=alpha, seed=1).run(T, discard)
    trajC = GraphSystemDecimal(graph_C(N), alpha=alpha, seed=2).run(T, discard)

    out = classify_B_vs_C_two_segments(trajB, trajC, N)
    print("\nB/C decision:", out['label'])
    print(f"D_BC={out['D_BC']:.3e} (seg1=B, seg2=C) | D_CB={out['D_CB']:.3e} (seg1=C, seg2=B)")
    print(f"alpha^2 under (B,C) = {out['alpha2_BC']}")
    print(f"alpha^2 under (C,B) = {out['alpha2_CB']}")


A_N  → classify_ABC → A_N
B_N  → classify_ABC → B_N and C_N
C_N  → classify_ABC → B_N and C_N

B/C decision: first_is_B
D_BC=2.775e-03 (seg1=B, seg2=C) | D_CB=1.384e+00 (seg1=C, seg2=B)
alpha^2 under (B,C) = (0.06272079766609986, 0.06254701302597973)
alpha^2 under (C,B) = (0.03136039883304993, 0.12509402605195946)
