# Week 8 — Topos Regularizers via Overlap Consistency
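We compare two candidate causal graphs using a sheaf-style overlap penalty: for each candidate edge, the samples are repeatedly split into two random halves and the absolute difference between the two halves' correlations is accumulated.
A lower penalty suggests better consistency across local contexts.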


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

rng = np.random.default_rng(0)

def topo_order(A):
    """Return a topological ordering of the nodes of a directed graph.

    Parameters
    ----------
    A : 2-D array-like
        Square adjacency matrix. Any nonzero ``A[i, j]`` is read as an edge
        ``i -> j``, so weighted matrices are accepted as well as 0/1 ones.

    Returns
    -------
    list of int
        Node indices ordered so that every node appears after all of its
        parents. Nodes that lie on, or downstream of, a cycle never reach
        in-degree zero and are therefore left out of the result.
    """
    # Count incoming edges on the nonzero mask rather than summing the raw
    # entries: summing weights (and truncating to int) miscounts in-degrees
    # whenever the entries are not exactly 0/1.
    has_edge = np.asarray(A) != 0
    d = has_edge.shape[0]
    indeg = has_edge.sum(axis=0).tolist()

    order = []
    ready = [i for i in range(d) if indeg[i] == 0]
    while ready:
        # The ready list is used as a stack, so the most recently freed node
        # is visited first.
        v = ready.pop()
        order.append(v)
        for w in np.flatnonzero(has_edge[v]):
            w = int(w)  # keep plain Python ints in the returned list
            indeg[w] -= 1
            if indeg[w] == 0:
                ready.append(w)
    return order

def sample_sem(W, n, rng):
    """Draw ``n`` samples from a linear SEM with standard-normal noise.

    Parameters
    ----------
    W : 2-D array
        Weight matrix; a nonzero ``W[i, j]`` is the coefficient of parent
        ``i`` in the equation for node ``j``.
    n : int
        Number of samples (rows) to generate.
    rng : numpy.random.Generator
        Source of the noise draws; one scalar is drawn per node per sample.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n, d)`` with ``d = W.shape[0]``. Columns of
        nodes that ``topo_order`` does not return keep their initial zeros.
    """
    d = W.shape[0]
    order = topo_order((W != 0).astype(int))

    # Parent indices and coefficients depend only on W, so look them up once
    # instead of once per sample.
    incoming = {}
    for j in order:
        pa = np.flatnonzero(W[:, j])
        incoming[j] = (pa, W[pa, j])

    X = np.zeros((n, d), dtype=float)
    for row in X:  # each row is a view, so assignments land in X
        for j in order:
            pa, coef = incoming[j]
            signal = float(np.dot(row[pa], coef)) if len(pa) else 0.0
            row[j] = signal + rng.normal(0, 1.0)
    return X

def build_contexts_from_edges(edges):
    """Map every node to its local context: the node plus its direct parents.

    Parameters
    ----------
    edges : iterable of (u, v) pairs
        Directed edges ``u -> v``.

    Returns
    -------
    dict
        ``{node: sorted list of the node and its parents}`` for every node
        that appears in at least one edge. Nodes without parents map to a
        single-element list.
    """
    parent_sets = {}
    for src, dst in edges:
        parent_sets.setdefault(dst, set()).add(src)
        # Make sure pure source nodes get an (empty) entry too.
        parent_sets.setdefault(src, set())
    return {node: sorted(pa | {node}) for node, pa in parent_sets.items()}

def star_cover(vars_U):
    """Return the leave-one-out sublists of ``vars_U``.

    For a list with two or more entries, the result holds one list per entry
    ``w`` containing every element that differs from ``w``. A list with at
    most one entry is returned unchanged, wrapped in a list.
    """
    if len(vars_U) < 2:
        return [vars_U]
    cover = []
    for dropped in vars_U:
        cover.append([v for v in vars_U if v != dropped])
    return cover

def sheaf_overlap_consistency(df, edges, splits=5, seed=0):
    """Accumulate split-half correlation gaps over the edges of a graph.

    For every edge ``(u, v)`` whose endpoints are both columns of ``df`` and
    which has at least 20 rows, the rows are randomly permuted ``splits``
    times. Each permutation is cut into a first half (``n // 2`` rows) and
    the remainder, the correlation of the two columns is computed on each
    part with ``np.corrcoef``, and the absolute difference is added to the
    running total.

    Parameters
    ----------
    df : pandas.DataFrame
        Data with one column per variable.
    edges : iterable of (u, v) pairs
        Column-name pairs to score.
    splits : int, default 5
        Number of random half-splits per edge.
    seed : int, default 0
        Seed for the permutation generator.

    Returns
    -------
    float
        Sum of the absolute gaps. It is not normalised by the number of
        edges or splits. Splits where either correlation is non-finite
        contribute nothing.
    """
    rng = np.random.default_rng(seed)
    available = list(df.columns)
    total = 0.0
    for src, dst in edges:
        if not (src in available and dst in available):
            continue
        xs = df[src].to_numpy(dtype=float)
        ys = df[dst].to_numpy(dtype=float)
        n_rows = len(xs)
        if n_rows < 20:
            continue
        half = n_rows // 2
        for _ in range(splits):
            shuffled = rng.permutation(n_rows)
            first, second = shuffled[:half], shuffled[half:]
            r_first = np.corrcoef(xs[first], ys[first])[0, 1]
            r_second = np.corrcoef(xs[second], ys[second])[0, 1]
            # A constant column gives a NaN correlation; skip such splits.
            if not (np.isfinite(r_first) and np.isfinite(r_second)):
                continue
            total += abs(r_first - r_second)
    return total
# NOTE: a simple RBF MMD term is not implemented; the penalty above is based only on split-half correlation gaps.

# Ground truth: the 4-node chain X0 -> X1 -> X2 -> X3, i.e. ones on the
# first superdiagonal of the adjacency matrix.
A_true = np.eye(4, k=1, dtype=int)
# Keep the chain's sparsity pattern; each slot gets a weight from U(0.6, 0.9).
W_true = A_true * rng.uniform(0.6, 0.9, size=A_true.shape)
X = sample_sem(W_true, 1500, rng)
df = pd.DataFrame(X, columns=[f'X{i}' for i in range(A_true.shape[0])])

# Candidate graphs: the edges of the true chain versus a set of edges that
# only link non-adjacent variables.
edges_true = [(f'X{i}', f'X{j}') for i, j in zip(*np.nonzero(A_true))]
edges_wrong = [('X0', 'X2'), ('X0', 'X3'), ('X1', 'X3')]

# Both candidates are scored on the same data with the default splits/seed.
pen_true = sheaf_overlap_consistency(df, edges_true)
pen_wrong = sheaf_overlap_consistency(df, edges_wrong)

print('Overlap penalty (true):', round(pen_true, 4))
print('Overlap penalty (wrong):', round(pen_wrong, 4))


In [None]:
# Bar chart of the two penalties computed in the previous cell.
# NOTE(review): the title is fixed text; it does not depend on which penalty
# is actually smaller.
fig, ax = plt.subplots(figsize=(4, 3))
ax.bar(['true', 'wrong'], [pen_true, pen_wrong], color=['#4c72b0', '#dd8452'])
ax.set_ylabel('Overlap penalty')
ax.set_title('Sheaf consistency prefers true graph')
fig.tight_layout()
plt.show()
