In [2]:
from autobound.causalProblem import causalProblem
from autobound.DAG import DAG
from autobound.Query import Query
import numpy as np
import pandas as pd
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Goal
Find a $Z \subseteq V$ s.t. $Z$ is outside of any Hedge for some $Q$, but $pot_Q(Z) > 0$

## DAG 0
$Z \to X \to Y$

$X \leftrightarrow Y$, $Z \leftrightarrow Y$

$Z$ is inside the hedge here. I still want to know whether $pot_Q(Z) > 0$.

*Result:* $pot_Q(Z) > 0$ (adding $do(Z)$ data even point-identifies $Q$: the bound width drops to $0$).

In [3]:
# ---------- 1) Synthetic SCM for: Z -> X -> Y; U_XY -> {X,Y}; U_ZY -> {Z,Y} ----------
def synth_scm_Z_X_Y(n=20, p=0.5, seed=0):
    """
    Simulate a binary SCM compatible with Z -> X -> Y, X <-> Y, Z <-> Y and
    tabulate its empirical distributions.

    Structural equations (every variable is 0/1):
      U_XY, U_ZY ~ Bernoulli(p), drawn independently (U_XY first, then U_ZY)
      Z = U_ZY
      X = Z XOR U_XY
      Y = (X OR U_XY) XOR U_ZY

    Args:
      n    : number of simulated units
      p    : success probability shared by both latent variables
      seed : seed handed to np.random.default_rng

    Returns:
      obs  : P[z,x,y]     (shape (2,2,2))
      doZ  : dict z -> P[x,y | do(Z=z)]  (each shape (2,2))
      p_y1_do_x1 : ground-truth P(Y=1 | do(X=1))
    """
    rng = np.random.default_rng(seed)

    # Latent draws; the order of the two calls fixes which stream feeds which latent.
    U_XY = rng.binomial(1, p, n)
    U_ZY = rng.binomial(1, p, n)

    def x_given(z_col):
        # X = Z XOR U_XY
        return np.logical_xor(z_col, U_XY).astype(int)

    def y_given(x_col):
        # Y = (X OR U_XY) XOR U_ZY
        return np.logical_xor(np.logical_or(x_col, U_XY), U_ZY).astype(int)

    def empirical_table(columns, shape):
        # Count how often each joint cell occurs, then normalise to frequencies.
        flat = np.ravel_multi_index(columns, shape)
        tally = np.bincount(flat, minlength=np.prod(shape)).reshape(shape)
        return tally / tally.sum()

    # Observational regime: Z copies U_ZY, which realises Z <-> Y without an edge Z -> Y.
    Z = U_ZY.astype(int)
    X = x_given(Z)
    Y = y_given(X)
    obs = empirical_table((Z, X, Y), (2, 2, 2))   # P[z,x,y]

    # Interventional regimes do(Z=z): clamp Z and re-evaluate X and Y on the same latents.
    doZ = {}
    for z in (0, 1):
        X_do = x_given(np.full(n, z, dtype=int))
        doZ[z] = empirical_table((X_do, y_given(X_do)), (2, 2))  # P[x,y | do(Z=z)]

    # do(X=1): Y = (1 OR U_XY) XOR U_ZY = 1 XOR U_ZY, so the mean is the share of U_ZY == 0.
    p_y1_do_x1 = np.logical_xor(1, U_ZY).astype(int).mean()

    return obs, doZ, p_y1_do_x1

# ---------- 2) Build the ADMG in autobound ----------
dag = DAG()
dag.from_structure(
    "Z -> X, X -> Y, "
    "U_XY -> X, U_XY -> Y, "   # X <-> Y
    "U_ZY -> Z, U_ZY -> Y",    # Z <-> Y
    unob="U_XY,U_ZY"
)

# Generate synthetic distributions.
# The seed is a fixed constant (rather than a fresh np.random.randint draw) so that
# the bounds printed below are reproduced exactly on every Restart & Run All.
SCM_SEED = 0
obs_tab, doZ_tabs, pstar = synth_scm_Z_X_Y(n=300_000, p=0.5, seed=SCM_SEED)
print("Ground truth  P(Y=1 | do(X=1))  =", round(float(pstar), 4))


def problem_with_observational_data(dag, obs_tab):
    """
    Create a causalProblem over binary Z, X, Y constrained by the observational joint.

    Args:
      dag     : the autobound DAG the problem is built on
      obs_tab : array indexed as obs_tab[z, x, y] holding P(Z=z, X=x, Y=y)

    Returns:
      A causalProblem carrying one equality constraint per (z, x, y) cell plus the
      probability constraints added by add_prob_constraints(). No estimand is set.
    """
    problem = causalProblem(dag, number_values={'Z':2,'X':2,'Y':2})
    # np.ndindex walks (z, x, y) in the same order as three nested loops over (0, 1).
    for z, x, y in np.ndindex(2, 2, 2):
        p = float(obs_tab[z, x, y])
        problem.add_constraint(problem.query(f'Z={z}&X={x}&Y={y}') - Query(p))
    problem.add_prob_constraints()
    return problem


# ---------- 3) Problem A: observational only ----------
obs_only = problem_with_observational_data(dag, obs_tab)

# Target: P(Y=1 | do(X=1))
obs_only.set_estimand(obs_only.query('Y(X=1)=1'))

prog_obs = obs_only.write_program()
lb_obs, ub_obs = prog_obs.run_pyomo('glpk', verbose=False)   # linear → LP solver is best
print(f"[OBS only]  P(Y=1 | do(X=1)) ∈ [{lb_obs:.4f}, {ub_obs:.4f}]")
print(f"W = {ub_obs - lb_obs:.4f}")

# ---------- 4) Problem B: observational + do(Z) ----------
obs_plus_doZ = problem_with_observational_data(dag, obs_tab)

# Add the synthetic interventional tables P(X,Y | do(Z=z)) generated above,
# encoded with potential outcomes X(Z=z), Y(Z=z).
for z in (0, 1):
    for x, y in np.ndindex(2, 2):
        p = float(doZ_tabs[z][x, y])
        lhs = obs_plus_doZ.query(f'X(Z={z})={x}&Y(Z={z})={y}')
        obs_plus_doZ.add_constraint(lhs - Query(p))

# Same target
obs_plus_doZ.set_estimand(obs_plus_doZ.query('Y(X=1)=1'))

prog_doZ = obs_plus_doZ.write_program()
lb_doZ, ub_doZ = prog_doZ.run_pyomo('glpk', verbose=False)
print(f"[OBS + do(Z)] P(Y=1 | do(X=1)) ∈ [{lb_doZ:.4f}, {ub_doZ:.4f}]")
print(f"W = {ub_doZ - lb_doZ:.4f}")


Ground truth  P(Y=1 | do(X=1))  = 0.499
[OBS only]  P(Y=1 | do(X=1)) ∈ [0.2492, 0.7505]
W = 0.5013
[OBS + do(Z)] P(Y=1 | do(X=1)) ∈ [0.4990, 0.4990]
W = 0.0000


## DAG 1
Variables: $X, Y, Z, W$

Directed edges:

- $Z \to W$
- $Z \to Y$
- $W \to X$
- $W \to Y$
- $X \to Y$

Bidirected edge:

- $X \leftrightarrow Y$ (representing unmeasured confounding between $X$ and $Y$)

In [None]:
# pip install pulp if needed
from itertools import product
import pulp

# -----------------------------
# 1) Problem data (from prompt)
# -----------------------------
# DAG structure:
# U_X_Y -> X, U_X_Y -> Y, Z -> W, Z -> Y, W -> X, W -> Y, X -> Y
# (We encode unobserved confounding between X and Y via joint response function types for the c-component {X,Y}.)

# Observational P(Z,W,X,Y), keyed by (z,w,x,y).
# All 16 cells are created at 0.0 in lexicographic order; only three cells carry mass.
obs = dict.fromkeys(product((0, 1), repeat=4), 0.0)
obs.update({
    (0, 1, 1, 1): 0.6,
    (1, 0, 0, 1): 0.3,
    (1, 0, 1, 0): 0.1,
})
# Sanity: sums to 1
assert abs(sum(obs.values()) - 1.0) < 1e-12

# Interventional P(W,X,Y | do(Z=z)), keyed by z and then (w,x,y); again zero-filled
# in lexicographic order before the non-zero cells are set.
# NOTE(review): in the DAG above Z has no parents and no bidirected edge, which would
# make P(w,x,y | do(Z=z)) equal to P(w,x,y | Z=z). The tables do not agree with that
# (e.g. obs gives P(1,1,1 | Z=0) = 1.0, doZ[0] gives 0.9) — confirm this is intended.
doZ = {z: dict.fromkeys(product((0, 1), repeat=3), 0.0) for z in (0, 1)}
doZ[0].update({(1, 0, 1): 0.1, (1, 1, 1): 0.9})
doZ[1].update({(0, 0, 1): 0.9, (0, 1, 0): 0.1})
# sums to 1 per z
for z in doZ:
    assert abs(sum(doZ[z].values()) - 1.0) < 1e-12

# Ground truth for reference (not used by the LP objective directly; we will bound it)
gt = 0.9

# -----------------------------------------
# 2) Enumerate response-function type space
# -----------------------------------------
# W response types: a pair (w0, w1) giving W's value for z=0 and z=1  -> 4 types
W_types = list(product([0,1], repeat=2))

# X response types: a pair (x0, x1) giving X's value for w=0 and w=1  -> 4 types
X_types = list(product([0,1], repeat=2))

# Y response types: one output bit per input (z,w,x), stored as the tuple
# (y000, y001, y010, y011, y100, y101, y110, y111) in lex order (z,w,x)  -> 256 types
def idx3(z,w,x):
    """Position of input (z,w,x) inside a Y response tuple (z is the high bit)."""
    return (z << 2) | (w << 1) | x

Y_types = list(product([0,1], repeat=8))

# XY c-component type = (X_type, Y_type); X_type varies slowest  -> 4*256 = 1024 types
XY_types = list(product(X_types, Y_types))

# Helper evaluators
def W_of(tW, z):
    """Value of W under W-type tW when Z equals z."""
    w_if_z0, w_if_z1 = tW
    if z == 0:
        return w_if_z0
    return w_if_z1

def X_of(tXY, w):
    """Value of X under the X part of the joint type tXY when W equals w."""
    (x_if_w0, x_if_w1), _ = tXY
    if w == 0:
        return x_if_w0
    return x_if_w1

def Y_of(tXY, z, w, x):
    """Value of Y under the Y part of the joint type tXY for inputs (z, w, x)."""
    _, y_table = tXY
    return y_table[idx3(z, w, x)]

# ---------------------------------------------
# 3) Decision vars θ[z, tW, tXY] >= 0, sum to 1
# ---------------------------------------------
# Index sets: Z values, and positions into W_types / XY_types
Z_vals   = [0,1]
W_T_idx  = [i for i, _ in enumerate(W_types)]
XY_T_idx = [i for i, _ in enumerate(XY_types)]

# Create LP problem factory
def build_lp(sense='min', do_Z=False):
    """
    Build the LP whose optimum bounds P(Y=1 | do(X=1)).

    The decision variables theta[z][iW][iXY] form one joint distribution over the
    value of Z, the W response type and the (X, Y) response type.

    NOTE(review): the only constraint tying the theta cells together is that they
    sum to 1, so nothing forces Z, the W type and the XY type to be independent.
    If the DAG is meant to imply that independence, this LP is a relaxation — confirm.

    Args:
      sense : 'min' builds a minimisation; any other value builds a maximisation
      do_Z  : when true, also constrain the model to the module-level doZ tables
              P(W,X,Y | do(Z=z0)) in addition to the observational obs table

    Returns:
      (prob, theta): the pulp.LpProblem and the nested dict of LP variables.
    """
    direction = pulp.LpMinimize if sense == 'min' else pulp.LpMaximize
    prob = pulp.LpProblem("Bounds_P_Y1_doX1", direction)

    theta = pulp.LpVariable.dicts(
        "theta",
        (Z_vals, W_T_idx, XY_T_idx),
        lowBound=0.0, upBound=None, cat='Continuous'
    )

    def mass_of_outcome(latent_z, z_eval, w, x, y):
        # Total theta mass, over the listed latent Z values, of every type pair that
        # yields W=w, X=x, Y=y when the mechanisms are evaluated with Z = z_eval.
        return pulp.lpSum(
            theta[z_lat][iW][iXY]
            for z_lat in latent_z
            for iW in W_T_idx
            for iXY in XY_T_idx
            if W_of(W_types[iW], z_eval) == w
            and X_of(XY_types[iXY], w) == x
            and Y_of(XY_types[iXY], z_eval, w, x) == y
        )

    # Sum-to-one
    every_theta = [theta[z][iW][iXY] for z in Z_vals for iW in W_T_idx for iXY in XY_T_idx]
    prob += pulp.lpSum(every_theta) == 1.0, "total_prob"

    # ------------- Observational constraints -------------
    # The observed z is also the z the mechanisms see, so only theta[z] contributes.
    for (z, w, x, y), p in obs.items():
        prob += mass_of_outcome((z,), z, w, x, y) == p, f"obs_{z}{w}{x}{y}"

    # ------------- Interventional constraints do(Z=z0) -------------
    # Z is clamped to z0 externally, so mass from BOTH latent z values contributes.
    if do_Z:
        for z0 in (0, 1):
            for (w, x, y), p in doZ[z0].items():
                prob += mass_of_outcome((0, 1), z0, w, x, y) == p, f"doZ{z0}_{w}{x}{y}"

    # ------------- Objective: P(Y=1 | do(X=1)) -------------
    # Under do(X=1): Z stays as drawn, W = W(Z), Y = Y(Z, W(Z), 1).
    # Y_of returns 0 or 1, so it serves directly as the coefficient of each theta cell.
    obj = pulp.lpSum(
        theta[z][iW][iXY] * Y_of(XY_types[iXY], z, W_of(W_types[iW], z), 1)
        for z in Z_vals
        for iW in W_T_idx
        for iXY in XY_T_idx
    )
    prob += obj, "P_Y1_doX1"

    return prob, theta

# --------------------------
# 4) Solve min and max bounds
# --------------------------
def solve_bound(sense, do_Z):
    """
    Build and solve one bounding LP, refusing to report a value unless it is optimal.

    Args:
      sense : 'min' or 'max', forwarded to build_lp
      do_Z  : whether build_lp should add the do(Z) constraints

    Returns:
      (prob, status, bound): the solved pulp problem, its status code and the
      optimal objective value.

    Raises:
      RuntimeError: if the solver ends in any status other than optimal (e.g. the
      constraint tables are jointly infeasible); the objective value would then not
      be a valid bound.
    """
    prob, _ = build_lp(sense, do_Z=do_Z)
    status = prob.solve(pulp.PULP_CBC_CMD(msg=False))
    if status != pulp.LpStatusOptimal:
        raise RuntimeError(
            f"LP (sense={sense!r}, do_Z={do_Z}) ended with status "
            f"{pulp.LpStatus[status]!r}; no valid bound is available"
        )
    return prob, status, pulp.value(prob.objective)


# Observational data only
min_prob_obs, min_status, lb = solve_bound('min', do_Z=False)
max_prob_obs, max_status, ub = solve_bound('max', do_Z=False)

print(f"Ground truth P(Y=1 | do(X=1)) = {gt:.4f}")
print(f"Obs only: [{lb:.4f}, {ub:.4f}], W = {ub - lb:.4f}")

# Observational data plus the do(Z) tables
min_prob, min_status, lb = solve_bound('min', do_Z=True)
max_prob, max_status, ub = solve_bound('max', do_Z=True)

print(f"doZ + Obs: [{lb:.4f}, {ub:.4f}], W = {ub - lb:.4f}")



Ground truth P(Y=1 | do(X=1)) = 0.9000
Obs only: [0.6000, 0.9000], W = 0.3000


TypeError: 'bool' object is not subscriptable