In [3]:
import pulp
import numpy as np

def compute_ate_bounds(obs_data, E0=None, E1=None, verbose=False):
    """
    Compute ATE bounds using linear programming.
    
    Args:
        obs_data: dict with keys 'P_X1', 'P_Y1_X1', 'P_Y1_X0', 'P_Y1'
        E0: value of P(Y=1|do(X=0)) if known, None otherwise
        E1: value of P(Y=1|do(X=1)) if known, None otherwise
        verbose: if True, print debug information
    
    Returns:
        tuple: (lower_bound, upper_bound) for ATE
    """
    # Extract observational data
    P_X1 = obs_data['P_X1']
    P_X0 = 1 - P_X1
    P_Y1_X1 = obs_data['P_Y1_X1']
    P_Y1_X0 = obs_data['P_Y1_X0']
    P_Y1 = obs_data['P_Y1']
    P_Y0 = 1 - P_Y1
    
    # Compute joint probabilities from observational data
    P_Y1_and_X1 = P_Y1_X1 * P_X1
    P_Y0_and_X1 = (1 - P_Y1_X1) * P_X1
    P_Y1_and_X0 = P_Y1_X0 * P_X0
    P_Y0_and_X0 = (1 - P_Y1_X0) * P_X0
    
    # Solve for lower bound
    prob_min = pulp.LpProblem("ATE_Lower_Bound", pulp.LpMinimize)
    
    # Define q variables: q[i] = P(Y1=y1, Y0=y0, X=x)
    q = [pulp.LpVariable(f"q{i}", lowBound=0, upBound=1) for i in range(8)]
    
    # Objective: minimize ATE = q4 + q5 - q2 - q3
    prob_min += q[4] + q[5] - q[2] - q[3], "ATE"
    
    # Constraints
    # 1. Probabilities sum to 1
    prob_min += pulp.lpSum(q) == 1, "Sum_to_one"
    
    # 2. Marginal constraint: P(X=1)
    prob_min += q[1] + q[3] + q[5] + q[7] == P_X1, "P_X1"
    
    # 3. Joint constraint: P(Y=1, X=1)
    prob_min += q[5] + q[7] == P_Y1_and_X1, "P_Y1_X1"
    
    # 4. Joint constraint: P(Y=1, X=0)
    prob_min += q[4] + q[6] == P_Y1_and_X0, "P_Y1_X0"
    
    # 5. Joint constraint: P(Y=0, X=1)
    prob_min += q[1] + q[3] == P_Y0_and_X1, "P_Y0_X1"
    
    # 6. Joint constraint: P(Y=0, X=0)
    prob_min += q[0] + q[2] == P_Y0_and_X0, "P_Y0_X0"
    
    # Add experimental constraints if available
    if E0 is not None:
        prob_min += q[2] + q[3] + q[6] + q[7] == E0, "E0_constraint"
    
    if E1 is not None:
        prob_min += q[4] + q[5] + q[6] + q[7] == E1, "E1_constraint"
    
    # Solve for minimum
    prob_min.solve(pulp.PULP_CBC_CMD(msg=0))
    
    if prob_min.status != pulp.LpStatusOptimal:
        if verbose:
            print(f"Warning: Minimum problem status: {pulp.LpStatus[prob_min.status]}")
        return None, None
    
    ate_min = pulp.value(prob_min.objective)
    
    # Solve for upper bound
    prob_max = pulp.LpProblem("ATE_Upper_Bound", pulp.LpMaximize)
    
    # Define new q variables for max problem
    q_max = [pulp.LpVariable(f"q_max{i}", lowBound=0, upBound=1) for i in range(8)]
    
    # Objective: maximize ATE
    prob_max += q_max[4] + q_max[5] - q_max[2] - q_max[3], "ATE"
    
    # Same constraints
    prob_max += pulp.lpSum(q_max) == 1, "Sum_to_one"
    prob_max += q_max[1] + q_max[3] + q_max[5] + q_max[7] == P_X1, "P_X1"
    prob_max += q_max[5] + q_max[7] == P_Y1_and_X1, "P_Y1_X1"
    prob_max += q_max[4] + q_max[6] == P_Y1_and_X0, "P_Y1_X0"
    prob_max += q_max[1] + q_max[3] == P_Y0_and_X1, "P_Y0_X1"
    prob_max += q_max[0] + q_max[2] == P_Y0_and_X0, "P_Y0_X0"
    
    if E0 is not None:
        prob_max += q_max[2] + q_max[3] + q_max[6] + q_max[7] == E0, "E0_constraint"
    
    if E1 is not None:
        prob_max += q_max[4] + q_max[5] + q_max[6] + q_max[7] == E1, "E1_constraint"
    
    # Solve for maximum
    prob_max.solve(pulp.PULP_CBC_CMD(msg=0))
    
    if prob_max.status != pulp.LpStatusOptimal:
        if verbose:
            print(f"Warning: Maximum problem status: {pulp.LpStatus[prob_max.status]}")
        return None, None
    
    ate_max = pulp.value(prob_max.objective)
    
    return ate_min, ate_max


def find_experimental_bounds(obs_data):
    """
    Find the feasible range for E0 and E1 given observational data.
    """
    # Find bounds for E0 = P(Y=1|do(X=0))
    e0_bounds = []
    for target in ['min', 'max']:
        prob = pulp.LpProblem(f"E0_{target}", 
                             pulp.LpMinimize if target == 'min' else pulp.LpMaximize)
        
        q = [pulp.LpVariable(f"q{i}", lowBound=0, upBound=1) for i in range(8)]
        
        # Objective: E0 = q2 + q3 + q6 + q7
        prob += q[2] + q[3] + q[6] + q[7], "E0"
        
        # Add observational constraints
        P_X1 = obs_data['P_X1']
        P_X0 = 1 - P_X1
        P_Y1_X1 = obs_data['P_Y1_X1']
        P_Y1_X0 = obs_data['P_Y1_X0']
        
        prob += pulp.lpSum(q) == 1
        prob += q[1] + q[3] + q[5] + q[7] == P_X1
        prob += q[5] + q[7] == P_Y1_X1 * P_X1
        prob += q[4] + q[6] == P_Y1_X0 * P_X0
        prob += q[1] + q[3] == (1 - P_Y1_X1) * P_X1
        prob += q[0] + q[2] == (1 - P_Y1_X0) * P_X0
        
        prob.solve(pulp.PULP_CBC_CMD(msg=0))
        if prob.status == pulp.LpStatusOptimal:
            e0_bounds.append(pulp.value(prob.objective))
    
    # Find bounds for E1 = P(Y=1|do(X=1))
    e1_bounds = []
    for target in ['min', 'max']:
        prob = pulp.LpProblem(f"E1_{target}", 
                             pulp.LpMinimize if target == 'min' else pulp.LpMaximize)
        
        q = [pulp.LpVariable(f"q{i}", lowBound=0, upBound=1) for i in range(8)]
        
        # Objective: E1 = q4 + q5 + q6 + q7
        prob += q[4] + q[5] + q[6] + q[7], "E1"
        
        # Add observational constraints
        prob += pulp.lpSum(q) == 1
        prob += q[1] + q[3] + q[5] + q[7] == P_X1
        prob += q[5] + q[7] == P_Y1_X1 * P_X1
        prob += q[4] + q[6] == P_Y1_X0 * P_X0
        prob += q[1] + q[3] == (1 - P_Y1_X1) * P_X1
        prob += q[0] + q[2] == (1 - P_Y1_X0) * P_X0
        
        prob.solve(pulp.PULP_CBC_CMD(msg=0))
        if prob.status == pulp.LpStatusOptimal:
            e1_bounds.append(pulp.value(prob.objective))
    
    return e0_bounds, e1_bounds


def test_experiments():
    """Test the bounds with different experimental scenarios."""
    
    print("="*60)
    print("Testing ATE bounds with different experiments")
    print("="*60)
    
    # Example 1: Where E0 gives tighter bounds
    print("\n### Example 1: E0 gives tighter bounds ###")
    obs_data1 = {
        'P_X1': 0.3,
        'P_Y1_X1': 0.8,
        'P_Y1_X0': 0.2,
        'P_Y1': 0.3 * 0.8 + 0.7 * 0.2
    }
    
    print(f"\nObservational data:")
    print(f"  P(X=1) = {obs_data1['P_X1']}")
    print(f"  P(Y=1|X=1) = {obs_data1['P_Y1_X1']}")
    print(f"  P(Y=1|X=0) = {obs_data1['P_Y1_X0']}")
    print(f"  P(Y=1) = {obs_data1['P_Y1']:.3f}")
    
    # Find feasible experimental values
    e0_bounds, e1_bounds = find_experimental_bounds(obs_data1)
    print(f"\nFeasible experimental values:")
    print(f"  E0 can be in [{e0_bounds[0]:.3f}, {e0_bounds[1]:.3f}]")
    print(f"  E1 can be in [{e1_bounds[0]:.3f}, {e1_bounds[1]:.3f}]")
    
    # Choose experimental values in the middle of feasible range
    E0_value = (e0_bounds[0] + e0_bounds[1]) / 2
    E1_value = (e1_bounds[0] + e1_bounds[1]) / 2
    
    print(f"\nChosen experimental values:")
    print(f"  E0 = {E0_value:.3f}")
    print(f"  E1 = {E1_value:.3f}")
    
    # Purely observational bounds
    ate_min_obs, ate_max_obs = compute_ate_bounds(obs_data1)
    print(f"\n1. Purely observational bounds:")
    print(f"   ATE ∈ [{ate_min_obs:.3f}, {ate_max_obs:.3f}]")
    print(f"   Width = {ate_max_obs - ate_min_obs:.3f}")
    
    # With E0
    ate_min_e0, ate_max_e0 = compute_ate_bounds(obs_data1, E0=E0_value)
    print(f"\n2. With E0 = {E0_value:.3f}:")
    print(f"   ATE ∈ [{ate_min_e0:.3f}, {ate_max_e0:.3f}]")
    print(f"   Width = {ate_max_e0 - ate_min_e0:.3f}")
    
    # With E1
    ate_min_e1, ate_max_e1 = compute_ate_bounds(obs_data1, E1=E1_value)
    print(f"\n3. With E1 = {E1_value:.3f}:")
    print(f"   ATE ∈ [{ate_min_e1:.3f}, {ate_max_e1:.3f}]")
    print(f"   Width = {ate_max_e1 - ate_min_e1:.3f}")
    
    width_e0 = ate_max_e0 - ate_min_e0
    width_e1 = ate_max_e1 - ate_min_e1
    
    if width_e0 < width_e1:
        print(f"\n>>> E0 gives tighter bounds: {width_e0:.3f} < {width_e1:.3f}")
    else:
        print(f"\n>>> E1 gives tighter bounds: {width_e1:.3f} < {width_e0:.3f}")
    
    # Example 2: Try to find where E1 gives tighter bounds
    print("\n" + "="*60)
    print("\n### Example 2: Looking for case where E1 gives tighter bounds ###")
    obs_data2 = {
        'P_X1': 0.7,  # More treated units
        'P_Y1_X1': 0.3,
        'P_Y1_X0': 0.8,  # Higher outcome rate for untreated
        'P_Y1': 0.7 * 0.3 + 0.3 * 0.8
    }
    
    print(f"\nObservational data:")
    print(f"  P(X=1) = {obs_data2['P_X1']}")
    print(f"  P(Y=1|X=1) = {obs_data2['P_Y1_X1']}")
    print(f"  P(Y=1|X=0) = {obs_data2['P_Y1_X0']}")
    print(f"  P(Y=1) = {obs_data2['P_Y1']:.3f}")
    
    # Find feasible experimental values
    e0_bounds2, e1_bounds2 = find_experimental_bounds(obs_data2)
    print(f"\nFeasible experimental values:")
    print(f"  E0 can be in [{e0_bounds2[0]:.3f}, {e0_bounds2[1]:.3f}]")
    print(f"  E1 can be in [{e1_bounds2[0]:.3f}, {e1_bounds2[1]:.3f}]")
    
    # Choose experimental values
    E0_value2 = (e0_bounds2[0] + e0_bounds2[1]) / 2
    E1_value2 = (e1_bounds2[0] + e1_bounds2[1]) / 2
    
    print(f"\nChosen experimental values:")
    print(f"  E0 = {E0_value2:.3f}")
    print(f"  E1 = {E1_value2:.3f}")
    
    # Purely observational bounds
    ate_min_obs2, ate_max_obs2 = compute_ate_bounds(obs_data2)
    print(f"\n1. Purely observational bounds:")
    print(f"   ATE ∈ [{ate_min_obs2:.3f}, {ate_max_obs2:.3f}]")
    print(f"   Width = {ate_max_obs2 - ate_min_obs2:.3f}")
    
    # With E0
    ate_min_e02, ate_max_e02 = compute_ate_bounds(obs_data2, E0=E0_value2)
    print(f"\n2. With E0 = {E0_value2:.3f}:")
    print(f"   ATE ∈ [{ate_min_e02:.3f}, {ate_max_e02:.3f}]")
    print(f"   Width = {ate_max_e02 - ate_min_e02:.3f}")
    
    # With E1
    ate_min_e12, ate_max_e12 = compute_ate_bounds(obs_data2, E1=E1_value2)
    print(f"\n3. With E1 = {E1_value2:.3f}:")
    print(f"   ATE ∈ [{ate_min_e12:.3f}, {ate_max_e12:.3f}]")
    print(f"   Width = {ate_max_e12 - ate_min_e12:.3f}")
    
    width_e02 = ate_max_e02 - ate_min_e02
    width_e12 = ate_max_e12 - ate_min_e12
    
    if width_e02 < width_e12:
        print(f"\n>>> E0 gives tighter bounds: {width_e02:.3f} < {width_e12:.3f}")
    else:
        print(f"\n>>> E1 gives tighter bounds: {width_e12:.3f} < {width_e02:.3f}")



In [4]:
test_experiments()


Testing ATE bounds with different experiments

### Example 1: E0 gives tighter bounds ###

Observational data:
  P(X=1) = 0.3
  P(Y=1|X=1) = 0.8
  P(Y=1|X=0) = 0.2
  P(Y=1) = 0.380

Feasible experimental values:
  E0 can be in [0.000, 1.000]
  E1 can be in [0.380, 0.380]

Chosen experimental values:
  E0 = 0.500
  E1 = 0.380

1. Purely observational bounds:
   ATE ∈ [-0.620, 0.380]
   Width = 1.000

2. With E0 = 0.500:
   ATE ∈ [-0.120, -0.120]
   Width = 0.000

3. With E1 = 0.380:
   ATE ∈ [-0.620, 0.380]
   Width = 1.000

>>> E0 gives tighter bounds: 0.000 < 1.000


### Example 2: Looking for case where E1 gives tighter bounds ###

Observational data:
  P(X=1) = 0.7
  P(Y=1|X=1) = 0.3
  P(Y=1|X=0) = 0.8
  P(Y=1) = 0.450

Feasible experimental values:
  E0 can be in [0.000, 1.000]
  E1 can be in [0.450, 0.450]

Chosen experimental values:
  E0 = 0.500
  E1 = 0.450

1. Purely observational bounds:
   ATE ∈ [-0.550, 0.450]
   Width = 1.000

2. With E0 = 0.500:
   ATE ∈ [-0.050, -0.050]
