In [19]:
# user defined R installation
import os
os.environ['R_HOME'] = 'D:/Program Files/R-4.5.0' #path to your R installation
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import rpy2.robjects as robjects
from rpy2.robjects import globalenv
from rpy2.robjects.packages import importr
from rpy2.robjects.vectors import FloatVector

from causaloptim_python import run_experiment, extract_prob_dict



# Load R packages
#igraph = importr('igraph')
causaloptim = importr('causaloptim')
base = importr('base')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


#### Data Generation

True ATE: 0.19831845457157393


#### causaloptim

In [59]:
# Set seed and generate synthetic data in Python
np.random.seed(299128)
n = 5000
Z = np.random.binomial(1, 0.5, size=n)
U = np.random.binomial(1, 0.5, size=n)

# X ~ Z + U
logit_X = -1 + 1.5 * Z + 1.2 * U
p_X = 1 / (1 + np.exp(-logit_X))
X = np.random.binomial(1, p_X)

# Y ~ X + U
beta_X, beta_U = 1.0, 1.5
logit_Y = -0.5 + beta_X * X + beta_U * U
p_Y = 1 / (1 + np.exp(-logit_Y))
Y = np.random.binomial(1, p_Y)

df = pd.DataFrame({'Y': Y, 'X': X, 'Z': Z})

# Ground truth ATE
logit_Y1 = -0.5 + beta_X * 1 + beta_U * U
logit_Y0 = -0.5 + beta_X * 0 + beta_U * U
p_Y1 = 1 / (1 + np.exp(-logit_Y1))
p_Y0 = 1 / (1 + np.exp(-logit_Y0))
ATE_true = np.mean(p_Y1 - p_Y0)
print("True ATE:", ATE_true)


graph_str = "(Z -+ X, X -+ Y, Ur -+ X, Ur -+ Y)"
leftside = [1, 0, 0, 0]
latent   = [0, 0, 0, 1]
nvals    = [2, 2, 2, 2]
rlconnect = [0, 0, 0, 0]
monotone = [0, 0, 0, 0]

prob_dict = extract_prob_dict(df)


bounds = run_experiment(graph_str, leftside, latent, nvals, rlconnect, monotone, prob_dict)
print("Bounds:", bounds)

True ATE: 0.19725243674534862
Bounds: (<rpy2.robjects.vectors.FloatVector object at 0x000001762E758510> [14]
R classes: ('numeric',)
[-0.117957], <rpy2.robjects.vectors.FloatVector object at 0x000001762E727B10> [14]
R classes: ('numeric',)
[0.545062])


#### Varying Synthetic Data

In [57]:
results = []

n = 578740
graph_str = "(Z -+ X, X -+ Y, Ur -+ X, Ur -+ Y)"
leftside = [1, 0, 0, 0]
latent   = [0, 0, 0, 1]
nvals    = [2, 2, 2, 2]
rlconnect = [0, 0, 0, 0]
monotone = [0, 0, 0, 0]

for i in range(10):
    seed = np.random.randint(0, 1e6)
    np.random.seed(seed)

    # Random coefficients
    b_Z = np.round(np.random.uniform(0.5, 200.0), 3)
    b_U_X = np.round(np.random.uniform(0.5, 200.0), 3)
    b_X_Y = np.round(np.random.uniform(0.5, 200.0), 3)
    b_U_Y = np.round(np.random.uniform(0.5, 200.0), 3)

    # Simulate data
    Z = np.random.binomial(1, 0.5, size=n)
    U = np.random.binomial(1, 0.5, size=n)

    logit_X = -1 + b_Z * Z + b_U_X * U
    p_X = 1 / (1 + np.exp(-logit_X))
    X = np.random.binomial(1, p_X)

    logit_Y = -0.5 + b_X_Y * X + b_U_Y * U
    p_Y = 1 / (1 + np.exp(-logit_Y))
    Y = np.random.binomial(1, p_Y)

    # True ATE
    logit_Y1 = -0.5 + b_X_Y * 1 + b_U_Y * U
    logit_Y0 = -0.5 + b_X_Y * 0 + b_U_Y * U
    p_Y1 = 1 / (1 + np.exp(-logit_Y1))
    p_Y0 = 1 / (1 + np.exp(-logit_Y0))
    ATE_true = np.mean(p_Y1 - p_Y0)

    # Estimate bounds
    df = pd.DataFrame({'Y': Y, 'X': X, 'Z': Z})
    prob_dict = extract_prob_dict(df)
    r_bounds = run_experiment(
    graph_str, leftside, latent, nvals, rlconnect, monotone, prob_dict
    )
    bound_lower = float(r_bounds[0][0])
    bound_upper = float(r_bounds[1][0])

    # Save result
    results.append({
        'seed': seed,
        'b_Z': b_Z,
        'b_U_X': b_U_X,
        'b_X_Y': b_X_Y,
        'b_U_Y': b_U_Y,
        'ATE_true': ATE_true,
        'bound_lower': bound_lower,
        'bound_upper': bound_upper
    })

In [56]:
df_results = pd.DataFrame(results)
print(df_results.head())

     seed      b_Z    b_U_X    b_X_Y    b_U_Y  ATE_true  bound_lower  \
0  838977   63.010   34.292   98.238  130.756  0.319820     0.381031   
1  954840   57.411  194.924  195.118   99.650  0.315462     0.369048   
2  299128  143.444    1.400  183.427   93.857  0.310732     0.565445   
3  578740  132.778  156.233   55.240  134.273  0.307246     0.359903   
4  889396   17.937    7.737   31.790   39.149  0.312475     0.367747   

   bound_upper  
0     0.694021  
1     0.748413  
2     0.330648  
3     0.707729  
4     0.719088  


#### Iteration testing area

In [110]:
seed = np.random.randint(0, 1e6)
np.random.seed(seed)

# Random coefficients
b_Z = np.round(np.random.uniform(0.5, 2.0), 3)
b_U_X = np.round(np.random.uniform(0.5, 2.0), 3)
b_X_Y = np.round(np.random.uniform(0.5, 2.0), 3)
b_U_Y = np.round(np.random.uniform(0.5, 2.0), 3)

# Simulate data
Z = np.random.binomial(1, 0.5, size=n)
U = np.random.binomial(1, 0.5, size=n)

logit_X = -1 + b_Z * Z + b_U_X * U
p_X = 1 / (1 + np.exp(-logit_X))
X = np.random.binomial(1, p_X)

logit_Y = -0.5 + b_X_Y * X + b_U_Y * U
p_Y = 1 / (1 + np.exp(-logit_Y))
Y = np.random.binomial(1, p_Y)

# True ATE
logit_Y1 = -0.5 + b_X_Y * 1 + b_U_Y * U
logit_Y0 = -0.5 + b_X_Y * 0 + b_U_Y * U
p_Y1 = 1 / (1 + np.exp(-logit_Y1))
p_Y0 = 1 / (1 + np.exp(-logit_Y0))
ATE_true = np.mean(p_Y1 - p_Y0)

# Estimate bounds
df = pd.DataFrame({'Y': Y, 'X': X, 'Z': Z})
prob_dict = extract_prob_dict(df)
r_bounds = run_experiment(
graph_str, leftside, latent, nvals, rlconnect, monotone, prob_dict
)
bound_lower = float(r_bounds[0][0])
bound_upper = float(r_bounds[1][0])

print(f"Seed: {seed}")
print(f"Coefficients:")
print(f"  b_Z: {b_Z}")
print(f"  b_U_X: {b_U_X}")
print(f"  b_X_Y: {b_X_Y}")
print(f"  b_U_Y: {b_U_Y}")
print(f"True ATE: {ATE_true}")
print(f"Probabilities =1:")
print(f" p(Y=1): {p_Y}")
print(f" p(X=1): {p_X}")

print(f"Bounds:")
print(f"  Lower: {bound_lower}")
print(f"  Upper: {bound_upper}")

Seed: 289586
Coefficients:
  b_Z: 0.908
  b_U_X: 1.942
  b_X_Y: 1.967
  b_U_Y: 0.871
True ATE: 0.37794317326984606
Probabilities =1:
 p(Y=1): [0.91197567 0.91197567 0.37754067 ... 0.91197567 0.81260097 0.37754067]
 p(X=1): [0.8641271  0.8641271  0.47701621 ... 0.8641271  0.47701621 0.26894142]
Bounds:
  Lower: -0.1325433177613982
  Upper: 0.7056929857810956
