# Continuous Frugal Flows

In this notebook we demonstrate the ability of Frugal Flows to identify marginal causal effects.

In [1]:
import sys
import os
sys.path.append("../") # go to parent dir

import jax
import jax.random as jr
import jax.numpy as jnp
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import numpy as np
from scipy.stats import rankdata
import scipy.stats as ss
import statsmodels.api as sm
import seaborn as sns
from sklearn.model_selection import KFold

# from data.create_sim_data import *
import data.template_causl_simulations as causl_py
from data.run_all_simulations import plot_simulation_results
from frugal_flows.causal_flows import independent_continuous_marginal_flow, get_independent_quantiles, train_frugal_flow
from frugal_flows.bijections import UnivariateNormalCDF

import rpy2.robjects as ro
from rpy2.robjects.packages import importr
from rpy2.robjects import pandas2ri
from rpy2.robjects.packages import SignatureTranslatedAnonymousPackage
from rpy2.robjects.vectors import StrVector

# Activate automatic conversion of rpy2 objects to pandas objects
pandas2ri.activate()
base = importr('base')
utils = importr('utils')

# Import the R library causl, installing it first when it is not available.
try:
    causl = importr('causl')
except Exception:
    # The trailing comma matters: ('causl') is just the string 'causl', not a
    # one-element tuple of package names.
    # NOTE(review): confirm that 'causl' can be installed through R's
    # install.packages from the configured repositories.
    package_names = ('causl',)
    utils.install_packages(StrVector(package_names))
    # Bind the package after installing so `causl` is defined on this path too.
    causl = importr('causl')

# Use 64-bit floats in JAX.
jax.config.update("jax_enable_x64", True)

# Flow architecture settings, kept on their own so they can be reused without
# the optimiser settings.
causal_hyperparams = dict(RQS_knots=8, flow_layers=5, nn_width=50, nn_depth=4)

# Full training configuration: the architecture settings plus the learning
# rate and early-stopping limits.
hyperparams_dict = {
    'learning_rate': 5e-3,
    **causal_hyperparams,
    'max_patience': 100,
    'max_epochs': 10000,
}

# Simulation settings shared by every experiment in this notebook.
SEED = 0
NUM_ITER = 25
NUM_SAMPLES = 25_000

# Reference values drawn as horizontal lines in the box plots.
# NOTE(review): the single fit shown later in this notebook recovers
# ate ~ 4.98, const ~ 2.02, scale ~ 1.01 with CAUSAL_PARAMS = [2, 5]; confirm
# that these reference values match the data-generating parameters.
TRUE_PARAMS = dict(ate=1, const=0, scale=1)
CAUSAL_PARAMS = [2, 5]

In [2]:
def run_outcome_regression(data):
    """Fit an OLS regression of Y on X and the covariates Z.

    Parameters
    ----------
    data : dict
        Must contain 'Y' and 'X'. May contain 'Z_cont' and/or 'Z_disc';
        at least one of the two must be present and not None. All arrays are
        stacked horizontally, so they are assumed to be 2-D with one row per
        sample -- TODO confirm against the data generators.

    Returns
    -------
    tuple
        (coefficient on X, intercept) of the fitted model
        Y ~ const + X + Z1 + ... + Zk.

    Raises
    ------
    ValueError
        If neither 'Z_cont' nor 'Z_disc' is supplied.
    """
    Z_cont = data.get('Z_cont', None)
    Z_disc = data.get('Z_disc', None)

    # Identity checks: `== None` on an array is an elementwise comparison, not
    # a test for a missing value.
    if Z_cont is None and Z_disc is None:
        raise ValueError("data must contain at least one of 'Z_cont' or 'Z_disc'")
    if Z_cont is None:
        Z_full = Z_disc
    elif Z_disc is None:
        Z_full = Z_cont
    else:
        Z_full = jnp.hstack([Z_cont, Z_disc])

    # Name the covariate columns Z1..Zk.
    Z_cols = [f"Z{i+1}" for i in range(Z_full.shape[1])]
    df = pd.DataFrame(jnp.hstack([data['Y'], data['X'], Z_full]), columns=['Y', 'X', *Z_cols])

    # add_constant adds the 'const' column that provides the intercept.
    X_vars = sm.add_constant(df[['X', *Z_cols]])
    Y_var = df['Y']
    model = sm.OLS(Y_var, X_vars).fit()

    coefficient_X = model.params['X']
    coefficient_const = model.params['const']
    return coefficient_X, coefficient_const

In [3]:
# Sanity check: draw one dataset from generate_mixed_samples and fit the OLS
# outcome regression to it. The call returns (coefficient on X, intercept).
data = causl_py.generate_mixed_samples(10000, CAUSAL_PARAMS, 3)
run_outcome_regression(data)

(5.317079641118621, 1.010870687858447)

## Checking for the Causal Effect

### Gaussians

#### Frugal Flow

In [4]:
# gaussian_covariates_results = causl_py.run_simulations(
#     causl_py.generate_gaussian_samples, 
#     seed=SEED, 
#     num_samples=NUM_SAMPLES, 
#     num_iter=NUM_ITER, 
#     causal_params=CAUSAL_PARAMS,
#     hyperparams_dict=hyperparams_dict,
#     causal_model_args={'ate': 0., 'const': 1., 'scale': 1}
# )
# gaussian_covariates_results

In [5]:
# display(gaussian_covariates_results.mean())
# display(gaussian_covariates_results.std())

In [6]:
# plt.figure(figsize=(12, 6))

# # Boxplot
# box = gaussian_covariates_results.boxplot(column=["ate", "const", "scale"], grid=False)

# # Adding lines for the true parameters
# plt.axhline(y=TRUE_PARAMS['ate'], color='r', linestyle='--', label='True ate')
# plt.axhline(y=TRUE_PARAMS['const'], color='g', linestyle='--', label='True const')
# plt.axhline(y=TRUE_PARAMS['scale'], color='b', linestyle='--', label='True scale')

# # Adding title and labels
# plt.title('Box and Whisker Plot for ATE, Const, and Scale')
# plt.ylabel('Values')
# plt.ylim([0.80, 1.20])
# plt.legend()

#### Outcome Regression

In [7]:
# gaussian_coeffs = {'ate': [], 'const': []}
# for i in range(NUM_ITER):
#     data = causl_py.generate_gaussian_samples(N=NUM_SAMPLES, causal_params=CAUSAL_PARAMS, seed=i)
#     coeff_X, coeff_const = run_outcome_regression(data)
#     gaussian_coeffs['ate'].append(coeff_X)
#     gaussian_coeffs['const'].append(coeff_const)
# gaussian_outcome_coeffs = pd.DataFrame.from_dict(gaussian_coeffs)
# gaussian_outcome_coeffs

In [8]:
# print(gaussian_outcome_coeffs.mean())
# print(gaussian_outcome_coeffs.std())

### Mixed Gaussian and Gamma Outcomes

In [9]:
# Simulate one dataset and read each array by key, so the assignment does not
# depend on the insertion order of the returned dict.
# NOTE(review): key names follow the ones read by run_outcome_regression
# ('Z_cont', 'Z_disc', 'X', 'Y') -- confirm against generate_mixed_samples.
mixed_data = causl_py.generate_mixed_samples(20000, CAUSAL_PARAMS, 0)
Z_disc = mixed_data.get('Z_disc')
Z_cont, X, Y = mixed_data['Z_cont'], mixed_data['X'], mixed_data['Y']

In [10]:
# Build the u_z samples from the continuous covariates, without the marginal
# flow (use_marginal_flow=False). The result is indexed by 'uz_disc' and
# 'uz_cont' in the next cell.
uz_samples = causl_py.generate_uz_samples(Z_cont=Z_cont, use_marginal_flow=False, seed=0, frugal_flow_hyperparams=hyperparams_dict)

In [11]:
# Split the generate_uz_samples output into its 'uz_disc' and 'uz_cont' entries.
uz_disc_samples = uz_samples['uz_disc']
uz_cont_samples = uz_samples['uz_cont']

In [12]:
# Causal-margin arguments handed to the trainer.
# NOTE(review): presumably starting values -- the fitted ate/const/scale
# displayed in the following cells differ from them.
causal_args = {'ate': jnp.array([-7.]), 'const': 3., 'scale': 5}

# Flow hyperparameters plus the causal-model specification, collected once.
training_kwargs = dict(
    hyperparams_dict,
    causal_model='gaussian',
    causal_model_args=causal_args,
)

# Fit one frugal flow for Y given X, using the continuous u_z samples.
frugal_flow, losses = causl_py.train_frugal_flow(
    key=jr.PRNGKey(0),
    y=Y,
    u_z=uz_cont_samples,
    condition=X,
    **training_kwargs,
)

  2%|█                                                      | 186/10000 [03:07<2:45:01,  1.01s/it, train=1.083259270849787, val=1.1823966923574782 (Max patience reached)]


In [13]:
# Drill into the trained flow's nested bijections to reach the object whose
# `ate`, `const` and `scale` attributes are inspected in the following cells.
# NOTE(review): the [-1] / [0] indices depend on how train_frugal_flow composes
# the flow -- re-check them if the architecture changes.
causal_margin = frugal_flow.bijection.bijections[-1].bijection.bijections[0]

In [14]:
# Fitted `ate` of the causal margin (causal_model_args passed -7. for training).
causal_margin.ate

Array([4.98305914], dtype=float64)

In [15]:
# Fitted `const` of the causal margin (causal_model_args passed 3. for training).
causal_margin.const

Array(2.01680274, dtype=float64)

In [16]:
# Fitted `scale` of the causal margin (causal_model_args passed 5 for training).
causal_margin.scale

Array(1.01194294, dtype=float64)

In [17]:
# Repeat the frugal-flow fit over NUM_ITER simulated datasets.
# `ate` is passed as a 1-element array, as in the single train_frugal_flow call
# earlier in this notebook. With a bare float this cell failed with
# "AttributeError: 'float' object has no attribute 'shape'".
# NOTE(review): presumably the float `ate` was the cause -- confirm in
# causl_py.run_simulations.
continous_covariates_results = causl_py.run_simulations(
    causl_py.generate_mixed_samples,
    seed=SEED,
    num_samples=NUM_SAMPLES,
    num_iter=NUM_ITER,
    causal_params=CAUSAL_PARAMS,
    hyperparams_dict=hyperparams_dict,
    causal_model_args={'ate': jnp.array([0.]), 'const': 0., 'scale': 1}
)

AttributeError: 'float' object has no attribute 'shape'

In [None]:
# Display the table returned by run_simulations (presumably one row per iteration).
continous_covariates_results

In [None]:
# Column-wise mean, then standard deviation, of the simulation results.
print(continous_covariates_results.mean(), continous_covariates_results.std(), sep="\n")

#### Outcome Regression

In [None]:
# One OLS fit per seed; each fit yields an (X coefficient, intercept) pair.
regression_rows = [
    run_outcome_regression(
        causl_py.generate_mixed_samples(N=NUM_SAMPLES, causal_params=CAUSAL_PARAMS, seed=seed)
    )
    for seed in range(NUM_ITER)
]
# Collect the pairs into a table with one row per seed.
outcome_coeffs = pd.DataFrame(regression_rows, columns=['ate', 'const'])
outcome_coeffs

In [None]:
# Column-wise mean, then standard deviation, of the OLS estimates across seeds.
print(outcome_coeffs.mean(), outcome_coeffs.std(), sep="\n")

In [None]:
# One panel per parameter, so each box is compared only with its own reference
# value and every reference line stays visible. The previous shared axis with
# y-limits [0.80, 1.20] hid the 'True const' line at TRUE_PARAMS['const'] = 0.
param_names = ["ate", "const", "scale"]
line_colors = ["r", "g", "b"]

fig, axes = plt.subplots(1, len(param_names), figsize=(12, 6))
for ax, name, color in zip(axes, param_names, line_colors):
    # Boxplot of the estimates for this parameter
    continous_covariates_results.boxplot(column=[name], grid=False, ax=ax)
    # Reference line for the true parameter value
    ax.axhline(y=TRUE_PARAMS[name], color=color, linestyle='--', label=f'True {name}')
    ax.set_ylabel('Values')
    ax.legend()

fig.suptitle('Box and Whisker Plot for ATE, Const, and Scale');

### Mixed Continuous and Discrete (Small)

In [None]:
# Repeat the frugal-flow fit over NUM_ITER datasets from generate_discrete_samples.
# `ate` is passed as a 1-element array, as in the single train_frugal_flow call
# earlier in this notebook. The continuous-covariates run with a float `ate`
# failed with "AttributeError: 'float' object has no attribute 'shape'".
# NOTE(review): presumably the float `ate` was the cause -- confirm in
# causl_py.run_simulations.
discrete_small_covariates_results = causl_py.run_simulations(
    causl_py.generate_discrete_samples,
    seed=SEED,
    num_samples=NUM_SAMPLES,
    num_iter=NUM_ITER,
    causal_params=CAUSAL_PARAMS,
    hyperparams_dict=hyperparams_dict,
    causal_model_args={'ate': jnp.array([0.]), 'const': 0., 'scale': 1}
)

In [None]:
# Display the table returned by run_simulations (presumably one row per iteration).
discrete_small_covariates_results

In [None]:
# Column-wise mean, then standard deviation, of the simulation results.
print(discrete_small_covariates_results.mean(), discrete_small_covariates_results.std(), sep="\n")

#### Outcome Regression

In [None]:
# One OLS fit per seed; each fit yields an (X coefficient, intercept) pair.
regression_rows = [
    run_outcome_regression(
        causl_py.generate_discrete_samples(N=NUM_SAMPLES, causal_params=CAUSAL_PARAMS, seed=seed)
    )
    for seed in range(NUM_ITER)
]
# Collect the pairs into a table with one row per seed.
outcome_coeffs = pd.DataFrame(regression_rows, columns=['ate', 'const'])
outcome_coeffs

In [None]:
# Column-wise mean, then standard deviation, of the OLS estimates across seeds.
print(outcome_coeffs.mean(), outcome_coeffs.std(), sep="\n")

In [None]:
# One panel per parameter, so each box is compared only with its own reference
# value and every reference line stays visible. The previous shared axis with
# y-limits [0.80, 1.20] hid the 'True const' line at TRUE_PARAMS['const'] = 0.
param_names = ["ate", "const", "scale"]
line_colors = ["r", "g", "b"]

fig, axes = plt.subplots(1, len(param_names), figsize=(12, 6))
for ax, name, color in zip(axes, param_names, line_colors):
    # Boxplot of the estimates for this parameter
    discrete_small_covariates_results.boxplot(column=[name], grid=False, ax=ax)
    # Reference line for the true parameter value
    ax.axhline(y=TRUE_PARAMS[name], color=color, linestyle='--', label=f'True {name}')
    ax.set_ylabel('Values')
    ax.legend()

fig.suptitle('Box and Whisker Plot for ATE, Const, and Scale');

### Mixed Continuous and Discrete (Large)

In [None]:
# Separate copy of the hyperparameters so this experiment can be tuned without
# touching the shared dict.
hyperparams_dict_large = hyperparams_dict.copy()
# hyperparams_dict_large['learning_rate'] = 1e-3

# Repeat the frugal-flow fit over NUM_ITER datasets from
# generate_many_discrete_samples.
# `ate` is passed as a 1-element array, as in the single train_frugal_flow call
# earlier in this notebook. The continuous-covariates run with a float `ate`
# failed with "AttributeError: 'float' object has no attribute 'shape'".
# NOTE(review): presumably the float `ate` was the cause -- confirm in
# causl_py.run_simulations.
discrete_big_covariates_results = causl_py.run_simulations(
    causl_py.generate_many_discrete_samples,
    seed=SEED,
    num_samples=NUM_SAMPLES,
    num_iter=NUM_ITER,
    causal_params=CAUSAL_PARAMS,
    hyperparams_dict=hyperparams_dict_large,
    causal_model_args={'ate': jnp.array([0.]), 'const': 0., 'scale': 1}
)

In [None]:
# Display the table returned by run_simulations (presumably one row per iteration).
discrete_big_covariates_results

In [None]:
# Column-wise mean, then standard deviation, of the simulation results.
print(discrete_big_covariates_results.mean(), discrete_big_covariates_results.std(), sep="\n")

#### Outcome Regression

In [None]:
# One OLS fit per seed; each fit yields an (X coefficient, intercept) pair.
regression_rows = [
    run_outcome_regression(
        causl_py.generate_many_discrete_samples(N=NUM_SAMPLES, causal_params=CAUSAL_PARAMS, seed=seed)
    )
    for seed in range(NUM_ITER)
]
# Collect the pairs into a table with one row per seed.
outcome_coeffs = pd.DataFrame(regression_rows, columns=['ate', 'const'])
outcome_coeffs

In [None]:
# Column-wise mean, then standard deviation, of the OLS estimates across seeds.
print(outcome_coeffs.mean(), outcome_coeffs.std(), sep="\n")

In [None]:
# One panel per parameter, so each box is compared only with its own reference
# value and every reference line stays visible. The previous shared axis with
# y-limits [0.80, 1.20] hid the 'True const' line at TRUE_PARAMS['const'] = 0.
param_names = ["ate", "const", "scale"]
line_colors = ["r", "g", "b"]

fig, axes = plt.subplots(1, len(param_names), figsize=(12, 6))
for ax, name, color in zip(axes, param_names, line_colors):
    # Boxplot of the estimates for this parameter
    discrete_big_covariates_results.boxplot(column=[name], grid=False, ax=ax)
    # Reference line for the true parameter value
    ax.axhline(y=TRUE_PARAMS[name], color=color, linestyle='--', label=f'True {name}')
    ax.set_ylabel('Values')
    ax.legend()

fig.suptitle('Box and Whisker Plot for ATE, Const, and Scale');

### Mixed Continuous and Discrete (Large and Sparse)

In [None]:
# discrete_sparse_covariates_results = causl_py.run_simulations(
#     causl_py.generate_many_discrete_samples_sparse, 
#     seed=SEED, 
#     num_samples=NUM_SAMPLES, 
#     num_iter=NUM_ITER, 
#     causal_params=CAUSAL_PARAMS,
#     hyperparams_dict=hyperparams_dict,
#     causal_model_args={'ate': 0., 'const': 0., 'scale': 1}
# )
# discrete_sparse_covariates_results

In [None]:
# print(discrete_sparse_covariates_results.mean())
# print(discrete_sparse_covariates_results.std())

#### Outcome Regression

In [None]:
# OLS baseline for the sparse setting: one fit per seed, each yielding an
# (X coefficient, intercept) pair.
# This cell previously called generate_many_discrete_samples, which only
# repeated the "Large" experiment. It now uses the sparse generator named in
# the commented-out frugal-flow cell of this section.
# NOTE(review): confirm causl_py.generate_many_discrete_samples_sparse exists
# and accepts N / causal_params / seed like the other generators.
regression_rows = [
    run_outcome_regression(
        causl_py.generate_many_discrete_samples_sparse(
            N=NUM_SAMPLES, causal_params=CAUSAL_PARAMS, seed=seed
        )
    )
    for seed in range(NUM_ITER)
]
# Collect the pairs into a table with one row per seed.
outcome_coeffs = pd.DataFrame(regression_rows, columns=['ate', 'const'])
outcome_coeffs

In [None]:
# plt.figure(figsize=(12, 6))

# # Boxplot
# box = discrete_sparse_covariates_results.boxplot(column=["ate", "const", "scale"], grid=False)

# # Adding lines for the true parameters
# plt.axhline(y=TRUE_PARAMS['ate'], color='r', linestyle='--', label='True ate')
# plt.axhline(y=TRUE_PARAMS['const'], color='g', linestyle='--', label='True const')
# plt.axhline(y=TRUE_PARAMS['scale'], color='b', linestyle='--', label='True scale')

# # Adding title and labels
# plt.title('Box and Whisker Plot for ATE, Const, and Scale')
# plt.ylabel('Values')
# plt.ylim([0.80, 1.20])
# plt.legend()