In [1]:
import numpy as np
import scipy as sp
import pandas as pd
from sklearn.linear_model import LinearRegression
import seaborn as sns
import matplotlib.pyplot as plt
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression statement in a cell, not only
# the last one. Cells further down that list several bare expressions (and the
# `_ = lr.fit(...)` assignments that silence an unwanted repr) rely on this.
InteractiveShell.ast_node_interactivity = "all"
%reload_ext autoreload
%autoreload 2
import itertools
import os
import sys

# Directory that holds the project modules imported just below.
# It can be overridden with the CAUSAL_AGGREGATION_CODE environment variable so
# the notebook also runs on machines other than the original author's; the
# default keeps the original location.
CODE_DIR = os.environ.get("CAUSAL_AGGREGATION_CODE",
                          "/home/roquero/CausalAggregation/Code")
# Guard against appending the same entry again each time this cell is re-run.
if CODE_DIR not in sys.path:
    sys.path.append(CODE_DIR)
from generateEnvironment import GenerateEnvironment
from robust_estimator import SolveProblem
import matplotlib as mpl
# Typeset all figure text with LaTeX, using a Helvetica sans-serif face.
plt.rcParams["text.usetex"] = True
plt.rcParams["font.family"] = "sans-serif"
plt.rcParams["font.sans-serif"] = ["Helvetica"]
# Print arrays without scientific notation and with a precision of 5 digits.
np.set_printoptions(suppress=True, precision=5)

examples.directory is deprecated; in the future, examples will be found relative to the 'datapath' directory.
  "found relative to the 'datapath' directory.".format(key))


In [2]:
# Seed NumPy's global RNG so that "Restart Kernel -> Run All" reproduces the
# numbers shown in this notebook.
# NOTE(review): this only pins the simulated data if GenerateEnvironment /
# SolveProblem draw from numpy's global RNG -- confirm against the project code.
RANDOM_SEED = 0
np.random.seed(RANDOM_SEED)

# Weighted connectivity matrix of the 8-node graph; the trailing comment on each
# row gives "<node name> / <node index>".
# Presumably row r holds the coefficients with which the other nodes (columns)
# enter node r -- the Y row matches the solver.beta printed further down;
# verify against SolveProblem.
connectivity_e0 = np.array([
    [0,0,0,0,0,0,0,0], # H / 0
    [0,0,0,0,0,0,0,0], # I / 1
    [2,0,0,0,-1,0,0,0], # X_1 / 2
    [1,0,1,0,0,0,0,0], # X_2 / 3
    [0,0,0,2,0,0,0,0], # X_3 / 4
    [0,0,1,0,1,0,0,0], # X_4 / 5
    [1,0,0,1,0,2,0,0], # Y / 6
    [0,0,0,1,0,2,-1,0], # X_5 / 7
])
# Node indices of the covariates X_1..X_5 and of the response Y.
x_indices=np.array([2,3,4,5,7])
y_index=6
# Number of samples requested for every generated environment.
n_samples=50000

In [18]:
# Nodes that receive an intervention: here every covariate node.
# (The original author left `[7]`, i.e. X_5 alone, as an alternative choice.)
interventional_indices = np.array([2, 3, 4, 5, 7])
solver = SolveProblem(connectivity_e0, x_indices, y_index)

# A single environment specification in which each targeted node gets an
# 'independent'-type intervention.
independent_spec = {node: {'type': 'independent'} for node in interventional_indices}
list_dict_interventions = [independent_spec]

# One generated environment per specification.
list_environments = [
    solver.generate_intervention(n_samples, spec)
    for spec in list_dict_interventions
]

In [19]:
# Only the first item returned by combine_constraints is used in this notebook.
M, _ = solver.combine_constraints(list_environments)
# Freshly generated environment with an empty intervention dict, used as the
# reference sample for the radius (presumably a Wasserstein radius -- see
# SolveProblem.compute_wass_radius).
obs_env_for_radius = solver.generate_intervention(n_samples, {})
delta = solver.compute_wass_radius(obs_env_for_radius, M, interventional_indices)

In [20]:
# The estimator below is called with the square root of delta, so show that value.
np.sqrt(delta)

7.090337113779424

In [21]:
# Arguments of the robust estimator: the square root of delta, the interventional
# environments, and a freshly generated environment with no interventions.
radius = np.sqrt(delta)
obs_env_for_estimator = solver.generate_intervention(n_samples, {})
problem = solver.robust_estimator(radius, list_environments, obs_env_for_estimator)

# The estimate is the value of the first variable of the solved problem
# (the API looks like a cvxpy Problem -- TODO confirm).
first_variable = problem.variables()[0]
beta_hat = first_variable.value

## Min-max upper bound

In [22]:
# Squared value of the solved problem, reported under the heading above as the upper bound.
problem.value**2

1.7960962396983828

In [23]:
# Draw two evaluation datasets. Each 'dataset' array is indexed as
# [variable, sample]: rows are selected with x_indices / y_index below.
# NOTE(review): the solver is rebuilt before each draw exactly as in the original;
# confirm whether a fresh SolveProblem is actually required per draw.
solver = SolveProblem(connectivity_e0,x_indices,y_index)
interventional_environment = solver.generate_intervention(
    n_samples,
    {i: {'type': 'do-zero'} for i in interventional_indices})['dataset']

solver = SolveProblem(connectivity_e0,x_indices,y_index)
observational_environment = solver.generate_intervention(n_samples,{})['dataset']

# Ordinary least squares on each dataset.
# The fits go through the origin (fit_intercept=False) because every evaluation
# cell below scores predictions as X.dot(beta) with no intercept term; fitting an
# intercept and then dropping it would make the reported coefficients
# inconsistent with the reported errors.
lr = LinearRegression(fit_intercept=False)
_  = lr.fit(observational_environment[x_indices,:].T, observational_environment[y_index,:])
beta_LR_obs = lr.coef_

lr = LinearRegression(fit_intercept=False)
_  = lr.fit(interventional_environment[x_indices,:].T, interventional_environment[y_index,:])
beta_LR_int = lr.coef_

In [24]:
# Show the four coefficient vectors side by side, in this order:
# solver.beta (called "Beta^0" in the labels below), the robust estimate, and the
# OLS coefficients fitted on the observational and on the do-zero dataset.
solver.beta
beta_hat
beta_LR_obs
beta_LR_int

array([0, 1, 0, 2, 0])

array([ 0.00204,  1.0681 ,  0.02843,  2.12711, -0.03346])

array([ 0.08495,  1.07196,  0.15775,  1.99837, -0.53989])

array([0., 0., 0., 0., 0.])

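## Performance over the interventional do-zero dataset.

Note: the do-zero intervention is applied to every covariate in `x_indices`, so all covariates are identically zero in this dataset. Any linear predictor without an intercept therefore predicts 0, and the four mean squared errors below coincide: each equals the sample mean of $Y^2$.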

Robust estimator

In [25]:
# Mean squared prediction error of beta_hat on the do-zero dataset
# (columns of the dataset are samples).
np.mean(
    np.square(
        interventional_environment[x_indices, :].T.dot(beta_hat)
        - interventional_environment[y_index, :]
    )
)

2.0021145324761136

Beta^0 estimator

In [26]:
# Mean squared prediction error of solver.beta on the do-zero dataset
# (columns of the dataset are samples).
np.mean(
    np.square(
        interventional_environment[x_indices, :].T.dot(solver.beta)
        - interventional_environment[y_index, :]
    )
)

2.0021145324761136

beta_OLS fitted on the interventional dataset

In [27]:
# Mean squared prediction error of beta_LR_int on the do-zero dataset
# (columns of the dataset are samples).
np.mean(
    np.square(
        interventional_environment[x_indices, :].T.dot(beta_LR_int)
        - interventional_environment[y_index, :]
    )
)

2.0021145324761136

beta_OLS fitted on the observational dataset

In [28]:
# Mean squared prediction error of beta_LR_obs on the do-zero dataset
# (columns of the dataset are samples).
np.mean(
    np.square(
        interventional_environment[x_indices, :].T.dot(beta_LR_obs)
        - interventional_environment[y_index, :]
    )
)

2.0021145324761136

## Performance over the observational dataset.

Robust estimator

In [29]:
# Mean squared prediction error of beta_hat on the observational dataset
# (columns of the dataset are samples).
np.mean(
    np.square(
        observational_environment[x_indices, :].T.dot(beta_hat)
        - observational_environment[y_index, :]
    )
)

1.330426305811926

Beta^0 estimator

In [30]:
# Mean squared prediction error of solver.beta on the observational dataset
# (columns of the dataset are samples).
np.mean(
    np.square(
        observational_environment[x_indices, :].T.dot(solver.beta)
        - observational_environment[y_index, :]
    )
)

1.9984519871827002

beta_OLS fitted on the observational dataset

In [31]:
# Mean squared prediction error of beta_LR_obs on the observational dataset
# (columns of the dataset are samples).
np.mean(
    np.square(
        observational_environment[x_indices, :].T.dot(beta_LR_obs)
        - observational_environment[y_index, :]
    )
)

0.5380317227987106

beta_OLS fitted on the interventional dataset

In [32]:
# Mean squared prediction error of beta_LR_int on the observational dataset
# (columns of the dataset are samples).
np.mean(
    np.square(
        observational_environment[x_indices, :].T.dot(beta_LR_int)
        - observational_environment[y_index, :]
    )
)

46.41119555136023

## Additional sanity checks

In [458]:
x_dim = 4
# Dedicated, seeded generator: the sanity check below becomes reproducible on a
# fresh kernel, and the global NumPy RNG state is left untouched.
sanity_rng = np.random.RandomState(0)
# Random (x_dim x 2) integer matrix with Binomial(n=5, p=0.2) entries.
V = sanity_rng.binomial(n=5, p=0.2, size=(x_dim, 2))

In [481]:
# Gram matrix of the two columns of V.
VtV = np.dot(V.T, V)
# Diagonal 2x2 term, 12 * I, added before inverting.
M = 12 * np.eye(2)
# Despite its name this is the inverse of (VtV + M), not of VtV alone.
VtVinv = np.linalg.inv(M + VtV)
# A = V (V^T V + M)^{-1} V^T
A = np.dot(np.dot(V, VtVinv), V.T)

In [482]:
# Q = I - A, shown together with its eigenvalues and its exact inverse.
Q = np.identity(x_dim) - A
Q
np.linalg.eigvals(Q)
np.linalg.inv(Q)

array([[ 0.91304, -0.04348, -0.04348, -0.17391],
       [-0.04348,  0.9398 ,  0.01672, -0.08696],
       [-0.04348,  0.01672,  0.9398 , -0.08696],
       [-0.17391, -0.08696, -0.08696,  0.65217]])

array([1.     , 0.52174, 0.92308, 1.     ])

array([[ 1.16667,  0.08333,  0.08333,  0.33333],
       [ 0.08333,  1.08333, -0.     ,  0.16667],
       [ 0.08333,  0.     ,  1.08333,  0.16667],
       [ 0.33333,  0.16667,  0.16667,  1.66667]])

In [483]:
# Truncated Neumann series for inv(I - A):  I + A + A^2 + ... + A^99.
# Horner's scheme, S <- I + A.dot(S), needs one matrix product per term instead
# of rebuilding every power A^i from scratch with multi_dot.
# The series only approximates inv(I - A) when the spectral radius of A is below 1.
NEUMANN_MAX_POWER = 99
Qinv = np.eye(x_dim)
for _neumann_step in range(NEUMANN_MAX_POWER):
    # After k passes Qinv holds I + A + ... + A^k.
    Qinv = np.eye(x_dim) + A.dot(Qinv)

In [484]:
# Largest entry-wise gap between the series approximation and the exact inverse of Q.
np.abs(Qinv - np.linalg.inv(Q)).max()

2.220446049250313e-16