In [13]:
import numpy as np
import scipy as sp
import pandas as pd
from sklearn.linear_model import LinearRegression, LassoCV
import seaborn as sns
import matplotlib.pyplot as plt
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression statement in a cell,
# not only the last one (IPython's default is "last_expr").
InteractiveShell.ast_node_interactivity = "all"
# (Re)load the autoreload extension; mode 2 re-imports all modules before
# each cell execution, so edits to the project's .py files are picked up
# without restarting the kernel.
%reload_ext autoreload
%autoreload 2
import itertools
import sys
sys.path.append('/home/roquero/CausalAggregation/Code')
from generateEnvironment import GenerateEnvironment, generate_constraints
from solveProblem import SolveProblem
import matplotlib as mpl
# Figure typography: render text through LaTeX and use a sans-serif
# family with Helvetica as the preferred face.
plt.rcParams["text.usetex"] = True
plt.rcParams["font.family"] = "sans-serif"
plt.rcParams["font.sans-serif"] = ["Helvetica"]
# Array printing: no scientific notation for small values, 5 digits of precision.
np.set_printoptions(suppress=True, precision=5)

In [14]:
# --- Experiment configuration -------------------------------------------

# Number of variables (nodes) in the system.
n_dim = 200

# Connectivity matrix with ones on the first sub-diagonal, i.e. entry
# [i, i-1] = 1 for i = 1..n_dim-1 ...
connectivity_e0 = np.eye(n_dim, k=-1)
# ... except that column 0 is cleared below row 0, which removes the
# [1, 0] entry and leaves node 0 without any connection.
connectivity_e0[1:, 0] = 0

# Index of the response variable.
y_index = 30

# Candidate covariates: every index from 1 to n_dim-1 except y_index
# (index 0 is deliberately not included).
x_indices = np.concatenate((np.arange(1, y_index),
                            np.arange(y_index + 1, n_dim)))

# Number of repetitions per sample size, and the level passed to the
# confidence-interval computation.
n_reps = 100
alpha = 0.05

In [15]:
# Monte Carlo study: for each sample size, repeat the pipeline n_reps times
# and report how often the confidence intervals contain solver.beta and how
# wide they are. The `solver` from the very last repetition stays in the
# notebook namespace after this cell.
for n_samples in np.array([50, 100, 200, 500, 1000, 5000, 10000]):
    # Pooled over repetitions AND selected coordinates: one entry per
    # (repetition, selected covariate) pair.
    coverage, length = [], []

    for _ in range(n_reps):
        # 1) Draw a dataset with an empty intervention dictionary.
        solver_obs = SolveProblem(connectivity_e0, x_indices, y_index)
        obs_dataset = solver_obs.generate_intervention(n_samples, {})['dataset']

        # 2) Screen covariates with a cross-validated Lasso. The dataset is
        #    indexed [variable, sample], hence the transpose for sklearn.
        select_markov = LassoCV(max_iter=5000)
        _ = select_markov.fit(obs_dataset[x_indices, :].T,
                              obs_dataset[y_index, :])
        # Keep covariates whose coefficient magnitude exceeds 1e-3.
        is_selected = np.abs(select_markov.coef_) > 1e-3
        selected_indices = x_indices[is_selected]

        # 3) Build a solver restricted to the selected covariates and
        #    generate one environment in which each of them receives an
        #    'independent'-type intervention.
        solver = SolveProblem(connectivity_e0, selected_indices, y_index)
        list_dict_interventions = [
            {i: {'type': 'independent'} for i in selected_indices}
        ]
        list_environments = [
            solver.generate_intervention(n_samples, dict_interventions)
            for dict_interventions in list_dict_interventions
        ]

        # 4) Confidence intervals: row 0 is read as the lower bound and
        #    row 1 as the upper bound, one column per selected covariate.
        _, CI = solver.compute_asymptotic_cov(list_environments, alpha)

        for coord in range(len(selected_indices)):
            lower, upper = CI[0, coord], CI[1, coord]
            target = solver.beta[coord]
            # Covered when the reference value lies strictly inside the interval.
            coverage.append((lower < target) & (upper > target))
            length.append(upper - lower)

    # Summary for this sample size. NOTE(review): the "pm" half-widths
    # divide by sqrt(n_reps), although the lists hold one entry per selected
    # coordinate per repetition — confirm this is the intended denominator.
    coverage_mean = np.mean(coverage)
    coverage_pm = 2 * np.std(coverage) / np.sqrt(n_reps)
    length_mean = np.mean(length)
    length_pm = 2 * np.std(length) / np.sqrt(n_reps)
    print('For n_samples {}: Avg coverage: {} pm{}. Avg length: {} pm{}.'.format(
        n_samples, coverage_mean, coverage_pm, length_mean, length_pm))

For n_samples 50: Avg coverage: 0.9465478841870824 pm0.04498665896820524. Avg length: 0.8499530319271315 pm0.1281239755755479.
For n_samples 100: Avg coverage: 0.9536199095022625 pm0.042061373112707225. Avg length: 0.4407902312798183 pm0.014327271583099715.
For n_samples 200: Avg coverage: 0.9482535575679172 pm0.04430293359483362. Avg length: 0.28724242740571504 pm0.00486616932683788.
For n_samples 500: Avg coverage: 0.9462647444298821 pm0.045098903702388185. Avg length: 0.1779294900702023 pm0.0016741534409614703.
For n_samples 1000: Avg coverage: 0.9466292134831461 pm0.04495435269845462. Avg length: 0.12550553915574805 pm0.0008563339882374423.
For n_samples 5000: Avg coverage: 0.9429097605893186 pm0.046402992996018036. Avg length: 0.05550808437213356 pm0.00016762684688797943.
For n_samples 10000: Avg coverage: 0.9791183294663574 pm0.028597640720428037. Avg length: 0.039221895709213986 pm7.863891542752217e-05.


In [16]:
# `solver` is the instance left over from the final repetition of the
# experiment loop (largest sample size); display its `beta` vector, which
# the loop uses as the reference value when checking interval coverage.
solver.beta

array([0., 1., 0., 0.])