In [18]:
import numpy as np
import scipy as sp
import pandas as pd
from sklearn.linear_model import LinearRegression
import seaborn as sns
import matplotlib.pyplot as plt
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
%reload_ext autoreload
%autoreload 2
import itertools
import sys
sys.path.append('/home/roquero/CausalAggregation/Code')
from generateEnvironment import GenerateEnvironment
from solveProblem import SolveProblem
import matplotlib as mpl
plt.rcParams.update({
    "text.usetex": True,
    "font.family": "sans-serif",
    "font.sans-serif": ["Helvetica"]})
np.set_printoptions(suppress=True)
np.set_printoptions(precision=4)

In [19]:
# Number of variables in the system; every row of the matrix below has this
# many entries.
n_dim = 9

# Default number of samples (the simulation loops further down rebind this name).
n_samples = 100000

# Row used for variables that have no non-zero connectivity entry.
_empty_row = [0] * n_dim

# Connectivity weights of the reference system, one row per variable index.
# NOTE(review): the direction convention (what a row vs. a column stands for)
# is defined by SolveProblem — confirm there. The "True beta" printed by the
# simulation cells appears to be row `y_index` restricted to `x_indices`.
_connectivity_rows = [
    _empty_row,                        # variable 0
    [2, 0, 0, 0, 2, 1, 0, 0, 0],       # variable 1
    [0, 0, 0, 0, 0, -1, 0, 0, 0],      # variable 2
    [0, 0, 0, 0, 0, 2, -1, 0, 0],      # variable 3
    [0, 0, 0, 0, 0, 0, 0, 1, 0],       # variable 4
    _empty_row,                        # variable 5
    [0, 0, 0, 0, 0, 0, 0, 0, -1],      # variable 6
    _empty_row,                        # variable 7
    [3, 2, 1, -1, 0, 0, 0, 0, 0],      # variable 8
]
connectivity_e0 = np.array(_connectivity_rows)

# Index of y, and the x indices: every index from 1 up to (not including)
# y_index. Variable 0 is deliberately left out of x_indices.
y_index = 8
x_indices = np.arange(1, y_index)
print('x_dim: {}'.format(len(x_indices)))

x_dim: 7


In [22]:
# Sample-size sweep with TWO interventional environments:
# independent interventions on variables {5, 6} and on variables {1, 4, 3}.
# For each sample size the cell prints the true beta, a least-squares fit, a
# Lasso CV fit, and the solver's estimator for a fixed and a CV-selected lmbda.
#
# NOTE(review): the output saved with this cell ends with
# "SolverError: Solver 'MOSEK' failed"; which call raised it is not visible in
# the captured output.
for n_samples in np.array([100,500,1000,10000,100000,500000,1000000]):
    print('\nSimulation for sample size = {}'.format(n_samples))

    # One intervention dictionary per environment, keyed by variable index.
    intervened_variables = [np.array([5,6]), np.array([1,4,3])]
    list_dict_interventions = [
        {i: {'type': 'independent'} for i in variables}
        for variables in intervened_variables
    ]

    # Fresh solver per sample size; one generated data set per environment.
    solver = SolveProblem(connectivity_e0, x_indices, y_index)
    list_environments = [
        solver.generate_intervention(n_samples, dict_interventions)
        for dict_interventions in list_dict_interventions
    ]

    print('True beta: \n{}'.format(solver.beta))

    # Least squares on a sample generated with an empty intervention dict.
    obs_sample = solver.generate_intervention(n_samples, {})
    beta_least_squares = solver.estimator_LR(obs_sample)
    print('Least squares estimate of beta: \n{}'.format(beta_least_squares))

    # Lasso on a new sample drawn under the first environment's interventions.
    env0_sample = solver.generate_intervention(n_samples, list_dict_interventions[0])
    beta_lasso = solver.estimator_Lasso(env0_sample)
    print('Lasso CV estimate of beta: \n{}'.format(beta_lasso))

    # Solver estimator with a fixed lmbda; each solve gets its own new sample
    # generated with an empty intervention dict.
    lmbda = 0.2
    obs_sample = solver.generate_intervention(n_samples, {})
    problem = solver.solve_problem(lmbda, list_environments, obs_sample)
    beta_fixed_lmbda = problem.variables()[0].value
    print('Estimator with lmbda set to {}: \n{}'.format(lmbda, beta_fixed_lmbda))

    # Same estimator, lmbda picked by solve_problem_CV; the chosen value is
    # read back from the solver after the solve.
    obs_sample = solver.generate_intervention(n_samples, {})
    problem_2 = solver.solve_problem_CV(list_environments, obs_sample)
    cv_message = 'Estimator with lmbda chosen by CV (selected value is {:.4f}): \n{}'
    print(cv_message.format(solver.selected_lmbda, problem_2.variables()[0].value))


Simulation for sample size = 100
True beta: 
[ 2  1 -1  0  0  0  0]
Least squares estimate of beta: 
[ 0.7734  0.1931 -0.1379 -0.5655 -0.485  -0.6431  0.1233]
Lasso CV estimate of beta: 
[ 3.2425  1.2006 -1.0434 -2.3719 -0.6234 -0.      0.    ]
Estimator with lmbda set to 0.2: 
[ 0.9534  0.37   -0.2814  0.272  -0.8886 -0.6159 -0.0225]
Estimator with lmbda chosen by CV (selected value is 0.2512): 
[ 0.9288  0.0989 -0.2067  0.     -1.2745 -0.5913 -0.    ]

Simulation for sample size = 500
True beta: 
[ 2  1 -1  0  0  0  0]
Least squares estimate of beta: 
[ 0.7051  0.2325 -0.2305 -0.5353 -0.226  -0.797  -0.0394]
Lasso CV estimate of beta: 
[ 3.1581  1.0195 -1.0001 -2.2907 -1.1753  0.     -0.    ]
Estimator with lmbda set to 0.2: 
[ 1.0825  0.1746 -0.8825  0.     -0.     -0.3629  0.0135]
Estimator with lmbda chosen by CV (selected value is 0.0010): 
[ 1.8669  0.1448 -0.9722 -0.0818 -0.8135 -0.1055 -0.2525]

Simulation for sample size = 1000
True beta: 
[ 2  1 -1  0  0  0  0]
Least square

SolverError: Solver 'MOSEK' failed. Try another solver, or solve with verbose=True for more information.

In [16]:
# Sample-size sweep with a SINGLE interventional environment:
# independent interventions on variables {1, 4}.
# For each sample size the cell prints the true beta, a least-squares fit, a
# Lasso CV fit, and the solver's estimator for a fixed and a CV-selected lmbda.
for n_samples in np.array([100,500,1000,10000,100000,500000,1000000]):
    print('\nSimulation for sample size = {}'.format(n_samples))

    # One intervention dictionary per environment, keyed by variable index.
    intervened_variables = [np.array([1,4])]
    list_dict_interventions = [
        {i: {'type': 'independent'} for i in variables}
        for variables in intervened_variables
    ]

    # Fresh solver per sample size; one generated data set per environment.
    solver = SolveProblem(connectivity_e0, x_indices, y_index)
    list_environments = [
        solver.generate_intervention(n_samples, dict_interventions)
        for dict_interventions in list_dict_interventions
    ]

    print('True beta: \n{}'.format(solver.beta))

    # Least squares on a sample generated with an empty intervention dict.
    obs_sample = solver.generate_intervention(n_samples, {})
    beta_least_squares = solver.estimator_LR(obs_sample)
    print('Least squares estimate of beta: \n{}'.format(beta_least_squares))

    # Lasso on a new sample drawn under the (only) environment's interventions.
    env0_sample = solver.generate_intervention(n_samples, list_dict_interventions[0])
    beta_lasso = solver.estimator_Lasso(env0_sample)
    print('Lasso CV estimate of beta: \n{}'.format(beta_lasso))

    # Solver estimator with a fixed lmbda; each solve gets its own new sample
    # generated with an empty intervention dict.
    lmbda = 0.2
    obs_sample = solver.generate_intervention(n_samples, {})
    problem = solver.solve_problem(lmbda, list_environments, obs_sample)
    beta_fixed_lmbda = problem.variables()[0].value
    print('Estimator with lmbda set to {}: \n{}'.format(lmbda, beta_fixed_lmbda))

    # Same estimator, lmbda picked by solve_problem_CV; the chosen value is
    # read back from the solver after the solve.
    obs_sample = solver.generate_intervention(n_samples, {})
    problem_2 = solver.solve_problem_CV(list_environments, obs_sample)
    cv_message = 'Estimator with lmbda chosen by CV (selected value is {:.4f}): \n{}'
    print(cv_message.format(solver.selected_lmbda, problem_2.variables()[0].value))


Simulation for sample size = 100
True beta: 
[ 2  1 -1  0  0  0  0]
Least squares estimate of beta: 
[ 1.0183  0.2835 -0.2653 -0.7214 -0.3314 -0.6575 -0.204 ]
Lasso CV estimate of beta: 
[ 0.0461 -0.1798  0.0961  0.1789 -0.6639 -0.6911 -0.0442]
Estimator with lmbda set to 0.2: 
[ 0.      0.      0.     -0.     -0.     -0.9015 -0.    ]
Estimator with lmbda chosen by CV (selected value is 0.1585): 
[ 0.0376 -0.      0.      0.0273 -0.     -0.9213  0.    ]

Simulation for sample size = 500
True beta: 
[ 2  1 -1  0  0  0  0]
Least squares estimate of beta: 
[ 0.8468  0.2741 -0.2165 -0.6416 -0.3828 -0.6843 -0.0121]
Lasso CV estimate of beta: 
[ 0.1141  0.1091 -0.0995 -0.      0.     -0.9209 -0.0268]
Estimator with lmbda set to 0.2: 
[ 0.      0.      0.087  -0.     -0.     -0.7888 -0.    ]
Estimator with lmbda chosen by CV (selected value is 0.0631): 
[ 0.0982 -0.     -0.016   0.0153 -0.     -0.8903  0.    ]

Simulation for sample size = 1000
True beta: 
[ 2  1 -1  0  0  0  0]
Least square