In [6]:
using CSV

In [7]:
struct VertexLabels
  u1::Vector{Int}  # Latent variables, parents of colliders Z1 and outcome Y
  u2::Vector{Int}  # Latent variables, parents of colliders Z1 and treatment X
  w::Vector{Int}       # Instrument
  x::Vector{Int}       # Treatment
  y::Vector{Int}       # Outcome
  z1::Vector{Int}  # Colliders between X and Y
  z3::Vector{Int}  # Confounders between X and Y
end

In [42]:
w = collect(1:1)
x = collect(2:2)
y = collect(3:3)
u1 = collect(4:8)
u2 = collect(9:13)
z1 = collect(14:18)
z3 = collect(19:25)
vertex_labels = VertexLabels(u1, u2, w, x, y, z1, z3)
n = 60000
num_vertices = 25
dat = zeros(n, num_vertices);
#read in dataset
i = 1
for row in CSV.Rows("nhs_data_60000.csv", datarow=2)
    dat[i,:] = [parse(Float64, x) for x in row[2:end]]
    i+=1
end

# dat = dat[1:20000,:];

In [199]:
# code inspired by makeDecisionAndEstimateEffect_3classes.m AND 
#				   calculateEstimatedEffect.m
# INPUT
# data
# D (n,d+3)... data matrix
#   (n,d)  ... Z covariates
#   (n,)   ... W auxiliary variable
#   (n,)   ... X treatment
#   (n,)   ... Y outcome
# datatype ... string: 'continuous' if data are Gaussain
#                      'discrete' if data are discrete
# p_vals_wy_zx ... p-values from tests
# p_vals_wy_z  ... p-values from tests
# Zsels  ... selected Zs
#
# OUTPUT
# est ... estiamte of the causal effect of x on y, or NaN
# Dec ... decision: 1 for D1, 2 for D2, 3 for D3, 4 if Decision from Naive
#         Bayes classifier was D1, but estimates were not similar enough, so
#         we do not give an estimate of the causal effect of x on y, i.e. est
#         is the following depending on Dec:
#         if Decision is D1 (i.e. Dec=1) est is the estimate of the non-zero
#         effect of x on y. For D2 (i.e. Dec=2), est = 0; For D3 (i.e.
#         Dec=3), or Dec=4, est = NaN;
# post_prob = class posterior probabilities of D1, D2, and D3 (in this order)
def estimate_effects(D, datatype, p_vals_wy_zx, p_vals_wy_z, Zsels):

	(n,d3) = D.shape
	ixY    = d3-1
	ixX    = d3-2
	ixW    = d3-3
	ixZ    = np.arange(d3-3)
	d      = d3-3
	t      = p_vals_wy_zx.shape[0]
	p_reject = 0.001
	p_accept = 0.1
	# if datatype == 'continuous':
	# 	# assuming n >= 5000
	# 	p_reject = 0.001
	# 	p_accept = 0.1
	# elif datatype == 'discrete':
	# 	itest = 'logOdds'
	# 	cov = D
	# else:
	# 	error('no such datatype')

	# NOTE: we do not use this code as it is only to check if R1 or R2/R3 
	#       applies (we assume R3 applies) 
	# getCountsFromPvalues.m
	# ----------------------
	# cntR3 = sum( temp{3}(:,2) > p_accept & temp{3}(:,3) < p_reject)
	# cntR3 = np.sum(p_vals_wy_zx > p_accept & p_vals_wy_z < p_reject)
	# cntR3_norm = cntR3/t

	# calculateEstimatedEffect.m
	# --------------------------
	#bool = temp(:,2) > p_accept & temp(:,3) < p_reject
	bool = np.logical_and((p_vals_wy_zx > p_accept),(p_vals_wy_z < p_reject))
	Zaccept = np.array(Zsels)[bool]
	times = Zaccept.shape[0]
	
	if datatype == 'continuous':
		b = np.ones((n,1))
		Dx = D[:,ixX].reshape((n,1))
		Dy = D[:,ixY].reshape((n,1))
		if times == 0:
			X = np.concatenate((b, Dx), axis=1)
			C = np.dot(X.transpose(), X)
			Xy= np.dot(X.transpose(), Dy)
			theta = np.linalg.solve(C, Xy)
			ate = theta[1]
		else:
			ate = np.zeros((times,))
			for i in range(times):
				Z = Zaccept[i]
				if Z.shape[0] == 0:
					X = np.concatenate((b, Dx), axis=1)
				else:
					X = np.concatenate((b, Dx, D[:,Z]), axis=1)
					# (X'X)^-1(X'y)
				C = np.dot(X.transpose(), X)
				Xy= np.dot(X.transpose(), Dy)
				theta = np.linalg.solve(C, Xy)
				# NOTE: we do not check if estimates are similar
				#		and output NaN if so, as in the original
				#       code via:
    			# val = areEstiamtesSimilar_Clusters(ests, CIs);
				ate[i] = theta[1]
	elif datatype == 'discrete':
		error('not implemented')
	else:
		error('no such datatype')

	return ate


# code inspired by algorithm_applyRules123_random.m
# NOTE from entner!
# NOTE: in the comments of the code, the rule numbers do not match the ones in
      # the paper. (We changed themw later on in the article, apologies for any 
      # inconveniences.) Here is the correspondence between the rules:

      # article               code
      # R1 (i) + (ii)         R3 (iii) + (ii) (Note R3(i) in code is not used)
      # R2 (i)                R1
      # R2 (ii) + (iii)       R2
# INPUT
# data
# D (n,d+3)... data matrix
#   (n,d)  ... Z covariates
#   (n,)   ... W auxiliary variable
#   (n,)   ... X treatment
#   (n,)   ... Y outcome
# datatype ... string: 'continuous' if data are Gaussain
#                      'discrete' if data are discrete
# k        ... maximal size of conditioning set in independence test
# t        ... how many tests are performed, i.e. how often do we
#                     select a random set Z (and w) and perform test
# 
# OUTPUT
# p_vals ... p-values from tests
# Zsels  ... selected Zs
def Zsel_highdim(D, datatype, k, t):

	(n,d3) = D.shape
	ixY    = d3-1
	ixX    = d3-2
	ixW    = d3-3
	ixZ    = np.arange(d3-3)
	d      = d3-3

	if datatype == 'continuous':
		itest = 'partialCorr'
		cov = np.cov(D.transpose())
	elif datatype == 'discrete':
		itest = 'logOdds'
		cov = D
	else:
        error('no such datatype')

	n_R3_cardZ = np.zeros((k+1,1)) # how many sets of each conditioning size up to K
	for i in range(k+1):
		n_R3_cardZ[i] = comb(d,i)

	# cumulative proportions of conditioing sets of size 0,...,K, for one w
	cumprop_R3 =  np.cumsum(n_R3_cardZ) / np.sum(n_R3_cardZ); 


	# save things
	Zsels = []
	p_vals_wy_zx = np.zeros((t,))
	p_vals_wy_z  = np.zeros((t,))

	rand_r = np.random.random(size=t)
	for i in range(t):
		#w = rand_d[i]
		r   = rand_r[i]
		cardZ = np.argmin(cumprop_R3 < r) # find the sampled bin
		#Ww = np.setdiff1d(W,w)
		temp = np.random.permutation(d)
		ix_Zsel = np.sort(temp[:cardZ])

		# condition 2 of R3: w indep y given Z and x
		pval_wy_ZX_R3 = indepTest(ixW, ixY, np.append(ix_Zsel,ixX), n, cov, itest);
        
		# condition 3 of R3: w not indep y given Z
		pval_wy_Z_R3 = indepTest(ixW, ixY, ix_Zsel, n, cov, itest);

		p_vals_wy_zx[i] = pval_wy_ZX_R3
		p_vals_wy_z[i]  = pval_wy_Z_R3
		Zsels.append(ix_Zsel)
	return p_vals_wy_zx, p_vals_wy_z, Zsels



def indepTest(ixA, ixB, ixZ, n, C, itest):
	if itest == 'partialCorr':
		ixAB = np.array([ixA,ixB])
		#        C(indAB,indAB) - C(indAB,ixZ) * C(ixZ,ixZ)^(-1) * C(ixZ,indAB)
		C_cond = C[ixAB,:][:,ixAB] - np.dot(C[ixAB,:][:,ixZ], np.linalg.solve(C[ixZ,:][:,ixZ], C[ixZ,:][:,ixAB]))
		# test if partial correlation is statistically significantly different
		# from 0 (using Fisher's Z, see Spirtes et al. p.94); if not, independence
		r = C_cond[0,1]
		fisherZ = 0.5* np.sqrt(n - ixZ.shape[0] - 3) * np.log(np.abs(1+r) / np.abs(1-r));
		p = 2*(1-norm.cdf(np.abs(fisherZ)));
	elif itest == 'logOdds':
		error('not implemented yet')
		#[pval_g, pval_chi] = gsquare_test(DataCov,states,w,x,Z);
		#p = pval_g;
	return p

# NOTE: DON'T USE THIS, JUST FOR TESTING
def depricated_sampling(n):
	z_dim = 30
	u_dim = 30
	a,b,c,d,e,l = np.random.normal(0,1,6)
	f,g,h,j,k = np.random.normal(0,1,size=(5,z_dim))

	D = np.zeros((n,z_dim*4+3))
	for i in range(n):
		W  = np.random.normal(0,1)
		U1 = np.random.normal(0,1,u_dim)
		U2 = np.random.normal(0,1,u_dim)
		Z1 = a*U1 + b*U2 + c*W + np.random.normal(0,.5,z_dim)
		Z2 = d*W  + np.random.normal(0,.5,z_dim)
		Z3 = np.random.normal(0,1,z_dim)
		Z4 = np.random.normal(0,1,z_dim)
		X = e*W + np.dot(f,U1) + np.dot(g,Z2) + np.dot(h,Z3) + np.random.normal(0,.5)
		Y = np.dot(j,Z3) + np.dot(k,Z4) + l*X + np.random.normal(0,.5)
		D[i,:z_dim] = Z1
		D[i,z_dim:z_dim*2] = Z2
		D[i,z_dim*2:z_dim*3] = Z3
		D[i,z_dim*3:z_dim*4] = Z4
		D[i,z_dim*4] = W
		D[i,z_dim*4+1] = X
		D[i,z_dim*4+2] = Y
	return D, l


def run(D):
    datatype = 'continuous' # they only have code for continuous data
    k = 30   # maximal size of conditioning set (Z3 is true set)
    t = 1000 # tests
    p_vals_wy_zx, p_vals_wy_z, Zsels = Zsel_highdim(D, datatype, k, t)
    ate = estimate_effects(D, datatype, p_vals_wy_zx, p_vals_wy_z, Zsels)
    print("ate:")
    print(ate)
    return ate, np.median(ate)

LoadError: syntax: extra token "estimate_effects" after end of expression

In [None]:
# train_D = []
# train_ATE_errs = []
# valid_ATE_errs = []
# # for i in range(1,21):
#     with open('Entner/eta_nomralize_hard_allZ_Oct2/outputfiles/train_data_'+str(i)+'.pickle', 'rb') as pickle_file:
#         train_D = pickle.load(pickle_file)
#     with open('Entner/eta_nomralize_hard_allZ_Oct2/outputfiles/valid_data_'+str(i)+'.pickle', 'rb') as pickle_file:
#         valid_D = pickle.load(pickle_file)
#     with open('Entner/eta_nomralize_hard_allZ_Oct2/outputfiles/train_real_ATE_'+str(i)+'.pickle', 'rb') as pickle_file:
#         l_train = pickle.load(pickle_file)
#     with open('Entner/eta_nomralize_hard_allZ_Oct2/outputfiles/valid_real_ATE_'+str(i)+'.pickle', 'rb') as pickle_file:
#         l_valid = pickle.load(pickle_file)

split = trunc(Int,(size(dat)[1]/2))
dat_train = dat[1:split,:]
dat_valid = dat[split+1:end,:]

ate_all_train, ate_median_train = run(dat_train)
ate_all_valid, ate_median_valid = run(dat_valid)
err_train = np.abs(ate_median_train - l_train)
err_valid = np.abs(ate_median_valid - l_valid)
# train_ATE_errs.append(err_train)
# valid_ATE_errs.append(err_valid)
err_train, err_valid

# pickle.dump( train_ATE_errs, open( "train_ATE_errs.p", "wb" ) )
# pickle.dump( valid_ATE_errs, open( "valid_ATE_errs.p", "wb" ) )
# print("mean train ATE err: " + str(np.mean(train_ATE_errs)))
# print("mean valid ATE err: " + str(np.mean(valid_ATE_errs)))
#     print('ate err = ' + str(err))

In [432]:
function choose_lambdas(lambda_twos, lambda1, seeds, dat, vertex_labels, max_iter, lrs, corr_boost, path_to_file)
  initial_lambda1 = lambda1
  corr_pxs = Dict() 
  hypo_fail_all_lambdas = Dict()
  num_Z = length(vertex_labels.z1)+length(vertex_labels.z3)
#   ATE_results_test = nothing


  w, x, y = vertex_labels.w, vertex_labels.x, vertex_labels.y
  Z = [vertex_labels.z1; vertex_labels.z3]
  best_Z = vertex_labels.z3

  split = trunc(Int,(size(dat)[1]/3))
  dat_train = dat[1:split,:]
  dat_valid = dat[split+1:split*2,:]
  dat_test = dat[(split*2)+1:end,:]
  # Sigma train construction
  Sigma_hat = cov(dat_train)
 for seed in seeds
    Random.seed!(seed);
  for lambda2 in lambda_twos
    println("lambda2: ", lambda2)
    for lr in lrs
      hypo_fail = Dict()
      println("lr: ", lr)
      reject_null_hypothesis = false
      lambda1 = initial_lambda1
      bonferonni_correction = 1.
      while (!reject_null_hypothesis)

        # learn theta on training set
        theta_hat, corr_px_theta_hat, corr_p_theta_hat = 
            bd_learn_linear(Sigma_hat, lambda1, lambda2, vertex_labels, 1, path_to_file,
                                    max_iter = max_iter, lr = lr, corr_boost = corr_boost)

        thresh = 1e-3
        sel_Z = findall(x->abs(x)>thresh, theta_hat)
        sel_Z = [x+Z[1]-1 for x in sel_Z]
        println(sel_Z)
        # test for the null hypothesis
        reject_null_hypothesis = 
          ind_null_hypo(n, num_Z, corr_p_theta_hat; significance_level=(0.01/bonferonni_correction))
        # if null rejected, i.e. reject_null_hypothesis=true
        if reject_null_hypothesis
          # compute ATEs training set
          Sigma_wyx_phi = build_phi_covariance(theta_hat, w, y, x, Z, Sigma_hat)
        try   
          atr_real_train = 0.
          ate_hat_train = (Sigma_wyx_phi[[3; 4], [3; 4]] \ Sigma_wyx_phi[[3; 4], 2])[1]
          ate_hat_all_Z_train = (Sigma_hat[[x; Z], [x; Z]] \ Sigma_hat[[x; Z], y])[1]
          ate_hat_best_Z_train = (Sigma_hat[[x; best_Z], [x; best_Z]] \ Sigma_hat[[x; best_Z], y])[1]
          ate_hat_sel_Z_train = (Sigma_hat[[x; sel_Z], [x; sel_Z]] \ Sigma_hat[[x; sel_Z], y])[1]
          ate_hat_marg_Z_train = Sigma_hat[x, y] / Sigma_hat[x, x]

          ATE_results_train = [atr_real_train ate_hat_best_Z_train ate_hat_train ate_hat_all_Z_train ate_hat_sel_Z_train ate_hat_marg_Z_train]
          # ATE_results_train = [ate_hat_best_Z_train ate_hat_train ate_hat_all_Z_train ate_hat_sel_Z_train ate_hat_marg_Z_train]

          # compute \rho(W,Y|beta*Z, X) validation
          Sigma_hat_valid = cov(dat_valid)
          Sigma_wyx_phi_valid = build_phi_covariance(theta_hat, w, 
            x, y, Z, Sigma_hat_valid)
   
            corr_px_valid = abs(partial_corr([1; 2], [3; 4], Sigma_wyx_phi_valid)[1, 2])
            # compute ATEs validation set
            atr_real_valid = 0.
            ate_hat_valid = (Sigma_wyx_phi_valid[[3; 4], [3; 4]] \ Sigma_wyx_phi_valid[[3; 4], 2])[1]
            ate_hat_all_Z_valid = (Sigma_hat_valid[[x; Z], [x; Z]] \ Sigma_hat_valid[[x; Z], y])[1]
            ate_hat_best_Z_valid = (Sigma_hat_valid[[x; best_Z], [x; best_Z]] \ Sigma_hat_valid[[x; best_Z], y])[1]
            ate_hat_sel_Z_valid = (Sigma_hat_valid[[x; sel_Z], [x; sel_Z]] \ Sigma_hat_valid[[x; sel_Z], y])[1]
            println(ate_hat_sel_Z_valid)
            ate_hat_marg_Z_valid = Sigma_hat_valid[x, y] / Sigma_hat_valid[x, x]

            ATE_results_valid = [atr_real_valid ate_hat_best_Z_valid ate_hat_valid ate_hat_all_Z_valid ate_hat_sel_Z_valid ate_hat_marg_Z_valid]
            # ATE_results_valid = [ate_hat_best_Z_valid ate_hat_valid ate_hat_all_Z_valid ate_hat_sel_Z_valid ate_hat_marg_Z_valid]
            # save corrs
            hypo_fail["seed"] = seed
            hypo_fail["corr_px_train"] = corr_px_theta_hat
            hypo_fail["corr_p_train"] = corr_p_theta_hat
            hypo_fail["corr_px_valid"] = corr_px_valid
            hypo_fail["lr"] = lr
            # parameters
            hypo_fail["theta_hat"] = theta_hat
            hypo_fail["sel_Z"] = sel_Z
            hypo_fail["lambda1"] = lambda1
            hypo_fail["lambda2"] = lambda2
            # ATEs train
            hypo_fail["ATE_results_train"] = ATE_results_train
            # ATEs valid
            hypo_fail["ATE_results_valid"] = ATE_results_valid
            hypo_fail_all_lambdas[hypo_fail["corr_px_valid"]] = hypo_fail
            break
          catch
            lambda1 = lambda1 * 2
            bonferonni_correction += 1.
          end
        else
          lambda1 = lambda1 * 2
          bonferonni_correction += 1.
        end
      end
    end
  end
 end
  best_setup = hypo_fail_all_lambdas[minimum(keys(hypo_fail_all_lambdas))]
    Sigma_hat_test = cov(dat_test)
    Sigma_wyx_phi_test = build_phi_covariance(best_setup["theta_hat"], w, 
    x, y, Z, Sigma_hat_test)
    # compute ATEs test set
    atr_real_test = 0.
    ate_hat_test = (Sigma_wyx_phi_test[[3; 4], [3; 4]] \ Sigma_wyx_phi_test[[3; 4], 2])[1]
    ate_hat_all_Z_test = (Sigma_hat_test[[x; Z], [x; Z]] \ Sigma_hat_test[[x; Z], y])[1]
    ate_hat_best_Z_test = (Sigma_hat_test[[x; best_Z], [x; best_Z]] \ Sigma_hat_test[[x; best_Z], y])[1]
    ate_hat_sel_Z_test = (Sigma_hat_test[[x; best_setup["sel_Z"]], [x; best_setup["sel_Z"]]] \ Sigma_hat_test[[x; best_setup["sel_Z"]], y])[1]

    println(ate_hat_sel_Z_test)
    ate_hat_marg_Z_test = Sigma_hat_test[x, y] / Sigma_hat_test[x, x]

    ATE_results_test = [atr_real_test ate_hat_best_Z_test ate_hat_test ate_hat_all_Z_test ate_hat_sel_Z_test ate_hat_marg_Z_test]
  return best_setup, ATE_results_test
end

choose_lambdas (generic function with 2 methods)

### NHS data, smaller var, Oct 8 morning

In [419]:
dat_small_var = zeros(90000, num_vertices);
m = 1
for row in CSV.Rows("nhs_data_smaller_var.csv", datarow=2)
    dat_small_var[m,:] = [parse(Float64, x) for x in row[2:end]]
    m+=1
end


# dat_small_var = dat_small_var[30001:end,:];

In [441]:
lambda1=5e-4
# lrs=[1e-5, 5e-4, 1e-4, 5e-3, 1e-3]
# lambda_twos=[5, 2, 1]
# lrs=[1e-4, 1e-5, 2e-5]
lambda_twos=[1, 1e-1, 1e-2]
lrs=[5e-5, 2e-5, 5e-4, 2e-4]
seeds = [20, 40, 60]
max_iter = 1000
corr_boost=1
best_setup, ATEs = choose_lambdas(lambda_twos, lambda1, seeds, dat_small_var, vertex_labels, max_iter, lrs, corr_boost, "Continuous_revival/real_world/outputfiles")

lambda2: 1.0
lr: 5.0e-5
[14, 15, 20, 21, 22, 23, 24]
[15, 16, 17, 18, 19, 20, 21, 22, 23]
-0.1023688604165896
lr: 2.0e-5
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
-0.27396984822314113
lr: 0.0005
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
-0.27396984822314113
lr: 0.0002
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
-0.27396984822314113
lambda2: 0.1
lr: 5.0e-5
[14, 15, 16, 18, 19, 20, 21, 22, 23, 24, 25]
-0.2782191850887803
lr: 2.0e-5
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
-0.27396984822314113
lr: 0.0005
[14, 17, 19, 20, 23, 24]
1.7297420094903093
lr: 0.0002
[14, 15, 16, 18, 19, 21, 22, 23, 24, 25]
2.174676862327235
lambda2: 0.01
lr: 5.0e-5
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
-0.27396984822314113
lr: 2.0e-5
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
-0.27396984822314113
lr: 0.0005
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
-0.27396984822314113
lr: 0.0002
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
-0.27396984822314113
lambda2: 1.0
l

(Dict{Any,Any}("lr"=>5.0e-5,"ATE_results_valid"=>[0.0 0.0363165 … -0.102369 1.77332],"ATE_results_train"=>[0.0 0.0361244 … -0.118334 1.78158],"sel_Z"=>[15, 16, 17, 18, 19, 20, 21, 22, 23],"lambda2"=>1.0,"lambda1"=>0.001,"corr_px_valid"=>0.591291,"theta_hat"=>[-3.67275e-6, 0.545617, 0.0533428, -0.0195926, 0.289827, -0.0824615, -0.184364, -0.0281681, -0.0421756, 0.307724, -6.32423e-5, 5.01771e-5],"seed"=>20,"corr_p_train"=>0.115755…), [0.0 0.0364751 … -0.126647 1.78344])

In [442]:
best_setup["lr"],best_setup["lambda2"]

(5.0e-5, 1.0)

In [443]:
best_setup["ATE_results_valid"]

1×6 Array{Float64,2}:
 0.0  0.0363165  0.331118  -0.27397  -0.102369  1.77332

In [444]:
ATEs

1×6 Array{Float64,2}:
 0.0  0.0364751  0.333002  -0.287561  -0.126647  1.78344

In [445]:
ATEs_results = []
append!(ATEs_results, best_setup["ATE_results_valid"])
append!(ATEs_results, 0.0045665072676280725)
ATEs_results[1] = 0.03613994943765849

0.03613994943765849

In [390]:
round(abs(ATEs_results[1] - ATEs_results[5]),digits=3), round(abs(ATEs_results[1] - ATEs_results[4]), digits=3)

(0.75, 0.29)

In [392]:
round(abs(ATEs_results[1] - ATEs_results[6]), digits=3), round(abs(ATEs_results[1] -ATEs_results[7]),digits=3)

(1.734, 0.032)

In [355]:
include("simulate.jl")
include("learn_linear.jl")
include("util.jl")

ATE_learned

In [122]:
# lambda_twos=[2, 2e-2, 1e-2, 2e-3]
# lambda_twos=[5, 2, 1, 5e-1, 2e-1, 1e-1]
lambda1=5e-4
# lrs=[1e-5, 5e-4, 1e-4, 5e-3, 1e-3]
lambda_twos=[1, 5e-1, 1e-1]
lrs=[2e-5, 2e-4, 2e-3]
max_iter = 1000
corr_boost=1
best_setup = choose_lambdas(lambda_twos, lambda1, dat, vertex_labels, max_iter, lrs, corr_boost, "Continuous_revival/real_world/outputfiles")

lambda2: 1.0
lr: 2.0e-5
[14, 15, 16, 17, 18, 19, 20, 22, 23, 25]
1.7400403937527105
lr: 0.0002
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
[14, 15, 16, 18, 19, 20, 21, 22, 23, 24, 25]
-0.0022963116679162936
lr: 0.002
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
[14, 15, 16, 17, 19, 20, 21, 22, 23, 24, 25]
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
-0.008826311329718844
lambda2: 0.5
lr: 2.0e-5
[14, 15, 16, 17, 18, 19, 20, 22, 23, 24, 25]
1.8421010368924406
lr: 0.0002
[14, 17, 18, 19, 20, 21, 23]
0.6958866133738458
lr: 0.002
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
-0.008826311329

Dict{Any,Any} with 10 entries:
  "lambda1"           => 0.001
  "ATE_results_train" => [0.0 0.0360363 … 0.00154426 1.77702]
  "lr"                => 0.0002
  "corr_px_valid"     => 0.053125
  "sel_Z"             => [14, 15, 16, 18, 19, 20, 21, 22, 23, 24, 25]
  "theta_hat"         => [-0.00865563, -0.00246184, 4.69253, -0.00088393, -0.00…
  "corr_p_train"      => 0.0171644
  "corr_px_train"     => 0.0585722
  "ATE_results_valid" => [0.0 0.0358647 … -0.00229631 1.77063]
  "lambda2"           => 1.0

In [10]:
# true=0.03617257959085344
# marg=-0.007099066432687132
# allZ=1.7737528049169522
# usingZ3=0.03596626017737463

# true: 0.03617257959085344
# marg: 1.77104284
# allZ: -0.009096044633038352
# usingZ3: 0.03565197301862628

LoadError: syntax: extra token "ATE" after end of expression

In [123]:
best_setup["ATE_results_valid"]

1×6 Array{Float64,2}:
 0.0  0.0358647  0.296284  -0.00882631  -0.00229631  1.77063

In [124]:
best_setup["sel_Z"]

11-element Array{Int64,1}:
 14
 15
 16
 18
 19
 20
 21
 22
 23
 24
 25

In [194]:
ATEs =  [0.03617257959085344  0.0358647  0.296284  -0.00882631  -0.00229631  1.77063]

1×6 Array{Float64,2}:
 0.0  0.0358647  0.296284  -0.00882631  -0.00229631  1.77063

In [197]:
abs(ATEs[1] - ATEs[5]), abs(ATEs[1] - ATEs[4]), abs(ATEs[1] - ATEs[6])

(0.00229631, 0.00882631, 1.77063)

In [201]:
Entner = (-0.0002190385869365213)
abs(ATEs[1] - Entner)

0.0002190385869365213

In [190]:
# lambda_twos=[2, 2e-2, 1e-2, 2e-3]
# lambda_twos=[5, 2, 1, 5e-1, 2e-1, 1e-1]
lambda1=5e-4
# lrs=[1e-5, 5e-4, 1e-4, 5e-3, 1e-3]
lambda_twos=[5, 1, 5e-1, 1e-1]
lrs=[2e-5, 5e-4, 2e-4, 2e-3, 2e-2]
max_iter = 1000
corr_boost=1
best_setup = choose_lambdas(lambda_twos, lambda1, dat, vertex_labels, max_iter, lrs, corr_boost, "Continuous_revival/real_world/outputfiles")

lambda2: 5.0
lr: 2.0e-5
[14, 15, 16, 17, 18, 19, 20, 22, 23, 24, 25]
1.8421010368924406
lr: 0.0005
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
-0.008826311329718844
lr: 0.0002
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
[14, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23

Dict{Any,Any} with 10 entries:
  "lambda1"           => 0.0005
  "ATE_results_train" => [0.0 0.0360363 … -0.00533642 1.77702]
  "lr"                => 0.0002
  "corr_px_valid"     => 0.234749
  "sel_Z"             => [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
  "theta_hat"         => [0.0134578, -0.00650186, -0.00957617, 6.40321, 0.01228…
  "corr_p_train"      => 0.525858
  "corr_px_train"     => 0.0179708
  "ATE_results_valid" => [0.0 0.0358647 … -0.00882631 1.77063]
  "lambda2"           => 0.5

In [193]:
0.03617257959085344+0.009222228559045302, 0.03617257959085344+0.00882631

(0.04539480814989874, 0.044998889590853436)

In [178]:
best_setup["ATE_results_valid"]

1×6 Array{Float64,2}:
 0.0  0.0358647  0.522219  -0.00882631  -0.00882631  1.77063

In [179]:
best_setup["sel_Z"]

12-element Array{Int64,1}:
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25

## Oct 8 NHS data

In [208]:
dat_oct8 = zeros(30000, num_vertices);
j = 1
for row in CSV.Rows("nhs_data_test_Oct8.csv", datarow=2)
    dat_oct8[j,:] = [parse(Float64, x) for x in row[2:end]]
    j+=1
end

In [210]:
lambda1=5e-4
# lrs=[1e-5, 5e-4, 1e-4, 5e-3, 1e-3]
lambda_twos=[5, 1, 5e-1, 1e-1]
lrs=[2e-5, 5e-4, 2e-4, 2e-3, 2e-2]
max_iter = 1000
corr_boost=1
best_setup = choose_lambdas(lambda_twos, lambda1, dat_oct8, vertex_labels, max_iter, lrs, corr_boost, "Continuous_revival/real_world/outputfiles")

lambda2: 5.0
lr: 2.0e-5
[16, 19, 21, 25]
1.1527161722404462
lr: 0.0005
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
0.013730131020688735
lr: 0.0002
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
0.013730131020688735
lr: 0.002
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
0.013730131020688735
lr: 0.02
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
0.013730131020688735
lambda2: 1.0
lr: 2.0e-5
[14, 15, 16, 17, 18, 19, 23]
1.7000364451816394
lr: 0.0005
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
0.013730131020688735
lr: 0.0002
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
0.013730131020688735
lr: 0.002
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
0.013730131020688735
lr: 0.02
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
0.013730131020688735
lambda2: 0.5
lr: 2.0e-5
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
0.013730131020688735
lr: 0.0005
[15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
0.025632568766664712
lr: 0.0002
[17, 21, 23, 25]
[14, 15, 16, 17, 18, 19, 20

Dict{Any,Any} with 10 entries:
  "lambda1"           => 0.0005
  "ATE_results_train" => [0.0 0.0359246 … 0.0127483 1.76117]
  "lr"                => 0.0005
  "corr_px_valid"     => 0.338511
  "sel_Z"             => [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
  "theta_hat"         => [-0.00093249, 6.20858, -0.02681, 0.0233086, 0.0364292,…
  "corr_p_train"      => 0.494082
  "corr_px_train"     => 0.00134359
  "ATE_results_valid" => [0.0 0.036167 … 0.0256326 1.76145]
  "lambda2"           => 0.5

MethodError: MethodError: no method matching complete_type(::String)
Closest candidates are:
  complete_type(!Matched::Type{#s68} where #s68<:Function) at /Users/Limor/.julia/packages/IJulia/gI2uA/src/handlers.jl:54
  complete_type(!Matched::Type{#s68} where #s68<:Type) at /Users/Limor/.julia/packages/IJulia/gI2uA/src/handlers.jl:55
  complete_type(!Matched::Type{#s68} where #s68<:Tuple) at /Users/Limor/.julia/packages/IJulia/gI2uA/src/handlers.jl:56
  ...

In [217]:
ATEs_oct8 = []
append!(ATEs_oct8, best_setup["ATE_results_valid"])
append!(ATEs_oct8, 0.0045665072676280725)
ATEs_oct8[1] = 0.03613994943765849

0.03613994943765849

In [299]:
ATEs_oct8[4]

0.013730131020688735

In [226]:
round(abs(ATEs_oct8[1] - ATEs_oct8[5]),digits=3), round(abs(ATEs_oct8[1] - ATEs_oct8[4]), digits=3)

(0.011, 0.022)

In [225]:
round(abs(ATEs_oct8[1] - ATEs_oct8[6]), digits=3), round(abs(ATEs_oct8[1] -ATEs_oct8[7]),digits=3)

(1.725, 0.032)