In [2]:
source('utils.r')
source('selinf_functions.r')
source('metrics.r')
source('data_generator.r')
source('cov_matrix.r')

## Testing selection results

Comparing the plain Lasso with the Lasso followed by Selective Inference, for varying SNR and true proportion of non-zero coefficients.
Both methods use the same fixed penalty $\lambda = \lVert X^T y \rVert_{\infty} / 2$.

In [4]:
# Fix the RNG seed so the simulated data set is reproducible.
set.seed(1)

# Simulation settings passed on to data_generator().
n_subjects <- 25
n_observations <- 4
n <- n_subjects * n_observations  # total number of rows
p <- 200
q <- 0
SNR <- 4
prop_relevant <- 0.2

# Simulate one data set and unpack the components used further down.
data <- data_generator(n_subjects, n_observations, p, q, SNR, prop_relevant)
X <- data$X
subjects <- data$subjects
y <- data$y
beta <- data$beta
sd <- data$sd

# Fixed penalty: half of the largest absolute entry of t(X) %*% y.
lambda <- max(abs(t(X) %*% y)) / 2

## Lasso selection

In [5]:
# Fixed-effects part of the full model: an intercept plus the columns
# X1, ..., Xp.
fixed_form <- as.formula(
  paste0("y ~ 1 + ", paste0("X", seq_len(p), collapse = " + "))
)

# Random-effects specification handed to the selection routine (one `~ 1`
# term keyed by `subjects`).
# The list used to be stored as `rnd_form` while selFun() read `rand_form`,
# so the call only worked if a stale `rand_form` happened to exist in the
# session. Both names now refer to the same object.
rand_form <- list(subjects = ~ 1)
rnd_form <- rand_form  # old name, kept for any code that still refers to it

# Selection procedure as a function of the response only; the design, the
# grouping, the formulas and lambda are taken from the enclosing environment.
selFun <- function(y) {
  selFun_fixed_lambda(X, subjects, y, fixed_form, rand_form, lambda)
}

# Selection on the observed response.
sel <- selFun(y)
sel_vec <- sel$vec
sel_names <- sel$names

In [6]:
# Selection quality of the plain Lasso against the true support; the leading
# TRUE is prepended to `beta != 0` (presumably for the intercept -- verify
# against metrics()).
print(metrics(sel_vec, c(TRUE, beta != 0)))

$tpr
[1] 0.3170732

$fdr
[1] 0.6176471



## Adding Selective Inference

In [7]:
# Congruency check used by the selective-inference sampler: re-run the
# selection on a new response `yb` and report whether it reproduces the
# original selection vector `sel_vec` exactly.
checkFun <- function(yb) {
  new_sel_vec <- selFun(yb)$vec
  all(new_sel_vec == sel_vec)
}

# Mixed-model formula restricted to the selected covariates. The first entry
# of `sel_names` is skipped (presumably the intercept, which lmer adds on its
# own -- verify against selFun_fixed_lambda()).
# `sel_names[-1]` replaces `sel_names[2:length(sel_names)]`: when only one
# name is selected, `2:1` counts down and would put NA and the first name
# into the formula.
sel_covariates <- sel_names[-1]
sel_rhs <- if (length(sel_covariates) > 0) {
  paste(sel_covariates, collapse = "+")
} else {
  "1"  # nothing but the first entry selected: intercept-only fixed part
}
sel_form <- as.formula(paste("y ~ ", sel_rhs, "+ (1|subjects)"))

# Refit on the selected covariates, with the `(1|subjects)` random term.
final_model <- lmer(formula = sel_form, data = data.frame(X, subjects, y))

boundary (singular) fit: see help('isSingular')



In [9]:
# Compute p-values that are valid conditional on the selection.
# This is slow and emits many warnings, which are silenced here.
res <- suppressWarnings(
  mocasin(
    final_model,
    this_y = y,
    conditional = FALSE,
    checkFun = checkFun,
    nrSamples = 100
  )
)

Computing inference for variable (location)  1 



Computing inference for variable (location)  2 



Computing inference for variable (location)  3 



Computing inference for variable (location)  4 



Computing inference for variable (location)  5 



Computing inference for variable (location)  6 



Computing inference for variable (location)  7 



Computing inference for variable (location)  8 



Computing inference for variable (location)  9 



Computing inference for variable (location)  10 



Computing inference for variable (location)  11 



Computing inference for variable (location)  12 



Computing inference for variable (location)  13 



Computing inference for variable (location)  14 



Computing inference for variable (location)  15 



Computing inference for variable (location)  16 



Computing inference for variable (location)  17 



Computing inference for variable (location)  18 



Computing inference for variable (location)  19 



Computing inference f

In [10]:
# Build a 0/1 indicator vector (same length and names as `sel_vec`) that is 1
# for the selected terms whose selective p-value is below the 5% level.

sel_with_selinf <- rep(0, p + 1)
names(sel_with_selinf) <- names(sel_vec)
only_sel <- names(sel_vec[sel_vec == 1])

# One p-value per entry of res$selinf. An entry without a usable p-value
# yields NA instead of aborting the whole cell.
selinf_pvals <- vapply(
  res$selinf,
  function(entry) as.numeric(entry[["pval"]])[1],
  numeric(1)
)

# NA p-values count as "not significant". The i-th entry of res$selinf is
# matched to the i-th selected name.
# NOTE(review): this positional matching assumes res$selinf and only_sel are
# in the same order -- confirm against mocasin().
is_signif <- !is.na(selinf_pvals) & selinf_pvals < 0.05
signif_names <- only_sel[which(is_signif)]
signif_names <- signif_names[!is.na(signif_names)]  # drop unmatched positions
sel_with_selinf[signif_names] <- 1

In [11]:
# Selection quality after selective inference, against the same true support
# (a leading 1 is prepended to `beta != 0`).
metrics(sel_with_selinf, c(1, beta != 0))