## Simulation comparing naive GLMMLasso and Selective Inference
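This notebook compares the true positive rate (TPR) and the false discovery rate (FDR) of the two methods across different settings of the signal-to-noise ratio (SNR) and of the proportion of non-zero coefficients.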

In [11]:
# ---- Simulation settings --------------------------------------------------
# For every (SNR, prop_relevant) combination, `num_iter` data sets are drawn
# with data_generator(). On each data set the fixed-lambda selection is run,
# the selected model is refitted with lmer(), and mocasin() is used to obtain
# selective p-values. TPR / FDR / number of selected terms are stored for the
# plain lasso selection and for the selection filtered at level `alpha`.
n_subjects <- 25      # forwarded to data_generator()
n_observations <- 4   # forwarded to data_generator()
p <- 100              # number of candidate covariates X1, ..., Xp
q <- 0                # forwarded to data_generator(); meaning not visible here -- TODO confirm

SNR_values <- c(4)
prop_relevant_values <- c(0.1, 0.2, 0.3)
num_iter <- 50        # replications per (SNR, prop_relevant) setting
alpha <- 0.05         # threshold applied to the selective p-values

# Full fixed-effects formula: y ~ 1 + X1 + ... + Xp
fixed_form <- as.formula(
    paste0("y ~ 1 +", paste0("X", seq_len(p), collapse = "+"))
)

# Random-effects specification handed to selFun_fixed_lambda()
rand_form <- list(subjects = ~1)

tot_its <- length(SNR_values) * length(prop_relevant_values) * num_iter

columns <- c(
    "SNR", "prop_relevant",
    "TPR_lasso", "TPR_selinf",
    "FDR_lasso", "FDR_selinf",
    "num_sel_lasso", "num_sel_selinf"
)

# Preallocate one row per iteration instead of growing the data frame inside
# the loop; rows of iterations that have not run yet stay NA.
results_df <- as.data.frame(
    matrix(NA_real_, nrow = tot_its, ncol = length(columns),
           dimnames = list(NULL, columns))
)

pb <- txtProgressBar(min = 0, max = tot_its, initial = 0, style = 3)
iter_done <- 0

for (SNR in SNR_values) {
    for (prop_relevant in prop_relevant_values) {
        for (iter in seq_len(num_iter)) {

            data <- data_generator(n_subjects, n_observations, p, q, SNR, prop_relevant)
            X <- data$X
            subjects <- data$subjects
            y <- data$y
            beta <- data$beta

            # fixing lambda at half of the largest absolute entry of X'y
            lambda <- max(abs(t(X) %*% y)) / 2

            # Selection procedure as a function of the response only
            selFun <- function(y) {
                selFun_fixed_lambda(X, subjects, y, fixed_form, rand_form, lambda)
            }

            # Selection on the observed response
            sel <- selFun(y)
            sel_vec <- sel$vec
            sel_names <- sel$names

            # Congruency check used by mocasin(): does a new response `yb`
            # lead to exactly the same selection as the observed one?
            checkFun <- function(yb) {
                all(selFun(yb)$vec == sel_vec)
            }

            # The first selected name is dropped (presumably the intercept,
            # which lmer() adds by itself -- TODO confirm). If nothing else
            # was selected, fall back to an intercept-only model instead of
            # indexing with 2:1, which would yield an NA term in the formula.
            sel_covariates <- sel_names[-1]
            sel_rhs <- if (length(sel_covariates) > 0) {
                paste(sel_covariates, collapse = "+")
            } else {
                "1"
            }
            sel_form <- as.formula(paste("y ~ ", sel_rhs, "+ (1|subjects)"))

            final_model <- lmer(formula = sel_form, data = data.frame(X, subjects, y))

            # and compute valid p-values conditional on the selection
            # (this takes some time and will produce a lot of warnings)
            suppressWarnings(
                res <- mocasin(final_model, this_y = y, conditional = FALSE,
                               checkFun = checkFun, nrSamples = 100, trace = FALSE)
            )

            # Indicator vector (same layout as sel_vec) of the selected terms
            # whose selective p-value is below alpha.
            sel_with_selinf <- numeric(p + 1)
            names(sel_with_selinf) <- names(sel_vec)
            only_sel <- names(sel_vec[sel_vec == 1])

            # `j` (not `i`/`iter`) so the replication index is not shadowed;
            # seq_along() is safe when res$selinf is empty.
            for (j in seq_along(res$selinf)) {
                below_alpha <- res$selinf[[j]]["pval"] < alpha
                # A missing p-value is treated as "not significant" rather
                # than aborting the whole simulation inside `if`.
                if (!anyNA(below_alpha) && below_alpha) {
                    sel_with_selinf[only_sel[j]] <- 1
                }
            }

            # The truth vector gets a leading 1 so that it lines up with the
            # length-(p + 1) selection vectors.
            # NOTE(review): this looks like the intercept is counted as a
            # true positive in TPR -- confirm this is intended.
            truth <- c(1, beta != 0)
            lasso_metrics <- metrics(sel_vec, truth)
            selinf_metrics <- metrics(sel_with_selinf, truth)

            num_sel_lasso <- sum(sel_vec)
            num_sel_selinf <- sum(sel_with_selinf)

            iter_done <- iter_done + 1
            results_df[iter_done, ] <- c(
                SNR, prop_relevant,
                lasso_metrics$tpr, selinf_metrics$tpr,
                lasso_metrics$fdr, selinf_metrics$fdr,
                num_sel_lasso, num_sel_selinf
            )

            setTxtProgressBar(pb, iter_done)

        }
    }
}

close(pb)



boundary (singular) fit: see help('isSingular')





boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')





boundary (singular) fit: see help('isSingular')





boundary (singular) fit: see help('isSingular')





boundary (singular) fit: see help('isSingular')





boundary (singular) fit: see help('isSingular')





boundary (singular) fit: see help('isSingular')





boundary (singular) fit: see help('isSingular')





boundary (singular) fit: see help('isSingular')





boundary (singular) fit: see help('isSingular')





boundary (singular) fit: see help('isSingular')





boundary (singular) fit: see help('isSingular')





boundary (singular) fit: see help('isSingular')





boundary (singular) fit: see help('isSingular')





boundary (singular) fit: see help('isSingular')





boundary (singular) fit: see help('isSingular')





boundary (singular) fit: see help('isSingular')





boundary (singular) fit: see help('isSingular')





boundary (singular) fit: see help('isSingular')





boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')





boundary (singular) fit: see help('isSingular')





boundary (singular) fit: see help('isSingular')





boundary (singular) fit: see help('isSingular')





boundary (singular) fit: see help('isSingular')





boundary (singular) fit: see help('isSingular')





boundary (singular) fit: see help('isSingular')





boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')





boundary (singular) fit: see help('isSingular')





boundary (singular) fit: see help('isSingular')





boundary (singular) fit: see help('isSingular')





In [16]:
# Display the collected simulation results (one row per simulation iteration).
results_df

Unnamed: 0_level_0,SNR,prop_relevant,TPR_lasso,TPR_selinf,FDR_lasso,FDR_selinf,num_sel_lasso,num_sel_selinf
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,4,0.1,1.0000000,1.0000000,0.31250000,0.00000000,16,11
2,4,0.1,0.5454545,0.5454545,0.00000000,0.00000000,6,6
3,4,0.1,0.6363636,0.3636364,0.22222222,0.20000000,9,5
4,4,0.1,0.9090909,0.7272727,0.23076923,0.20000000,13,10
5,4,0.1,1.0000000,0.9090909,0.26666667,0.09090909,15,11
6,4,0.1,0.5454545,0.3636364,0.00000000,0.00000000,6,4
7,4,0.1,0.9090909,0.6363636,0.23076923,0.00000000,13,7
8,4,0.1,0.5454545,0.2727273,0.00000000,0.00000000,6,3
9,4,0.1,0.5454545,0.4545455,0.00000000,0.00000000,6,5
10,4,0.1,0.9090909,0.8181818,0.41176471,0.18181818,17,11


In [13]:
# Persist the simulation results as a CSV file, without the row-name column.
# NOTE(review): the destination is an absolute, machine-specific path.
write.csv(
    x = results_df,
    file = "C:/Users/dalma/Desktop/Matteo/phd/r/simulation_results_SNR4.csv",
    row.names = FALSE
)