In [2]:
source('utils.r')
source('selinf_functions.r')
source('metrics.r')
source('data_generator.r')
source('cov_matrix.r')

## Simulation comparing the original variance estimate with the new one
We want to check whether using the new variance estimate improves the power of the selection. We compare the two estimates on false discovery rate (FDR), true positive rate (TPR), and the average length of the confidence intervals.

In [None]:
# Simulation pass 1.
# For every (SNR, prop_relevant) combination, num_iter data sets are drawn
# with data_generator_random_time(), a mixed model is fitted on the selected
# covariates, selective inference is run with mocasin(), and one row per
# iteration is appended to results_df. Only the *_old metric columns are
# filled here; the *_new columns are left as NA.
set.seed(1)

n_subjects <- 25
n_observations <- 4
n <- n_subjects * n_observations
p <- 100
q <- n_observations - 1

SNR_values <- c(2, 4)
prop_relevant_values <- c(0.1, 0.2, 0.3)
num_iter <- 10

# Total number of simulated data sets; used as the progress-bar maximum.
tot_its <- length(SNR_values) * length(prop_relevant_values) * num_iter

# Empty results table: one row is appended per simulated data set.
columns <- c('SNR', 'prop_relevant', 'TPR_old', 'TPR_new', 'FDR_old', 'FDR_new',
             'avg_ci_length_old', 'avg_ci_length_new')
results_df <- data.frame(matrix(nrow = 0, ncol = length(columns)))
colnames(results_df) <- columns

# fixing lambda (identical for every iteration, so set once up front)
lambda <- 10

# lmer input checks are switched off; the control object does not depend on
# the simulated data, so it is built once.
control <- lmerControl(
    check.nobs.vs.rankZ = "ignore",
    check.nobs.vs.nlev = "ignore",
    check.nlev.gtreq.5 = "ignore",
    check.nlev.gtr.1 = "ignore",
    check.nobs.vs.nRE = "ignore"
)

pb <- txtProgressBar(min = 0, max = tot_its, initial = 0, style = 3)

for (SNR in SNR_values) {
    for (prop_relevant in prop_relevant_values) {
        for (i in seq_len(num_iter)) {

            data <- data_generator_random_time(n_subjects, n_observations, p, SNR, prop_relevant, rho = 0.5)
            X <- data$X
            Z <- data$Z
            subjects <- data$subjects
            y <- data$y
            beta <- data$beta
            sd <- data$sd

            # NOTE(review): fixed_form, rand_form, sel_vec and sel_names are not
            # assigned anywhere in this cell. They must already exist in the
            # session (presumably sel_vec / sel_names come from running the
            # selection on the current y) -- confirm, otherwise every iteration
            # reuses stale values.
            selFun <- function(y) selFun_fixed_lambda(X, subjects, y, fixed_form, rand_form, lambda)

            # TRUE when re-running the selection on yb gives the same selection
            # vector as sel_vec.
            checkFun <- function(yb) {
                all(selFun(yb)$vec == sel_vec)
            }

            # Model formula: every selected name except the first entry
            # (presumably the intercept -- confirm) plus the random-effects term.
            sel_form <- as.formula(
                paste("y ~ ", paste(sel_names[-1], collapse = '+'), "+ (t1 + t2 +t3|subjects)")
            )

            final_model <- lmer(formula = sel_form, control = control, data = data.frame(X, subjects, y))

            # and compute valid p-values conditional on the selection
            # (this takes some time and will produce a lot of warnings)
            suppressWarnings(res <- mocasin(final_model, this_y = y, conditional = FALSE,
                                            checkFun = checkFun, nrSamples = 50, trace = FALSE))

            sel_with_selinf <- selection_with_selinf(res, sel_vec, fdr_level = 0.1)

            # Ground truth: leading 1 followed by the non-zero pattern of beta.
            selinf_metrics <- metrics(sel_with_selinf, c(1, beta != 0))

            avg_ci_length_selinf <- ci_length(res)

            # Append one row; values follow the order of `columns`, with NA
            # placeholders for the *_new columns.
            results_df[nrow(results_df) + 1, ] <- c(
                SNR, prop_relevant,
                selinf_metrics$tpr, NA,
                selinf_metrics$fdr, NA,
                avg_ci_length_selinf, NA
            )

            setTxtProgressBar(pb, nrow(results_df))

        }
    }
}

close(pb)

In [None]:
source('minMod_modif.r')

In [None]:
# Simulation pass 2 (run after minMod_modif.r has been sourced).
# Repeats the same simulation grid and writes TPR, FDR and average CI length
# into the *_new columns of results_df. Row k of results_df is the k-th
# (SNR, prop_relevant, iteration) combination, in the same loop order as the
# pass that filled the *_old columns, so results_df must already exist.
# NOTE(review): pairing old and new results row by row assumes both passes
# draw identical data sets after set.seed(1); that only holds if the code
# between data generations consumes the RNG identically in both passes --
# confirm.
set.seed(1)

n_subjects <- 25
n_observations <- 4
n <- n_subjects * n_observations
p <- 100
q <- n_observations - 1

SNR_values <- c(2, 4)
prop_relevant_values <- c(0.1, 0.2, 0.3)
num_iter <- 10

# Total number of simulated data sets; used as the progress-bar maximum.
tot_its <- length(SNR_values) * length(prop_relevant_values) * num_iter

# fixing lambda (identical for every iteration, so set once up front)
lambda <- 10

# lmer input checks are switched off; the control object does not depend on
# the simulated data, so it is built once.
control <- lmerControl(
    check.nobs.vs.rankZ = "ignore",
    check.nobs.vs.nlev = "ignore",
    check.nlev.gtreq.5 = "ignore",
    check.nlev.gtr.1 = "ignore",
    check.nobs.vs.nRE = "ignore"
)

pb <- txtProgressBar(min = 0, max = tot_its, initial = 0, style = 3)

# Index of the results_df row belonging to the current iteration.
row_idx <- 0

for (SNR in SNR_values) {
    for (prop_relevant in prop_relevant_values) {
        for (i in seq_len(num_iter)) {

            row_idx <- row_idx + 1

            data <- data_generator_random_time(n_subjects, n_observations, p, SNR, prop_relevant, rho = 0.5)
            X <- data$X
            Z <- data$Z
            subjects <- data$subjects
            y <- data$y
            beta <- data$beta
            sd <- data$sd

            # NOTE(review): fixed_form, rand_form, sel_vec and sel_names are not
            # assigned anywhere in this cell. They must already exist in the
            # session (presumably sel_vec / sel_names come from running the
            # selection on the current y) -- confirm, otherwise every iteration
            # reuses stale values.
            selFun <- function(y) selFun_fixed_lambda(X, subjects, y, fixed_form, rand_form, lambda)

            # TRUE when re-running the selection on yb gives the same selection
            # vector as sel_vec.
            checkFun <- function(yb) {
                all(selFun(yb)$vec == sel_vec)
            }

            # Model formula: every selected name except the first entry
            # (presumably the intercept -- confirm) plus the random-effects term.
            sel_form <- as.formula(
                paste("y ~ ", paste(sel_names[-1], collapse = '+'), "+ (t1 + t2 +t3|subjects)")
            )

            final_model <- lmer(formula = sel_form, control = control, data = data.frame(X, subjects, y))

            # and compute valid p-values conditional on the selection
            # (this takes some time and will produce a lot of warnings)
            suppressWarnings(res <- mocasin(final_model, this_y = y, conditional = FALSE,
                                            checkFun = checkFun, nrSamples = 50, trace = FALSE))

            sel_with_selinf <- selection_with_selinf(res, sel_vec, fdr_level = 0.1)

            # Ground truth: leading 1 followed by the non-zero pattern of beta.
            selinf_metrics <- metrics(sel_with_selinf, c(1, beta != 0))

            avg_ci_length_selinf <- ci_length(res)

            # Fill the *_new columns of this iteration's row. Indexing with
            # nrow(results_df) + 1 here would append a fresh row for every
            # metric instead of completing the existing one.
            results_df[row_idx, 'TPR_new'] <- selinf_metrics$tpr
            results_df[row_idx, 'FDR_new'] <- selinf_metrics$fdr
            results_df[row_idx, 'avg_ci_length_new'] <- avg_ci_length_selinf

            # Progress follows the iteration count; nrow(results_df) no longer
            # changes during this pass.
            setTxtProgressBar(pb, row_idx)

        }
    }
}

close(pb)

In [13]:
# Export the collected simulation results to CSV, omitting the row names.
utils::write.csv(
    x = results_df,
    file = "C:/Users/dalma/Desktop/Matteo/phd/r/simulation_results_varest.csv",
    row.names = FALSE
)