### summarize lasso results on Block Simulation Scenarios for continuous outcome

In [1]:
dir = '/panfs/panfs1.ucsd.edu/panscratch/lij014/Stability_2020/sim_data'

load(paste0(dir, '/block_Lasso.RData'))

In [2]:
dim.list = list()
size = c(50, 100, 500, 1000)
idx = 0
for (P in size){
    for (N in size){
        idx = idx + 1
        dim.list[[idx]] = c(P=P, N=N)
    }
}

rou.list = seq(0.1, 0.9, 0.2)

files = NULL
for (rou in rou.list){
  for (dim in dim.list){
    p = dim[1]
    n = dim[2]
    files = cbind(files, paste0(dir, '/sim_block_corr', rou, paste('P', p, 'N', n, sep='_'), '.RData', sep=''))
  }
}

In [3]:
avg_FDR = NULL
for (i in 1:length(files)){
    sim_file = files[i]
    load(sim_file, dat <- new.env())
    sub = dat$sim_array[[i]]
    p = sub$p # take true values from 1st replicate of each simulated data
    coef = sub$beta
    coef.true = which(coef != 0)
    
    tt = results_block_lasso[[i]]$Stab.table
    
    FDR = NULL
    for (r in 1:nrow(tt)){
        FDR = c(FDR, length(setdiff(which(tt[r, ] !=0), coef.true))/sum(tt[r, ]))

    }
    
    avg_FDR = c(avg_FDR, mean(FDR, na.rm=T))
}

In [4]:
table_block = NULL
tmp_num_select = rep(0, length(results_block_lasso))
for (i in 1:length(results_block_lasso)){
    table_block = rbind(table_block, results_block_lasso[[i]][c('n', 'p', 'rou', 'FP', 'FN', 'MSE', 'Stab')])
    tmp_num_select[i] = mean(rowSums(results_block_lasso[[i]]$Stab.table))
    
}
table_block = as.data.frame(table_block)
table_block$num_select = tmp_num_select
table_block$FDR = round(avg_FDR,2)

In [7]:
head(table_block)

n,p,rou,FP,FN,MSE,Stab,num_select,FDR
50,50,0.1,3.52 ( 0.22 ),4.92 ( 0.08 ),0.36 ( 0.01 ),0.05,3.6,0.65
100,50,0.1,2.71 ( 0.25 ),4.4 ( 0.1 ),0.31 ( 0.01 ),0.18,3.31,0.42
500,50,0.1,5.42 ( 0.25 ),1.97 ( 0.11 ),0.29 ( 0 ),0.35,8.45,0.48
1000,50,0.1,4.91 ( 0.2 ),1.57 ( 0.09 ),0.28 ( 0 ),0.44,8.34,0.44
50,100,0.1,4.73 ( 0.22 ),4.79 ( 0.08 ),0.37 ( 0.02 ),0.06,4.94,0.72
100,100,0.1,3 ( 0.15 ),4.75 ( 0.09 ),0.34 ( 0.01 ),0.13,3.25,0.57


In [8]:
# export result
result.table_block <- apply(table_block,2,as.character)
rownames(result.table_block) = rownames(table_block)
result.table_block = as.data.frame(result.table_block)

# extract numbers only for 'n' & 'p'
result.table_block$n = tidyr::extract_numeric(result.table_block$n)
result.table_block$p = tidyr::extract_numeric(result.table_block$p)
result.table_block$ratio = result.table_block$p / result.table_block$n

result.table_block = result.table_block[c('n', 'p', 'rou', 'ratio', 'Stab', 'MSE', 'FP', 'FN', 'num_select', 'FDR')]
colnames(result.table_block)[1:4] = c('N', 'P', 'Corr', 'Ratio')

extract_numeric() is deprecated: please use readr::parse_number() instead
extract_numeric() is deprecated: please use readr::parse_number() instead


In [9]:
# convert interested measurements to be numeric
result.table_block$Stab = as.numeric(as.character(result.table_block$Stab))
result.table_block$MSE_mean = as.numeric(substr(result.table_block$MSE, start=1, stop=4))
result.table_block$FP_mean = as.numeric(substr(result.table_block$FP, start=1, stop=4))
result.table_block$FN_mean = as.numeric(substr(result.table_block$FN, start=1, stop=4))
result.table_block$FN_mean[is.na(result.table_block$FN_mean)] = 0
result.table_block$num_select = as.numeric(as.character(result.table_block$num_select))

“NAs introduced by coercion”

In [10]:
# check whether missing values exists
result.table_block[rowSums(is.na(result.table_block)) > 0,]

Unnamed: 0,N,P,Corr,Ratio,Stab,MSE,FP,FN,num_select,FDR,MSE_mean,FP_mean,FN_mean
6,100,100,0.1,1,0.13,0.34 ( 0.01 ),3 ( 0.15 ),4.75 ( 0.09 ),3.25,0.57,0.34,,4.75


In [11]:
# recover values
result.table_block$FP_mean[is.na(result.table_block$FP_mean)] = 3

In [12]:
result.table_block[6, ]

Unnamed: 0,N,P,Corr,Ratio,Stab,MSE,FP,FN,num_select,FDR,MSE_mean,FP_mean,FN_mean
6,100,100,0.1,1,0.13,0.34 ( 0.01 ),3 ( 0.15 ),4.75 ( 0.09 ),3.25,0.57,0.34,3,4.75


In [13]:
head(result.table_block)

N,P,Corr,Ratio,Stab,MSE,FP,FN,num_select,FDR,MSE_mean,FP_mean,FN_mean
50,50,0.1,1.0,0.05,0.36 ( 0.01 ),3.52 ( 0.22 ),4.92 ( 0.08 ),3.6,0.65,0.36,3.52,4.92
100,50,0.1,0.5,0.18,0.31 ( 0.01 ),2.71 ( 0.25 ),4.4 ( 0.1 ),3.31,0.42,0.31,2.71,4.4
500,50,0.1,0.1,0.35,0.29 ( 0 ),5.42 ( 0.25 ),1.97 ( 0.11 ),8.45,0.48,0.29,5.42,1.97
1000,50,0.1,0.05,0.44,0.28 ( 0 ),4.91 ( 0.2 ),1.57 ( 0.09 ),8.34,0.44,0.28,4.91,1.57
50,100,0.1,2.0,0.06,0.37 ( 0.02 ),4.73 ( 0.22 ),4.79 ( 0.08 ),4.94,0.72,0.37,4.73,4.79
100,100,0.1,1.0,0.13,0.34 ( 0.01 ),3 ( 0.15 ),4.75 ( 0.09 ),3.25,0.57,0.34,3.0,4.75


In [14]:
tail(result.table_block)

Unnamed: 0,N,P,Corr,Ratio,Stab,MSE,FP,FN,num_select,FDR,MSE_mean,FP_mean,FN_mean
75,500,500,0.9,1.0,0.5,0.29 ( 0 ),6.87 ( 0.54 ),0 ( 0 ),11.87,0.41,0.29,6.87,0.0
76,1000,500,0.9,0.5,0.71,0.27 ( 0 ),3.37 ( 0.27 ),0 ( 0 ),8.37,0.23,0.27,3.37,0.0
77,50,1000,0.9,20.0,0.11,2.46 ( 0.16 ),25.37 ( 0.35 ),1.73 ( 0.12 ),28.64,0.85,2.46,25.3,1.73
78,100,1000,0.9,10.0,0.22,0.62 ( 0.03 ),21.43 ( 0.62 ),0 ( 0 ),26.43,0.76,0.62,21.4,0.0
79,500,1000,0.9,2.0,0.46,0.3 ( 0 ),7.99 ( 0.79 ),0 ( 0 ),12.99,0.42,0.3,7.99,0.0
80,1000,1000,0.9,1.0,0.6,0.28 ( 0 ),4.93 ( 0.43 ),0 ( 0 ),9.93,0.31,0.28,4.93,0.0


In [15]:
## export
write.table(result.table_block, '../results_summary_cts/sim_block_lasso.txt', sep='\t', row.names=F)