### summarize elastic net results on Block Simulation Scenarios for continuous outcome

In [1]:
dir = '/panfs/panfs1.ucsd.edu/panscratch/lij014/Stability_2020/sim_data'

load(paste0(dir, '/block_Elnet.RData'))

In [2]:
dim.list = list()
size = c(50, 100, 500, 1000)
idx = 0
for (P in size){
    for (N in size){
        idx = idx + 1
        dim.list[[idx]] = c(P=P, N=N)
    }
}

rou.list = seq(0.1, 0.9, 0.2)

files = NULL
for (rou in rou.list){
  for (dim in dim.list){
    p = dim[1]
    n = dim[2]
    files = cbind(files, paste0(dir, '/sim_block_corr', rou, paste('P', p, 'N', n, sep='_'), '.RData', sep=''))
  }
}

In [3]:
avg_FDR = NULL
for (i in 1:length(files)){
    sim_file = files[i]
    load(sim_file, dat <- new.env())
    sub = dat$sim_array[[i]]
    p = sub$p # take true values from 1st replicate of each simulated data
    coef = sub$beta
    coef.true = which(coef != 0)
    
    tt = results_block_elnet[[i]]$Stab.table
    
    FDR = NULL
    for (r in 1:nrow(tt)){
        FDR = c(FDR, length(setdiff(which(tt[r, ] !=0), coef.true))/sum(tt[r, ]))

    }
    
    avg_FDR = c(avg_FDR, mean(FDR, na.rm=T))
}

In [4]:
table_block = NULL
tmp_num_select = rep(0, length(results_block_elnet))
for (i in 1:length(results_block_elnet)){
    table_block = rbind(table_block, results_block_elnet[[i]][c('n', 'p', 'rou', 'FP', 'FN', 'MSE', 'Stab')])
    tmp_num_select[i] = mean(rowSums(results_block_elnet[[i]]$Stab.table))
    
}
table_block = as.data.frame(table_block)
table_block$num_select = tmp_num_select
table_block$FDR = round(avg_FDR,2)

In [6]:
head(table_block)

n,p,rou,FP,FN,MSE,Stab,num_select,FDR
50,50,0.1,10.9 ( 0.97 ),3.91 ( 0.15 ),0.38 ( 0.02 ),0.02,11.99,0.77
100,50,0.1,13.39 ( 1.06 ),2.45 ( 0.16 ),0.34 ( 0.01 ),0.06,15.94,0.68
500,50,0.1,19.21 ( 0.81 ),0.14 ( 0.04 ),0.27 ( 0 ),0.13,24.07,0.73
1000,50,0.1,18.76 ( 0.63 ),0.02 ( 0.01 ),0.26 ( 0 ),0.15,23.74,0.73
50,100,0.1,9.27 ( 0.76 ),4.63 ( 0.09 ),0.41 ( 0.02 ),0.03,9.64,0.82
100,100,0.1,10.1 ( 1.04 ),3.87 ( 0.13 ),0.34 ( 0.01 ),0.07,11.23,0.7


In [7]:
# export result
result.table_block <- apply(table_block,2,as.character)
rownames(result.table_block) = rownames(table_block)
result.table_block = as.data.frame(result.table_block)

# extract numbers only for 'n' & 'p'
result.table_block$n = tidyr::extract_numeric(result.table_block$n)
result.table_block$p = tidyr::extract_numeric(result.table_block$p)
result.table_block$ratio = result.table_block$p / result.table_block$n

result.table_block = result.table_block[c('n', 'p', 'rou', 'ratio', 'Stab', 'MSE', 'FP', 'FN', 'num_select', 'FDR')]
colnames(result.table_block)[1:4] = c('N', 'P', 'Corr', 'Ratio')

extract_numeric() is deprecated: please use readr::parse_number() instead
extract_numeric() is deprecated: please use readr::parse_number() instead


In [8]:
# convert interested measurements to be numeric
result.table_block$Stab = as.numeric(as.character(result.table_block$Stab))
result.table_block$MSE_mean = as.numeric(substr(result.table_block$MSE, start=1, stop=4))
result.table_block$FP_mean = as.numeric(substr(result.table_block$FP, start=1, stop=4))
result.table_block$FN_mean = as.numeric(substr(result.table_block$FN, start=1, stop=4))
result.table_block$FN_mean[is.na(result.table_block$FN_mean)] = 0
result.table_block$num_select = as.numeric(as.character(result.table_block$num_select))

“NAs introduced by coercion”

In [9]:
# check whether missing values exists
result.table_block[rowSums(is.na(result.table_block)) > 0,]

Unnamed: 0,N,P,Corr,Ratio,Stab,MSE,FP,FN,num_select,FDR,MSE_mean,FP_mean,FN_mean
45,50,1000,0.5,20,0.04,2 ( 0.1 ),52.83 ( 3.84 ),2.29 ( 0.1 ),55.54,0.91,,52.8,2.29


In [10]:
# recover values
result.table_block$MSE_mean[45] = 2

In [11]:
result.table_block[45, ]

Unnamed: 0,N,P,Corr,Ratio,Stab,MSE,FP,FN,num_select,FDR,MSE_mean,FP_mean,FN_mean
45,50,1000,0.5,20,0.04,2 ( 0.1 ),52.83 ( 3.84 ),2.29 ( 0.1 ),55.54,0.91,2,52.8,2.29


In [12]:
head(result.table_block)

N,P,Corr,Ratio,Stab,MSE,FP,FN,num_select,FDR,MSE_mean,FP_mean,FN_mean
50,50,0.1,1.0,0.02,0.38 ( 0.02 ),10.9 ( 0.97 ),3.91 ( 0.15 ),11.99,0.77,0.38,10.9,3.91
100,50,0.1,0.5,0.06,0.34 ( 0.01 ),13.39 ( 1.06 ),2.45 ( 0.16 ),15.94,0.68,0.34,13.3,2.45
500,50,0.1,0.1,0.13,0.27 ( 0 ),19.21 ( 0.81 ),0.14 ( 0.04 ),24.07,0.73,0.27,19.2,0.14
1000,50,0.1,0.05,0.15,0.26 ( 0 ),18.76 ( 0.63 ),0.02 ( 0.01 ),23.74,0.73,0.26,18.7,0.02
50,100,0.1,2.0,0.03,0.41 ( 0.02 ),9.27 ( 0.76 ),4.63 ( 0.09 ),9.64,0.82,0.41,9.27,4.63
100,100,0.1,1.0,0.07,0.34 ( 0.01 ),10.1 ( 1.04 ),3.87 ( 0.13 ),11.23,0.7,0.34,10.1,3.87


In [13]:
tail(result.table_block)

Unnamed: 0,N,P,Corr,Ratio,Stab,MSE,FP,FN,num_select,FDR,MSE_mean,FP_mean,FN_mean
75,500,500,0.9,1.0,0.15,0.28 ( 0 ),32.6 ( 1.76 ),0 ( 0 ),37.6,0.81,0.28,32.6,0.0
76,1000,500,0.9,0.5,0.16,0.26 ( 0 ),30.4 ( 1.66 ),0 ( 0 ),35.4,0.79,0.26,30.4,0.0
77,50,1000,0.9,20.0,0.07,3.06 ( 0.18 ),43.09 ( 2.82 ),1.57 ( 0.1 ),46.52,0.88,3.06,43.0,1.57
78,100,1000,0.9,10.0,0.12,0.59 ( 0.03 ),43.91 ( 1.31 ),0 ( 0 ),48.91,0.87,0.59,43.9,0.0
79,500,1000,0.9,2.0,0.12,0.29 ( 0 ),41.57 ( 2.28 ),0 ( 0 ),46.57,0.84,0.29,41.5,0.0
80,1000,1000,0.9,1.0,0.13,0.27 ( 0 ),38.76 ( 2.16 ),0 ( 0 ),43.76,0.82,0.27,38.7,0.0


In [14]:
## export
write.table(result.table_block, '../results_summary_cts/sim_block_elnet.txt', sep='\t', row.names=F)