### summarize elastic net results on Toeplitz Simulation Scenarios for continuous outcome

In [1]:
dir = '/panfs/panfs1.ucsd.edu/panscratch/lij014/Stability_2020/sim_data'

load(paste0(dir, '/toe_Elnet.RData'))

In [2]:
dim.list = list()
size = c(50, 100, 500, 1000)
idx = 0
for (P in size){
    for (N in size){
        idx = idx + 1
        dim.list[[idx]] = c(P=P, N=N)
    }
}

rou.list = seq(0.1, 0.9, 0.2)

files = NULL
for (rou in rou.list){
  for (dim in dim.list){
    p = dim[1]
    n = dim[2]
    files = cbind(files, paste0(dir, '/sim_toeplitz_corr', rou, paste('P', p, 'N', n, sep='_'), '.RData', sep=''))
  }
}

In [3]:
avg_FDR = NULL
for (i in 1:length(files)){
    sim_file = files[i]
    load(sim_file, dat <- new.env())
    sub = dat$sim_array[[i]]
    p = sub$p # take true values from 1st replicate of each simulated data
    coef = sub$beta
    coef.true = which(coef != 0)
    
    tt = results_toe_elnet[[i]]$Stab.table
    
    FDR = NULL
    for (r in 1:nrow(tt)){
        FDR = c(FDR, length(setdiff(which(tt[r, ] !=0), coef.true))/sum(tt[r, ]))

    }
    
    avg_FDR = c(avg_FDR, mean(FDR, na.rm=T))
}

In [4]:
table_toe = NULL
tmp_num_select = rep(0, length(results_toe_elnet))
for (i in 1:length(results_toe_elnet)){
    table_toe = rbind(table_toe, results_toe_elnet[[i]][c('n', 'p', 'rou', 'FP', 'FN', 'MSE', 'Stab')])
    tmp_num_select[i] = mean(rowSums(results_toe_elnet[[i]]$Stab.table))
    
}
table_toe = as.data.frame(table_toe)
table_toe$num_select = tmp_num_select
table_toe$FDR = round(avg_FDR,2)

In [5]:
head(table_toe)

n,p,rou,FP,FN,MSE,Stab,num_select,FDR
50,50,0.1,14.72 ( 0.57 ),0.01 ( 0.01 ),0.6 ( 0.03 ),0.21,19.71,0.67
100,50,0.1,13.7 ( 0.66 ),0 ( 0 ),0.34 ( 0.01 ),0.23,18.7,0.64
500,50,0.1,13.59 ( 0.55 ),0 ( 0 ),0.26 ( 0 ),0.23,18.59,0.65
1000,50,0.1,12.64 ( 0.54 ),0 ( 0 ),0.26 ( 0 ),0.25,17.64,0.63
50,100,0.1,19.33 ( 0.67 ),0 ( 0 ),0.69 ( 0.05 ),0.2,24.33,0.73
100,100,0.1,17.77 ( 0.8 ),0 ( 0 ),0.38 ( 0.01 ),0.22,22.77,0.7


In [6]:
# export result
result.table_toe <- apply(table_toe,2,as.character)
rownames(result.table_toe) = rownames(table_toe)
result.table_toe = as.data.frame(result.table_toe)

# extract numbers only for 'n' & 'p'
result.table_toe$n = tidyr::extract_numeric(result.table_toe$n)
result.table_toe$p = tidyr::extract_numeric(result.table_toe$p)
result.table_toe$ratio = result.table_toe$p / result.table_toe$n

result.table_toe = result.table_toe[c('n', 'p', 'rou', 'ratio', 'Stab', 'MSE', 'FP', 'FN', 'num_select', 'FDR')]
colnames(result.table_toe)[1:4] = c('N', 'P', 'Corr', 'Ratio')

extract_numeric() is deprecated: please use readr::parse_number() instead
extract_numeric() is deprecated: please use readr::parse_number() instead


In [7]:
# convert interested measurements to be numeric
result.table_toe$Stab = as.numeric(as.character(result.table_toe$Stab))
result.table_toe$MSE_mean = as.numeric(substr(result.table_toe$MSE, start=1, stop=4))
result.table_toe$FP_mean = as.numeric(substr(result.table_toe$FP, start=1, stop=4))
result.table_toe$FN_mean = as.numeric(substr(result.table_toe$FN, start=1, stop=4))
result.table_toe$FN_mean[is.na(result.table_toe$FN_mean)] = 0
result.table_toe$num_select = as.numeric(as.character(result.table_toe$num_select))

“NAs introduced by coercion”

In [8]:
# check whether missing values exists
result.table_toe[rowSums(is.na(result.table_toe)) > 0,]

Unnamed: 0,N,P,Corr,Ratio,Stab,MSE,FP,FN,num_select,FDR,MSE_mean,FP_mean,FN_mean
24,1000,100,0.3,0.1,0.2,0.26 ( 0 ),19 ( 0.86 ),0 ( 0 ),24.0,0.71,0.26,,0.0
66,100,50,0.9,0.5,0.21,0.35 ( 0.01 ),14 ( 0.69 ),0.27 ( 0.06 ),18.73,0.66,0.35,,0.27


In [9]:
# recover values
result.table_toe$FP_mean[24] = 19
result.table_toe$FP_mean[66] = 14

In [10]:
result.table_toe[c(24, 66), ]

Unnamed: 0,N,P,Corr,Ratio,Stab,MSE,FP,FN,num_select,FDR,MSE_mean,FP_mean,FN_mean
24,1000,100,0.3,0.1,0.2,0.26 ( 0 ),19 ( 0.86 ),0 ( 0 ),24.0,0.71,0.26,19,0.0
66,100,50,0.9,0.5,0.21,0.35 ( 0.01 ),14 ( 0.69 ),0.27 ( 0.06 ),18.73,0.66,0.35,14,0.27


In [11]:
head(result.table_toe)

N,P,Corr,Ratio,Stab,MSE,FP,FN,num_select,FDR,MSE_mean,FP_mean,FN_mean
50,50,0.1,1.0,0.21,0.6 ( 0.03 ),14.72 ( 0.57 ),0.01 ( 0.01 ),19.71,0.67,0.6,14.7,0.01
100,50,0.1,0.5,0.23,0.34 ( 0.01 ),13.7 ( 0.66 ),0 ( 0 ),18.7,0.64,0.34,13.7,0.0
500,50,0.1,0.1,0.23,0.26 ( 0 ),13.59 ( 0.55 ),0 ( 0 ),18.59,0.65,0.26,13.5,0.0
1000,50,0.1,0.05,0.25,0.26 ( 0 ),12.64 ( 0.54 ),0 ( 0 ),17.64,0.63,0.26,12.6,0.0
50,100,0.1,2.0,0.2,0.69 ( 0.05 ),19.33 ( 0.67 ),0 ( 0 ),24.33,0.73,0.69,19.3,0.0
100,100,0.1,1.0,0.22,0.38 ( 0.01 ),17.77 ( 0.8 ),0 ( 0 ),22.77,0.7,0.38,17.7,0.0


In [12]:
tail(result.table_toe)

Unnamed: 0,N,P,Corr,Ratio,Stab,MSE,FP,FN,num_select,FDR,MSE_mean,FP_mean,FN_mean
75,500,500,0.9,1.0,0.12,0.29 ( 0 ),41.14 ( 1.02 ),0 ( 0 ),46.14,0.86,0.29,41.1,0.0
76,1000,500,0.9,0.5,0.13,0.27 ( 0 ),39.21 ( 0.84 ),0 ( 0 ),44.21,0.86,0.27,39.2,0.0
77,50,1000,0.9,20.0,0.04,0.94 ( 0.05 ),36.59 ( 3.6 ),3.72 ( 0.08 ),37.87,0.92,0.94,36.5,3.72
78,100,1000,0.9,10.0,0.09,0.59 ( 0.02 ),32.05 ( 1.85 ),2.59 ( 0.06 ),34.46,0.88,0.59,32.0,2.59
79,500,1000,0.9,2.0,0.09,0.3 ( 0 ),57.69 ( 1.57 ),0 ( 0 ),62.69,0.9,0.3,57.6,0.0
80,1000,1000,0.9,1.0,0.1,0.27 ( 0 ),52.35 ( 1.6 ),0 ( 0 ),57.35,0.89,0.27,52.3,0.0


In [13]:
## export
write.table(result.table_toe, '../results_summary_cts/sim_toe_elnet.txt', sep='\t', row.names=F)