### summarize compositional lasso results on Toeplitz Simulation Scenarios for continuous outcome

In [1]:
dir = '/panfs/panfs1.ucsd.edu/panscratch/lij014/Stability_2020/sim_data'

load(paste0(dir, '/toe_compLasso.RData'))

In [2]:
dim.list = list()
size = c(50, 100, 500, 1000)
idx = 0
for (P in size){
    for (N in size){
        idx = idx + 1
        dim.list[[idx]] = c(P=P, N=N)
    }
}

rou.list = seq(0.1, 0.9, 0.2)

files = NULL
for (rou in rou.list){
  for (dim in dim.list){
    p = dim[1]
    n = dim[2]
    files = cbind(files, paste0(dir, '/sim_toeplitz_corr', rou, paste('P', p, 'N', n, sep='_'), '.RData', sep=''))
  }
}

In [3]:
avg_FDR = NULL
for (i in 1:length(files)){
    sim_file = files[i]
    load(sim_file, dat <- new.env())
    sub = dat$sim_array[[i]]
    p = sub$p # take true values from 1st replicate of each simulated data
    coef = sub$beta
    coef.true = which(coef != 0)
    
    tt = results_toe_compLasso[[i]]$Stab.table
    
    FDR = NULL
    for (r in 1:nrow(tt)){
        FDR = c(FDR, length(setdiff(which(tt[r, ] !=0), coef.true))/sum(tt[r, ]))

    }
    
    avg_FDR = c(avg_FDR, mean(FDR, na.rm=T))
}

In [4]:
table_toe = NULL
tmp_num_select = rep(0, length(results_toe_compLasso))
for (i in 1:length(results_toe_compLasso)){
    table_toe = rbind(table_toe, results_toe_compLasso[[i]][c('n', 'p', 'rou', 'FP', 'FN', 'MSE', 'Stab')])
    tmp_num_select[i] = mean(rowSums(results_toe_compLasso[[i]]$Stab.table))
    
}
table_toe = as.data.frame(table_toe)
table_toe$num_select = tmp_num_select
table_toe$FDR = round(avg_FDR,2)

In [5]:
head(table_toe)

n,p,rou,FP,FN,MSE,Stab,num_select,FDR
50,50,0.1,3.68 ( 0.29 ),0.06 ( 0.03 ),0.88 ( 0.05 ),0.56,9.62,0.33
100,50,0.1,1.51 ( 0.35 ),0 ( 0 ),0.87 ( 0.05 ),0.77,7.51,0.12
500,50,0.1,0.73 ( 0.19 ),0 ( 0 ),0.99 ( 0.05 ),0.88,6.73,0.07
1000,50,0.1,0.55 ( 0.17 ),0 ( 0 ),1.09 ( 0.04 ),0.9,6.55,0.05
50,100,0.1,4.24 ( 0.3 ),0.23 ( 0.06 ),1.12 ( 0.1 ),0.53,10.01,0.37
100,100,0.1,1.51 ( 0.34 ),0 ( 0 ),0.85 ( 0.04 ),0.79,7.51,0.13


In [6]:
# export result
result.table_toe <- apply(table_toe,2,as.character)
rownames(result.table_toe) = rownames(table_toe)
result.table_toe = as.data.frame(result.table_toe)

# extract numbers only for 'n' & 'p'
result.table_toe$n = tidyr::extract_numeric(result.table_toe$n)
result.table_toe$p = tidyr::extract_numeric(result.table_toe$p)
result.table_toe$ratio = result.table_toe$p / result.table_toe$n

result.table_toe = result.table_toe[c('n', 'p', 'rou', 'ratio', 'Stab', 'MSE', 'FP', 'FN', 'num_select', 'FDR')]
colnames(result.table_toe)[1:4] = c('N', 'P', 'Corr', 'Ratio')

extract_numeric() is deprecated: please use readr::parse_number() instead
extract_numeric() is deprecated: please use readr::parse_number() instead


In [7]:
# convert interested measurements to be numeric
result.table_toe$Stab = as.numeric(as.character(result.table_toe$Stab))
result.table_toe$MSE_mean = as.numeric(substr(result.table_toe$MSE, start=1, stop=4))
result.table_toe$FP_mean = as.numeric(substr(result.table_toe$FP, start=1, stop=4))
result.table_toe$FN_mean = as.numeric(substr(result.table_toe$FN, start=1, stop=4))
result.table_toe$FN_mean[is.na(result.table_toe$FN_mean)] = 0
result.table_toe$num_select = as.numeric(as.character(result.table_toe$num_select))

“NAs introduced by coercion”

In [8]:
# check whether missing values exists
result.table_toe[rowSums(is.na(result.table_toe)) > 0,]

N,P,Corr,Ratio,Stab,MSE,FP,FN,num_select,FDR,MSE_mean,FP_mean,FN_mean


In [9]:
head(result.table_toe)

N,P,Corr,Ratio,Stab,MSE,FP,FN,num_select,FDR,MSE_mean,FP_mean,FN_mean
50,50,0.1,1.0,0.56,0.88 ( 0.05 ),3.68 ( 0.29 ),0.06 ( 0.03 ),9.62,0.33,0.88,3.68,0.06
100,50,0.1,0.5,0.77,0.87 ( 0.05 ),1.51 ( 0.35 ),0 ( 0 ),7.51,0.12,0.87,1.51,0.0
500,50,0.1,0.1,0.88,0.99 ( 0.05 ),0.73 ( 0.19 ),0 ( 0 ),6.73,0.07,0.99,0.73,0.0
1000,50,0.1,0.05,0.9,1.09 ( 0.04 ),0.55 ( 0.17 ),0 ( 0 ),6.55,0.05,1.09,0.55,0.0
50,100,0.1,2.0,0.53,1.12 ( 0.1 ),4.24 ( 0.3 ),0.23 ( 0.06 ),10.01,0.37,1.12,4.24,0.23
100,100,0.1,1.0,0.79,0.85 ( 0.04 ),1.51 ( 0.34 ),0 ( 0 ),7.51,0.13,0.85,1.51,0.0


In [10]:
tail(result.table_toe)

Unnamed: 0,N,P,Corr,Ratio,Stab,MSE,FP,FN,num_select,FDR,MSE_mean,FP_mean,FN_mean
75,500,500,0.9,1.0,0.41,0.34 ( 0.01 ),8.17 ( 0.48 ),0.17 ( 0.04 ),14.0,0.54,0.34,8.17,0.17
76,1000,500,0.9,0.5,0.58,0.32 ( 0 ),4.24 ( 0.3 ),0.01 ( 0.01 ),10.23,0.37,0.32,4.24,0.01
77,50,1000,0.9,20.0,0.19,1.04 ( 0.06 ),5.24 ( 0.46 ),4.23 ( 0.08 ),7.01,0.6,1.04,5.24,4.23
78,100,1000,0.9,10.0,0.22,0.67 ( 0.02 ),9.03 ( 0.73 ),3.02 ( 0.07 ),12.01,0.66,0.67,9.03,3.02
79,500,1000,0.9,2.0,0.34,0.35 ( 0.01 ),10.52 ( 0.68 ),0.32 ( 0.05 ),16.2,0.59,0.35,10.5,0.32
80,1000,1000,0.9,1.0,0.57,0.33 ( 0 ),4.48 ( 0.3 ),0.01 ( 0.01 ),10.47,0.39,0.33,4.48,0.01


In [11]:
## export
write.table(result.table_toe, '../results_summary_cts/sim_toe_compLasso.txt', sep='\t', row.names=F)