### summarize random forests results on Toeplitz Simulation Scenarios for continuous outcome

In [1]:
dir = '/panfs/panfs1.ucsd.edu/panscratch/lij014/Stability_2020/sim_data'

load(paste0(dir, '/toe_RF.RData'))

In [2]:
dim.list = list()
size = c(50, 100, 500, 1000)
idx = 0
for (P in size){
    for (N in size){
        idx = idx + 1
        dim.list[[idx]] = c(P=P, N=N)
    }
}

rou.list = seq(0.1, 0.9, 0.2)

files = NULL
for (rou in rou.list){
  for (dim in dim.list){
    p = dim[1]
    n = dim[2]
    files = cbind(files, paste0(dir, '/sim_toeplitz_corr', rou, paste('P', p, 'N', n, sep='_'), '.RData', sep=''))
  }
}

In [3]:
avg_FDR = NULL
for (i in 1:length(files)){
    sim_file = files[i]
    load(sim_file, dat <- new.env())
    sub = dat$sim_array[[i]]
    p = sub$p # take true values from 1st replicate of each simulated data
    coef = sub$beta
    coef.true = which(coef != 0)
    
    tt = results_toe_rf[[i]]$Stab.table
    
    FDR = NULL
    for (r in 1:nrow(tt)){
        FDR = c(FDR, length(setdiff(which(tt[r, ] !=0), coef.true))/sum(tt[r, ]))

    }
    
    avg_FDR = c(avg_FDR, mean(FDR, na.rm=T))
}

In [4]:
table_toe = NULL
tmp_num_select = rep(0, length(results_toe_rf))
for (i in 1:length(results_toe_rf)){
    results_toe_rf[[i]]$OOB = paste(round(mean(results_toe_rf[[i]]$OOB.list, na.rm=T),2),
                                    '(', round(FSA::se(results_toe_rf[[i]]$OOB.list, na.rm=T),2), ')')
    table_toe = rbind(table_toe, results_toe_rf[[i]][c('n', 'p', 'rou', 'FP', 'FN', 'MSE', 'Stab', 'OOB')])
    tmp_num_select[i] = mean(rowSums(results_toe_rf[[i]]$Stab.table))
}
table_toe = as.data.frame(table_toe)
table_toe$num_select = tmp_num_select
table_toe$FDR = round(avg_FDR,2)

In [5]:
head(table_toe)

n,p,rou,FP,FN,MSE,Stab,OOB,num_select,FDR
50,50,0.1,1 ( 0 ),6 ( 0 ),1.4 ( 0.06 ),,1.98 ( 0.02 ),0.0,
100,50,0.1,1.87 ( 0.14 ),4.25 ( 0.09 ),0.86 ( 0.03 ),0.19,1.76 ( 0.01 ),3.62,0.48
500,50,0.1,0.32 ( 0.05 ),1.81 ( 0.07 ),0.44 ( 0.01 ),0.77,1.29 ( 0 ),4.51,0.06
1000,50,0.1,0.15 ( 0.04 ),0.93 ( 0.07 ),0.34 ( 0 ),0.86,1.15 ( 0 ),5.22,0.02
50,100,0.1,1 ( 0 ),6 ( 0 ),1.36 ( 0.06 ),,2.19 ( 0.02 ),0.0,
100,100,0.1,4.11 ( 0.18 ),4.17 ( 0.1 ),0.93 ( 0.03 ),0.12,1.99 ( 0.01 ),5.93,0.68


In [6]:
# export result
result.table_toe <- apply(table_toe,2,as.character)
rownames(result.table_toe) = rownames(table_toe)
result.table_toe = as.data.frame(result.table_toe)

# extract numbers only for 'n' & 'p'
result.table_toe$n = tidyr::extract_numeric(result.table_toe$n)
result.table_toe$p = tidyr::extract_numeric(result.table_toe$p)
result.table_toe$ratio = result.table_toe$p / result.table_toe$n

result.table_toe = result.table_toe[c('n', 'p', 'rou', 'ratio', 'Stab', 'MSE', 'FP', 'FN', 'OOB', 'num_select', 'FDR')]
colnames(result.table_toe)[1:4] = c('N', 'P', 'Corr', 'Ratio')

extract_numeric() is deprecated: please use readr::parse_number() instead
extract_numeric() is deprecated: please use readr::parse_number() instead


In [7]:
head(result.table_toe)

N,P,Corr,Ratio,Stab,MSE,FP,FN,OOB,num_select,FDR
50,50,0.1,1.0,,1.4 ( 0.06 ),1 ( 0 ),6 ( 0 ),1.98 ( 0.02 ),0.0,
100,50,0.1,0.5,0.19,0.86 ( 0.03 ),1.87 ( 0.14 ),4.25 ( 0.09 ),1.76 ( 0.01 ),3.62,0.48
500,50,0.1,0.1,0.77,0.44 ( 0.01 ),0.32 ( 0.05 ),1.81 ( 0.07 ),1.29 ( 0 ),4.51,0.06
1000,50,0.1,0.05,0.86,0.34 ( 0 ),0.15 ( 0.04 ),0.93 ( 0.07 ),1.15 ( 0 ),5.22,0.02
50,100,0.1,2.0,,1.36 ( 0.06 ),1 ( 0 ),6 ( 0 ),2.19 ( 0.02 ),0.0,
100,100,0.1,1.0,0.12,0.93 ( 0.03 ),4.11 ( 0.18 ),4.17 ( 0.1 ),1.99 ( 0.01 ),5.93,0.68


In [8]:
# convert interested measurements to be numeric
result.table_toe$Stab = as.numeric(as.character(result.table_toe$Stab))
result.table_toe$MSE_mean = as.numeric(substr(result.table_toe$MSE, start=1, stop=4))
result.table_toe$FP_mean = as.numeric(substr(result.table_toe$FP, start=1, stop=4))
result.table_toe$FN_mean = as.numeric(substr(result.table_toe$FN, start=1, stop=4))
result.table_toe$FN_mean[is.na(result.table_toe$FN_mean)] = 0
result.table_toe$OOB_mean = as.numeric(substr(result.table_toe$OOB, start=1, stop=4))
result.table_toe$num_select = as.numeric(as.character(result.table_toe$num_select))

“NAs introduced by coercion”

In [9]:
# check whether missing values exists
result.table_toe[rowSums(is.na(result.table_toe)) > 0,]

Unnamed: 0,N,P,Corr,Ratio,Stab,MSE,FP,FN,OOB,num_select,FDR,MSE_mean,FP_mean,FN_mean,OOB_mean
1,50,50,0.1,1,,1.4 ( 0.06 ),1 ( 0 ),6 ( 0 ),1.98 ( 0.02 ),0.0,,1.4,,0.0,1.98
5,50,100,0.1,2,,1.36 ( 0.06 ),1 ( 0 ),6 ( 0 ),2.19 ( 0.02 ),0.0,,1.36,,0.0,2.19
9,50,500,0.1,10,,1.29 ( 0.05 ),1 ( 0 ),6 ( 0 ),2.39 ( 0.02 ),0.0,,1.29,,0.0,2.39
13,50,1000,0.1,20,,1.35 ( 0.06 ),1 ( 0 ),6 ( 0 ),2.42 ( 0.02 ),0.0,,1.35,,0.0,2.42
14,100,1000,0.1,10,0.01,1 ( 0.03 ),49.42 ( 0.7 ),4.71 ( 0.09 ),2.39 ( 0.02 ),50.71,0.97,,49.4,4.71,2.39
17,50,50,0.3,1,,1.22 ( 0.05 ),1 ( 0 ),6 ( 0 ),1.87 ( 0.02 ),0.0,,1.22,,0.0,1.87
21,50,100,0.3,2,,1.15 ( 0.05 ),1 ( 0 ),6 ( 0 ),2.03 ( 0.02 ),0.0,,1.15,,0.0,2.03
25,50,500,0.3,10,,1.14 ( 0.05 ),1 ( 0 ),6 ( 0 ),2.24 ( 0.02 ),0.0,,1.14,,0.0,2.24
29,50,1000,0.3,20,,1.2 ( 0.05 ),1 ( 0 ),6 ( 0 ),2.29 ( 0.02 ),0.0,,1.2,,0.0,2.29
33,50,50,0.5,1,,0.99 ( 0.05 ),1 ( 0 ),6 ( 0 ),1.68 ( 0.02 ),0.0,,0.99,,0.0,1.68


In [10]:
# recover values
result.table_toe$Stab[is.na(result.table_toe$Stab)] = 0
result.table_toe$MSE_mean[is.na(result.table_toe$MSE_mean)] = 1
result.table_toe$FP_mean[is.na(result.table_toe$FP_mean)] = 1
result.table_toe$FN_mean[result.table_toe$num_select == 0] = 6
result.table_toe$OOB_mean[is.na(result.table_toe$OOB_mean)] = 1

In [11]:
result.table_toe[c(1,5,9,13,14,17,21,25,29,33,37,41,45,49,53,57,61,65,69,73,77), ]

Unnamed: 0,N,P,Corr,Ratio,Stab,MSE,FP,FN,OOB,num_select,FDR,MSE_mean,FP_mean,FN_mean,OOB_mean
1,50,50,0.1,1,0.0,1.4 ( 0.06 ),1 ( 0 ),6 ( 0 ),1.98 ( 0.02 ),0.0,,1.4,1.0,6.0,1.98
5,50,100,0.1,2,0.0,1.36 ( 0.06 ),1 ( 0 ),6 ( 0 ),2.19 ( 0.02 ),0.0,,1.36,1.0,6.0,2.19
9,50,500,0.1,10,0.0,1.29 ( 0.05 ),1 ( 0 ),6 ( 0 ),2.39 ( 0.02 ),0.0,,1.29,1.0,6.0,2.39
13,50,1000,0.1,20,0.0,1.35 ( 0.06 ),1 ( 0 ),6 ( 0 ),2.42 ( 0.02 ),0.0,,1.35,1.0,6.0,2.42
14,100,1000,0.1,10,0.01,1 ( 0.03 ),49.42 ( 0.7 ),4.71 ( 0.09 ),2.39 ( 0.02 ),50.71,0.97,1.0,49.4,4.71,2.39
17,50,50,0.3,1,0.0,1.22 ( 0.05 ),1 ( 0 ),6 ( 0 ),1.87 ( 0.02 ),0.0,,1.22,1.0,6.0,1.87
21,50,100,0.3,2,0.0,1.15 ( 0.05 ),1 ( 0 ),6 ( 0 ),2.03 ( 0.02 ),0.0,,1.15,1.0,6.0,2.03
25,50,500,0.3,10,0.0,1.14 ( 0.05 ),1 ( 0 ),6 ( 0 ),2.24 ( 0.02 ),0.0,,1.14,1.0,6.0,2.24
29,50,1000,0.3,20,0.0,1.2 ( 0.05 ),1 ( 0 ),6 ( 0 ),2.29 ( 0.02 ),0.0,,1.2,1.0,6.0,2.29
33,50,50,0.5,1,0.0,0.99 ( 0.05 ),1 ( 0 ),6 ( 0 ),1.68 ( 0.02 ),0.0,,0.99,1.0,6.0,1.68


In [12]:
head(result.table_toe)

N,P,Corr,Ratio,Stab,MSE,FP,FN,OOB,num_select,FDR,MSE_mean,FP_mean,FN_mean,OOB_mean
50,50,0.1,1.0,0.0,1.4 ( 0.06 ),1 ( 0 ),6 ( 0 ),1.98 ( 0.02 ),0.0,,1.4,1.0,6.0,1.98
100,50,0.1,0.5,0.19,0.86 ( 0.03 ),1.87 ( 0.14 ),4.25 ( 0.09 ),1.76 ( 0.01 ),3.62,0.48,0.86,1.87,4.25,1.76
500,50,0.1,0.1,0.77,0.44 ( 0.01 ),0.32 ( 0.05 ),1.81 ( 0.07 ),1.29 ( 0 ),4.51,0.06,0.44,0.32,1.81,1.29
1000,50,0.1,0.05,0.86,0.34 ( 0 ),0.15 ( 0.04 ),0.93 ( 0.07 ),1.15 ( 0 ),5.22,0.02,0.34,0.15,0.93,1.15
50,100,0.1,2.0,0.0,1.36 ( 0.06 ),1 ( 0 ),6 ( 0 ),2.19 ( 0.02 ),0.0,,1.36,1.0,6.0,2.19
100,100,0.1,1.0,0.12,0.93 ( 0.03 ),4.11 ( 0.18 ),4.17 ( 0.1 ),1.99 ( 0.01 ),5.93,0.68,0.93,4.11,4.17,1.99


In [13]:
tail(result.table_toe)

Unnamed: 0,N,P,Corr,Ratio,Stab,MSE,FP,FN,OOB,num_select,FDR,MSE_mean,FP_mean,FN_mean,OOB_mean
75,500,500,0.9,1.0,0.14,0.17 ( 0 ),21.38 ( 0.6 ),3 ( 0.07 ),0.99 ( 0 ),24.38,0.87,0.17,21.3,0.0,0.99
76,1000,500,0.9,0.5,0.2,0.16 ( 0 ),18.68 ( 0.53 ),2.35 ( 0.06 ),0.96 ( 0 ),22.33,0.83,0.16,18.6,2.35,0.96
77,50,1000,0.9,20.0,0.0,0.29 ( 0.01 ),1 ( 0 ),6 ( 0 ),1.11 ( 0.01 ),0.0,,0.29,1.0,6.0,1.11
78,100,1000,0.9,10.0,0.01,0.2 ( 0.01 ),49.91 ( 0.83 ),4.99 ( 0.09 ),1.09 ( 0.01 ),50.92,0.98,0.2,49.9,4.99,1.09
79,500,1000,0.9,2.0,0.06,0.19 ( 0 ),46.34 ( 0.86 ),3.13 ( 0.08 ),1.04 ( 0 ),49.21,0.94,0.19,46.3,3.13,1.04
80,1000,1000,0.9,1.0,0.09,0.18 ( 0 ),43.52 ( 0.89 ),2.7 ( 0.07 ),1.02 ( 0 ),46.82,0.93,0.18,43.5,2.7,1.02


In [14]:
## export
write.table(result.table_toe, '../results_summary_cts/sim_toe_rf.txt', sep='\t', row.names=F)