### summarize random forests results on Block Simulation Scenarios for continuous outcome

In [1]:
dir = '/panfs/panfs1.ucsd.edu/panscratch/lij014/Stability_2020/sim_data'

load(paste0(dir, '/block_RF.RData'))

In [2]:
dim.list = list()
size = c(50, 100, 500, 1000)
idx = 0
for (P in size){
    for (N in size){
        idx = idx + 1
        dim.list[[idx]] = c(P=P, N=N)
    }
}

rou.list = seq(0.1, 0.9, 0.2)

files = NULL
for (rou in rou.list){
  for (dim in dim.list){
    p = dim[1]
    n = dim[2]
    files = cbind(files, paste0(dir, '/sim_block_corr', rou, paste('P', p, 'N', n, sep='_'), '.RData', sep=''))
  }
}

In [3]:
avg_FDR = NULL
for (i in 1:length(files)){
    sim_file = files[i]
    load(sim_file, dat <- new.env())
    sub = dat$sim_array[[i]]
    p = sub$p # take true values from 1st replicate of each simulated data
    coef = sub$beta
    coef.true = which(coef != 0)
    
    tt = results_block_rf[[i]]$Stab.table
    
    FDR = NULL
    for (r in 1:nrow(tt)){
        FDR = c(FDR, length(setdiff(which(tt[r, ] !=0), coef.true))/sum(tt[r, ]))

    }
    
    avg_FDR = c(avg_FDR, mean(FDR, na.rm=T))
}

In [4]:
table_block = NULL
tmp_num_select = rep(0, length(results_block_rf))
for (i in 1:length(results_block_rf)){
    results_block_rf[[i]]$OOB = paste(round(mean(results_block_rf[[i]]$OOB.list, na.rm=T),2),
                                    '(', round(FSA::se(results_block_rf[[i]]$OOB.list, na.rm=T),2), ')')
    table_block = rbind(table_block, results_block_rf[[i]][c('n', 'p', 'rou', 'FP', 'FN', 'MSE', 'Stab', 'OOB')])
    tmp_num_select[i] = mean(rowSums(results_block_rf[[i]]$Stab.table))
}
table_block = as.data.frame(table_block)
table_block$num_select = tmp_num_select
table_block$FDR = round(avg_FDR,2)

In [6]:
head(table_block)

n,p,rou,FP,FN,MSE,Stab,OOB,num_select,FDR
50,50,0.1,1 ( 0 ),6 ( 0 ),0.08 ( 0 ),,0.59 ( 0.01 ),0.0,
100,50,0.1,2.26 ( 0.17 ),5.06 ( 0.12 ),0.07 ( 0 ),0.03,0.58 ( 0 ),3.04,0.7
500,50,0.1,2.34 ( 0.16 ),3.77 ( 0.14 ),0.05 ( 0 ),0.23,0.56 ( 0 ),4.56,0.49
1000,50,0.1,2.37 ( 0.16 ),2.54 ( 0.11 ),0.05 ( 0 ),0.51,0.55 ( 0 ),5.83,0.38
50,100,0.1,1 ( 0 ),6 ( 0 ),0.08 ( 0 ),,0.6 ( 0.01 ),0.0,
100,100,0.1,4.76 ( 0.27 ),5.51 ( 0.09 ),0.07 ( 0 ),0.01,0.58 ( 0 ),5.24,0.91


In [7]:
# export result
result.table_block <- apply(table_block,2,as.character)
rownames(result.table_block) = rownames(table_block)
result.table_block = as.data.frame(result.table_block)

# extract numbers only for 'n' & 'p'
result.table_block$n = tidyr::extract_numeric(result.table_block$n)
result.table_block$p = tidyr::extract_numeric(result.table_block$p)
result.table_block$ratio = result.table_block$p / result.table_block$n

result.table_block = result.table_block[c('n', 'p', 'rou', 'ratio', 'Stab', 'MSE', 'FP', 'FN', 'OOB', 'num_select', 'FDR')]
colnames(result.table_block)[1:4] = c('N', 'P', 'Corr', 'Ratio')

extract_numeric() is deprecated: please use readr::parse_number() instead
extract_numeric() is deprecated: please use readr::parse_number() instead


In [8]:
# convert interested measurements to be numeric
result.table_block$Stab = as.numeric(as.character(result.table_block$Stab))
result.table_block$MSE_mean = as.numeric(substr(result.table_block$MSE, start=1, stop=4))
result.table_block$FP_mean = as.numeric(substr(result.table_block$FP, start=1, stop=4))
result.table_block$FN_mean = as.numeric(substr(result.table_block$FN, start=1, stop=4))
result.table_block$FN_mean[is.na(result.table_block$FN_mean)] = 0
result.table_block$OOB_mean = as.numeric(substr(result.table_block$OOB, start=1, stop=4))
result.table_block$num_select = as.numeric(as.character(result.table_block$num_select))

“NAs introduced by coercion”

In [9]:
# check whether missing values exists
result.table_block[rowSums(is.na(result.table_block)) > 0,]

Unnamed: 0,N,P,Corr,Ratio,Stab,MSE,FP,FN,OOB,num_select,FDR,MSE_mean,FP_mean,FN_mean,OOB_mean
1,50,50,0.1,1,,0.08 ( 0 ),1 ( 0 ),6 ( 0 ),0.59 ( 0.01 ),0.0,,0.08,,0.0,0.59
5,50,100,0.1,2,,0.08 ( 0 ),1 ( 0 ),6 ( 0 ),0.6 ( 0.01 ),0.0,,0.08,,0.0,0.6
9,50,500,0.1,10,,0.08 ( 0 ),1 ( 0 ),6 ( 0 ),0.59 ( 0.01 ),0.0,,0.08,,0.0,0.59
13,50,1000,0.1,20,,0.08 ( 0 ),1 ( 0 ),6 ( 0 ),0.59 ( 0.01 ),0.0,,0.08,,0.0,0.59
17,50,50,0.3,1,,0.31 ( 0.01 ),1 ( 0 ),6 ( 0 ),1.14 ( 0.01 ),0.0,,0.31,,0.0,1.14
21,50,100,0.3,2,,0.32 ( 0.01 ),1 ( 0 ),6 ( 0 ),1.2 ( 0.01 ),0.0,,0.32,,0.0,1.2
25,50,500,0.3,10,,0.36 ( 0.02 ),1 ( 0 ),6 ( 0 ),1.23 ( 0.01 ),0.0,,0.36,,0.0,1.23
29,50,1000,0.3,20,,0.34 ( 0.01 ),1 ( 0 ),6 ( 0 ),1.26 ( 0.01 ),0.0,,0.34,,0.0,1.26
33,50,50,0.5,1,,0.6 ( 0.03 ),1 ( 0 ),6 ( 0 ),1.46 ( 0.01 ),0.0,,0.6,,0.0,1.46
37,50,100,0.5,2,,0.6 ( 0.02 ),1 ( 0 ),6 ( 0 ),1.56 ( 0.01 ),0.0,,0.6,,0.0,1.56


In [10]:
# recover values
result.table_block$Stab[is.na(result.table_block$Stab)] = 0
result.table_block$FP_mean[is.na(result.table_block$FP_mean)] = 1
result.table_block$OOB_mean[is.na(result.table_block$OOB_mean)] = 2
result.table_block$FN_mean[result.table_block$num_select == 0] = 6

In [11]:
result.table_block[c(1,5,9,13,17,21,25,29,33,37,41,45,49,53,57,61,65,69,73,77), ]

Unnamed: 0,N,P,Corr,Ratio,Stab,MSE,FP,FN,OOB,num_select,FDR,MSE_mean,FP_mean,FN_mean,OOB_mean
1,50,50,0.1,1,0,0.08 ( 0 ),1 ( 0 ),6 ( 0 ),0.59 ( 0.01 ),0,,0.08,1,6,0.59
5,50,100,0.1,2,0,0.08 ( 0 ),1 ( 0 ),6 ( 0 ),0.6 ( 0.01 ),0,,0.08,1,6,0.6
9,50,500,0.1,10,0,0.08 ( 0 ),1 ( 0 ),6 ( 0 ),0.59 ( 0.01 ),0,,0.08,1,6,0.59
13,50,1000,0.1,20,0,0.08 ( 0 ),1 ( 0 ),6 ( 0 ),0.59 ( 0.01 ),0,,0.08,1,6,0.59
17,50,50,0.3,1,0,0.31 ( 0.01 ),1 ( 0 ),6 ( 0 ),1.14 ( 0.01 ),0,,0.31,1,6,1.14
21,50,100,0.3,2,0,0.32 ( 0.01 ),1 ( 0 ),6 ( 0 ),1.2 ( 0.01 ),0,,0.32,1,6,1.2
25,50,500,0.3,10,0,0.36 ( 0.02 ),1 ( 0 ),6 ( 0 ),1.23 ( 0.01 ),0,,0.36,1,6,1.23
29,50,1000,0.3,20,0,0.34 ( 0.01 ),1 ( 0 ),6 ( 0 ),1.26 ( 0.01 ),0,,0.34,1,6,1.26
33,50,50,0.5,1,0,0.6 ( 0.03 ),1 ( 0 ),6 ( 0 ),1.46 ( 0.01 ),0,,0.6,1,6,1.46
37,50,100,0.5,2,0,0.6 ( 0.02 ),1 ( 0 ),6 ( 0 ),1.56 ( 0.01 ),0,,0.6,1,6,1.56


In [12]:
head(result.table_block)

N,P,Corr,Ratio,Stab,MSE,FP,FN,OOB,num_select,FDR,MSE_mean,FP_mean,FN_mean,OOB_mean
50,50,0.1,1.0,0.0,0.08 ( 0 ),1 ( 0 ),6 ( 0 ),0.59 ( 0.01 ),0.0,,0.08,1.0,6.0,0.59
100,50,0.1,0.5,0.03,0.07 ( 0 ),2.26 ( 0.17 ),5.06 ( 0.12 ),0.58 ( 0 ),3.04,0.7,0.07,2.26,5.06,0.58
500,50,0.1,0.1,0.23,0.05 ( 0 ),2.34 ( 0.16 ),3.77 ( 0.14 ),0.56 ( 0 ),4.56,0.49,0.05,2.34,3.77,0.56
1000,50,0.1,0.05,0.51,0.05 ( 0 ),2.37 ( 0.16 ),2.54 ( 0.11 ),0.55 ( 0 ),5.83,0.38,0.05,2.37,2.54,0.55
50,100,0.1,2.0,0.0,0.08 ( 0 ),1 ( 0 ),6 ( 0 ),0.6 ( 0.01 ),0.0,,0.08,1.0,6.0,0.6
100,100,0.1,1.0,0.01,0.07 ( 0 ),4.76 ( 0.27 ),5.51 ( 0.09 ),0.58 ( 0 ),5.24,0.91,0.07,4.76,5.51,0.58


In [13]:
tail(result.table_block)

Unnamed: 0,N,P,Corr,Ratio,Stab,MSE,FP,FN,OOB,num_select,FDR,MSE_mean,FP_mean,FN_mean,OOB_mean
75,500,500,0.9,1.0,0.12,0.73 ( 0.01 ),20.83 ( 0.45 ),2.04 ( 0.09 ),2 ( 0.01 ),24.79,0.84,0.73,20.8,2.04,2.0
76,1000,500,0.9,0.5,0.18,0.68 ( 0.01 ),18.47 ( 0.43 ),1 ( 0.07 ),1.92 ( 0 ),23.47,0.78,0.68,18.4,0.0,1.92
77,50,1000,0.9,20.0,0.0,1.22 ( 0.05 ),1 ( 0 ),6 ( 0 ),2.3 ( 0.03 ),0.0,,1.22,1.0,6.0,2.3
78,100,1000,0.9,10.0,0.01,0.93 ( 0.02 ),49.18 ( 0.69 ),4.67 ( 0.09 ),2.22 ( 0.02 ),50.51,0.97,0.93,49.1,4.67,2.22
79,500,1000,0.9,2.0,0.06,0.78 ( 0.01 ),46.01 ( 0.7 ),2.14 ( 0.09 ),2.12 ( 0.01 ),49.87,0.92,0.78,46.0,2.14,2.12
80,1000,1000,0.9,1.0,0.08,0.74 ( 0.01 ),43.05 ( 0.72 ),1.17 ( 0.08 ),2.07 ( 0 ),47.88,0.9,0.74,43.0,1.17,2.07


In [14]:
## export
write.table(result.table_block, '../results_summary_cts/sim_block_rf.txt', sep='\t', row.names=F)