## Introduction

In this notebook, we will use the 61 patients with fully complete data from all MRI scans and see what we can learn about whether the parameters associate with tumor score outcome. 

## Prepare the data

In [2]:
options(repr.matrix.max.rows=600, repr.matrix.max.cols=200)

In [59]:
library(randomForest)
library(randomForestSRC)
library(caret)

Loading required package: lattice
Loading required package: ggplot2

Attaching package: ‘ggplot2’

The following object is masked from ‘package:randomForest’:

    margin



In [4]:
## reading in the data: 
recgli = read.csv('/home/sf673542/DataWrangling/ParseData/7thParse_researchPath_withInVivo.csv')
dim(recgli)

In [5]:
## parse the data down to only those that have all advanced modalities: 
recgli = recgli[which(recgli$include_all_advanced_mods == 1),]
dim(recgli)

In [6]:
## inspect which columns still have NA by using previously defined criteria: 
colSums(is.na(recgli))

In [7]:
## this is fine; we weren't going to use diffusion b=2000 data anyway, and nt1d information needs to be recalculated. 
## we will exclude the nt1v situation: 
recgli = recgli[!is.na(recgli$nt1v),]
dim(recgli)

In [8]:
## use this data frame to index the columns: 
data.frame(index = 1:length(colnames(recgli)), colnames = colnames(recgli))

index,colnames
1,b_number
2,t_number
3,sf_number
4,current_scan_date
5,current_surgery_date
6,current_hist_grade
7,current_hist_type
8,roi.label
9,tumor_cell_evaluation
10,necrosis


In [102]:
## let's label the features by group: 
anat_fts = colnames(recgli)[16:19]
anat_d_fts = colnames(recgli)[16:20]
anat_roi_fts = colnames(recgli)[c(66:70)]
diffu1000_fts = colnames(recgli)[21:26]
perf_fts = colnames(recgli)[33:36]
spec_fts = colnames(recgli)[37:43]
study_fts = colnames(recgli)[46]

In [10]:
## look at the distribution of the outcome: 
table(recgli$tumor_cell_evaluation)


      0   1   2   3 Ind 
  0  21  17  29  31   6 

In [11]:
## remove those with "indeterminable" outcome: 
recgli = recgli[which(recgli$tumor_cell_evaluation != "Ind"),]
recgli = droplevels.data.frame(recgli)
dim(recgli)

In [12]:
## look at the distribution of the outcome again: 
table(recgli$tumor_cell_evaluation)


 0  1  2  3 
21 17 29 31 

In [13]:
## here we have a fairly even split among all of these; this might work out in a random split with no stratification 
## we still need to split by patient though. 
unique_tnums = unique(recgli$t_number)
length(unique_tnums)

## Split the data by patient while making sure there is an even distribution of tumor scores in both training and testing: 

In [14]:
## do an 75/25 patient split: 
set.seed(4)
train_tnums = unique_tnums[sample(c(1:length(unique_tnums)), size = round(length(unique_tnums)*.75))]
test_tnums = unique_tnums[! unique_tnums %in% train_tnums]

In [15]:
## split data by patient split above: 
recgli_train= recgli[recgli$t_number %in% train_tnums, ]
dim(recgli_train)
recgli_test= recgli[recgli$t_number %in% test_tnums, ]
dim(recgli_test)

In [16]:
## make sure that the balance of outcome is good: 
table(recgli_train$tumor_cell_evaluation)
table(recgli_test$tumor_cell_evaluation)
## looks alright.


 0  1  2  3 
16 12 20 24 


0 1 2 3 
5 5 9 7 

## Create template for outcome of experiments: 

In [78]:
## let's extract confusion matrix and put that in a data frame: 
outcome_experiment_template = data.frame(expnum = NA,
                                        node_size = NA,
                                        mtry = NA,
                                        tr_out0_pred0 = NA, 
                                        tr_out0_pred1 = NA, 
                                        tr_out0_pred2 = NA, 
                                        tr_out0_pred3 = NA, 
                                        tr_out1_pred0 = NA, 
                                        tr_out1_pred1 = NA, 
                                        tr_out1_pred2 = NA, 
                                        tr_out1_pred3 = NA, 
                                        tr_out2_pred0 = NA, 
                                        tr_out2_pred1 = NA, 
                                        tr_out2_pred2 = NA, 
                                        tr_out2_pred3 = NA, 
                                        tr_out3_pred0 = NA, 
                                        tr_out3_pred1 = NA, 
                                        tr_out3_pred2 = NA, 
                                        tr_out3_pred3 = NA, 
                                        tr_out0_classerror = NA, 
                                        tr_out1_classerror = NA, 
                                        tr_out2_classerror = NA, 
                                        tr_out3_classerror = NA, 
                                        cv_out0_pred0 = NA, 
                                        cv_out0_pred1 = NA, 
                                        cv_out0_pred2 = NA, 
                                        cv_out0_pred3 = NA, 
                                        cv_out1_pred0 = NA, 
                                        cv_out1_pred1 = NA, 
                                        cv_out1_pred2 = NA, 
                                        cv_out1_pred3 = NA, 
                                        cv_out2_pred0 = NA, 
                                        cv_out2_pred1 = NA, 
                                        cv_out2_pred2 = NA, 
                                        cv_out2_pred3 = NA, 
                                        cv_out3_pred0 = NA, 
                                        cv_out3_pred1 = NA, 
                                        cv_out3_pred2 = NA, 
                                        cv_out3_pred3 = NA)

## Experiment Set 1 - Only anatomical features: 

In [79]:
outcome_anat_experiment = outcome_experiment_template

In [73]:
rf = randomForest(tumor_cell_evaluation ~ nfse+nt1c+nt1v+nfl, data = recgli_train)
preds = predict(rf, recgli_test)

In [74]:
conf_mat = confusionMatrix(preds, recgli_test$tumor_cell_evaluation)
conf_mat$table
conf_mat$table["0", "2"]

          Reference
Prediction 0 1 2 3
         0 1 0 1 0
         1 2 2 1 3
         2 1 0 2 0
         3 1 3 5 4

### a. first experiment: use nfse, nt1c, nt1v, nfl

In [80]:
## let's begin by running experiments for anatomic features only: 
## for each experiment, we'll run the random forest with a variety of parameters 
expnum = 1
for (i in 1:100){
    set.seed(i)
    
    ## define features and outcome: 
    outcome = "tumor_cell_evaluation"
    features.addsign <- paste(anat_fts, collapse = "+")
    rf_formula = as.formula(paste(outcome, features.addsign, sep = "~"))
    
    ## define training and testing (from the training) basically for x-val: 
    uniqute_tnums = unique(recgli_train$t_number)
    train_tnums = unique_tnums[sample(c(1:length(unique_tnums)), size = round(length(unique_tnums)*.75))]
    test_tnums = unique_tnums[! unique_tnums %in% train_tnums]
    
    recgli_exp_train= recgli_train[recgli_train$t_number %in% train_tnums, ]
    recgli_exp_cv = recgli_train[! recgli_train$t_number %in% train_tnums, ]
    
    for (nodesize in c(1, 2, 3)){
        for (mtry in c(2, 3, 4)){
            rf = randomForest(formula = rf_formula, 
                 data = recgli_exp_train, nodesize = nodesize, mtry = mtry)
            
            cv_preds = predict(rf, recgli_exp_cv)
            
            cv_conf_mat = confusionMatrix(cv_preds, recgli_exp_cv$tumor_cell_evaluation)
            
            outcome_anat_experiment[expnum,'expnum'] = expnum
            outcome_anat_experiment[expnum,'node_size'] = nodesize
            outcome_anat_experiment[expnum,'mtry'] = mtry
            outcome_anat_experiment[expnum,'tr_out0_pred0'] = rf$confusion['0', '0']
            outcome_anat_experiment[expnum,'tr_out0_pred1'] = rf$confusion['0', '1']
            outcome_anat_experiment[expnum,'tr_out0_pred2'] = rf$confusion['0', '2']
            outcome_anat_experiment[expnum,'tr_out0_pred3'] = rf$confusion['0', '3']
            outcome_anat_experiment[expnum,'tr_out1_pred0'] = rf$confusion['1', '0']
            outcome_anat_experiment[expnum,'tr_out1_pred1'] = rf$confusion['1', '1']
            outcome_anat_experiment[expnum,'tr_out1_pred2'] = rf$confusion['1', '2']
            outcome_anat_experiment[expnum,'tr_out1_pred3'] = rf$confusion['1', '3']
            outcome_anat_experiment[expnum,'tr_out2_pred0'] = rf$confusion['2', '0']
            outcome_anat_experiment[expnum,'tr_out2_pred1'] = rf$confusion['2', '1']
            outcome_anat_experiment[expnum,'tr_out2_pred2'] = rf$confusion['2', '2']
            outcome_anat_experiment[expnum,'tr_out2_pred3'] = rf$confusion['2', '3']
            outcome_anat_experiment[expnum,'tr_out3_pred0'] = rf$confusion['3', '0']
            outcome_anat_experiment[expnum,'tr_out3_pred1'] = rf$confusion['3', '1']
            outcome_anat_experiment[expnum,'tr_out3_pred2'] = rf$confusion['3', '2']
            outcome_anat_experiment[expnum,'tr_out3_pred3'] = rf$confusion['3', '3']
            outcome_anat_experiment[expnum,'tr_out0_classerror'] = rf$confusion["0", 'class.error']
            outcome_anat_experiment[expnum,'tr_out1_classerror'] = rf$confusion["1", 'class.error']
            outcome_anat_experiment[expnum,'tr_out2_classerror'] = rf$confusion["2", 'class.error']
            outcome_anat_experiment[expnum,'tr_out3_classerror'] = rf$confusion["3", 'class.error']
            
            outcome_anat_experiment[expnum,'cv_out0_pred0'] = cv_conf_mat$table['0', '0']
            outcome_anat_experiment[expnum,'cv_out0_pred1'] = cv_conf_mat$table['1', '0']
            outcome_anat_experiment[expnum,'cv_out0_pred2'] = cv_conf_mat$table['2', '0']
            outcome_anat_experiment[expnum,'cv_out0_pred3'] = cv_conf_mat$table['3', '0']
            outcome_anat_experiment[expnum,'cv_out1_pred0'] = cv_conf_mat$table['0', '1']
            outcome_anat_experiment[expnum,'cv_out1_pred1'] = cv_conf_mat$table['1', '1']
            outcome_anat_experiment[expnum,'cv_out1_pred2'] = cv_conf_mat$table['2', '1']
            outcome_anat_experiment[expnum,'cv_out1_pred3'] = cv_conf_mat$table['3', '1']
            outcome_anat_experiment[expnum,'cv_out2_pred0'] = cv_conf_mat$table['0', '2']
            outcome_anat_experiment[expnum,'cv_out2_pred1'] = cv_conf_mat$table['1', '2']
            outcome_anat_experiment[expnum,'cv_out2_pred2'] = cv_conf_mat$table['2', '2']
            outcome_anat_experiment[expnum,'cv_out2_pred3'] = cv_conf_mat$table['3', '2']
            outcome_anat_experiment[expnum,'cv_out3_pred0'] = cv_conf_mat$table['0', '3']
            outcome_anat_experiment[expnum,'cv_out3_pred1'] = cv_conf_mat$table['1', '3']
            outcome_anat_experiment[expnum,'cv_out3_pred2'] = cv_conf_mat$table['2', '3']
            outcome_anat_experiment[expnum,'cv_out3_pred3'] = cv_conf_mat$table['3', '3']
            
            expnum = expnum + 1
        }
    }
}

In [81]:
outcome_anat_experiment1 = outcome_anat_experiment

In [82]:
head(outcome_anat_experiment1)

expnum,node_size,mtry,tr_out0_pred0,tr_out0_pred1,tr_out0_pred2,tr_out0_pred3,tr_out1_pred0,tr_out1_pred1,tr_out1_pred2,tr_out1_pred3,tr_out2_pred0,tr_out2_pred1,tr_out2_pred2,tr_out2_pred3,tr_out3_pred0,tr_out3_pred1,tr_out3_pred2,tr_out3_pred3,tr_out0_classerror,tr_out1_classerror,tr_out2_classerror,tr_out3_classerror,cv_out0_pred0,cv_out0_pred1,cv_out0_pred2,cv_out0_pred3,cv_out1_pred0,cv_out1_pred1,cv_out1_pred2,cv_out1_pred3,cv_out2_pred0,cv_out2_pred1,cv_out2_pred2,cv_out2_pred3,cv_out3_pred0,cv_out3_pred1,cv_out3_pred2,cv_out3_pred3
1,1,2,8,1,3,4,4,0,3,1,2,3,5,7,6,1,5,4,0.5,1,0.7058824,0.75,0,0,0,0,3,0,0,1,1,1,1,0,3,1,1,3
2,1,3,6,0,4,6,3,0,4,1,3,2,5,7,6,1,5,4,0.625,1,0.7058824,0.75,0,0,0,0,2,1,0,1,1,1,1,0,3,1,1,3
3,1,4,6,1,5,4,3,0,4,1,2,2,6,7,6,1,5,4,0.625,1,0.6470588,0.75,0,0,0,0,2,1,0,1,1,1,1,0,3,1,1,3
4,2,2,7,1,4,4,4,0,3,1,3,2,6,6,6,1,5,4,0.5625,1,0.6470588,0.75,0,0,0,0,2,1,0,1,1,1,1,0,3,1,1,3
5,2,3,7,1,3,5,3,0,4,1,2,2,7,6,5,1,6,4,0.5625,1,0.5882353,0.75,0,0,0,0,1,1,0,2,1,1,1,0,3,1,1,3
6,2,4,7,1,4,4,4,0,3,1,2,2,7,6,6,1,4,5,0.5625,1,0.5882353,0.6875,0,0,0,0,2,1,0,1,1,1,1,0,3,1,1,3


In [83]:
summary(outcome_anat_experiment1)

     expnum        node_size      mtry   tr_out0_pred0   tr_out0_pred1  
 Min.   :  1.0   Min.   :1   Min.   :2   Min.   :0.000   Min.   :0.000  
 1st Qu.:225.8   1st Qu.:1   1st Qu.:2   1st Qu.:2.000   1st Qu.:1.000  
 Median :450.5   Median :2   Median :3   Median :4.000   Median :1.000  
 Mean   :450.5   Mean   :2   Mean   :3   Mean   :3.641   Mean   :1.371  
 3rd Qu.:675.2   3rd Qu.:3   3rd Qu.:4   3rd Qu.:5.000   3rd Qu.:2.000  
 Max.   :900.0   Max.   :3   Max.   :4   Max.   :8.000   Max.   :4.000  
 tr_out0_pred2   tr_out0_pred3   tr_out1_pred0   tr_out1_pred1   
 Min.   :0.000   Min.   :1.000   Min.   :0.000   Min.   :0.0000  
 1st Qu.:2.000   1st Qu.:3.000   1st Qu.:1.000   1st Qu.:0.0000  
 Median :3.000   Median :4.000   Median :1.000   Median :0.0000  
 Mean   :2.984   Mean   :3.933   Mean   :1.323   Mean   :0.7244  
 3rd Qu.:4.000   3rd Qu.:5.000   3rd Qu.:2.000   3rd Qu.:1.0000  
 Max.   :7.000   Max.   :8.000   Max.   :5.000   Max.   :5.0000  
 tr_out1_pred2   tr_out1_pr

### b. Use all anatomical features: 

In [88]:
anat_fts
anat_roi_fts

In [89]:
outcome_anat_experiment = outcome_experiment_template

In [90]:
## let's begin by running experiments for anatomic features only: 
## for each experiment, we'll run the random forest with a variety of parameters 
expnum = 1
for (i in 1:100){
    set.seed(i)
    
    ## define features and outcome: 
    outcome = "tumor_cell_evaluation"
    features.addsign <- paste(anat_fts, anat_roi_fts, sep = "+")
    features.addsign = paste(features.addsign, collapse = "+")
    rf_formula = as.formula(paste(outcome, features.addsign, sep = "~"))
    
    ## define training and testing (from the training) basically for x-val: 
    uniqute_tnums = unique(recgli_train$t_number)
    train_tnums = unique_tnums[sample(c(1:length(unique_tnums)), size = round(length(unique_tnums)*.75))]
    test_tnums = unique_tnums[! unique_tnums %in% train_tnums]
    
    recgli_exp_train= recgli_train[recgli_train$t_number %in% train_tnums, ]
    recgli_exp_cv = recgli_train[! recgli_train$t_number %in% train_tnums, ]
    
    for (nodesize in c(1, 2, 3)){
        for (mtry in c(2, 3, 4)){
            rf = randomForest(formula = rf_formula, 
                 data = recgli_exp_train, nodesize = nodesize, mtry = mtry)
            
            cv_preds = predict(rf, recgli_exp_cv)
            
            cv_conf_mat = confusionMatrix(cv_preds, recgli_exp_cv$tumor_cell_evaluation)
            
            outcome_anat_experiment[expnum,'expnum'] = expnum
            outcome_anat_experiment[expnum,'node_size'] = nodesize
            outcome_anat_experiment[expnum,'mtry'] = mtry
            outcome_anat_experiment[expnum,'tr_out0_pred0'] = rf$confusion['0', '0']
            outcome_anat_experiment[expnum,'tr_out0_pred1'] = rf$confusion['0', '1']
            outcome_anat_experiment[expnum,'tr_out0_pred2'] = rf$confusion['0', '2']
            outcome_anat_experiment[expnum,'tr_out0_pred3'] = rf$confusion['0', '3']
            outcome_anat_experiment[expnum,'tr_out1_pred0'] = rf$confusion['1', '0']
            outcome_anat_experiment[expnum,'tr_out1_pred1'] = rf$confusion['1', '1']
            outcome_anat_experiment[expnum,'tr_out1_pred2'] = rf$confusion['1', '2']
            outcome_anat_experiment[expnum,'tr_out1_pred3'] = rf$confusion['1', '3']
            outcome_anat_experiment[expnum,'tr_out2_pred0'] = rf$confusion['2', '0']
            outcome_anat_experiment[expnum,'tr_out2_pred1'] = rf$confusion['2', '1']
            outcome_anat_experiment[expnum,'tr_out2_pred2'] = rf$confusion['2', '2']
            outcome_anat_experiment[expnum,'tr_out2_pred3'] = rf$confusion['2', '3']
            outcome_anat_experiment[expnum,'tr_out3_pred0'] = rf$confusion['3', '0']
            outcome_anat_experiment[expnum,'tr_out3_pred1'] = rf$confusion['3', '1']
            outcome_anat_experiment[expnum,'tr_out3_pred2'] = rf$confusion['3', '2']
            outcome_anat_experiment[expnum,'tr_out3_pred3'] = rf$confusion['3', '3']
            outcome_anat_experiment[expnum,'tr_out0_classerror'] = rf$confusion["0", 'class.error']
            outcome_anat_experiment[expnum,'tr_out1_classerror'] = rf$confusion["1", 'class.error']
            outcome_anat_experiment[expnum,'tr_out2_classerror'] = rf$confusion["2", 'class.error']
            outcome_anat_experiment[expnum,'tr_out3_classerror'] = rf$confusion["3", 'class.error']
            
            outcome_anat_experiment[expnum,'cv_out0_pred0'] = cv_conf_mat$table['0', '0']
            outcome_anat_experiment[expnum,'cv_out0_pred1'] = cv_conf_mat$table['1', '0']
            outcome_anat_experiment[expnum,'cv_out0_pred2'] = cv_conf_mat$table['2', '0']
            outcome_anat_experiment[expnum,'cv_out0_pred3'] = cv_conf_mat$table['3', '0']
            outcome_anat_experiment[expnum,'cv_out1_pred0'] = cv_conf_mat$table['0', '1']
            outcome_anat_experiment[expnum,'cv_out1_pred1'] = cv_conf_mat$table['1', '1']
            outcome_anat_experiment[expnum,'cv_out1_pred2'] = cv_conf_mat$table['2', '1']
            outcome_anat_experiment[expnum,'cv_out1_pred3'] = cv_conf_mat$table['3', '1']
            outcome_anat_experiment[expnum,'cv_out2_pred0'] = cv_conf_mat$table['0', '2']
            outcome_anat_experiment[expnum,'cv_out2_pred1'] = cv_conf_mat$table['1', '2']
            outcome_anat_experiment[expnum,'cv_out2_pred2'] = cv_conf_mat$table['2', '2']
            outcome_anat_experiment[expnum,'cv_out2_pred3'] = cv_conf_mat$table['3', '2']
            outcome_anat_experiment[expnum,'cv_out3_pred0'] = cv_conf_mat$table['0', '3']
            outcome_anat_experiment[expnum,'cv_out3_pred1'] = cv_conf_mat$table['1', '3']
            outcome_anat_experiment[expnum,'cv_out3_pred2'] = cv_conf_mat$table['2', '3']
            outcome_anat_experiment[expnum,'cv_out3_pred3'] = cv_conf_mat$table['3', '3']
            
            expnum = expnum + 1
        }
    }
}

In [91]:
outcome_anat_experiment2 = outcome_anat_experiment

In [117]:
outcome_anat_experiment2['cv_out0_classerror'] = 1-(outcome_anat_experiment2$cv_out0_pred0)/rowSums(outcome_anat_experiment2[,grep('cv_out0', colnames(outcome_anat_experiment2))])
outcome_anat_experiment2['cv_out1_classerror'] = 1-(outcome_anat_experiment2$cv_out1_pred1)/rowSums(outcome_anat_experiment2[,grep('cv_out1', colnames(outcome_anat_experiment2))])
outcome_anat_experiment2['cv_out2_classerror'] = 1-(outcome_anat_experiment2$cv_out2_pred2)/rowSums(outcome_anat_experiment2[,grep('cv_out2', colnames(outcome_anat_experiment2))])
outcome_anat_experiment2['cv_out3_classerror'] = 1-(outcome_anat_experiment2$cv_out3_pred3)/rowSums(outcome_anat_experiment2[,grep('cv_out3', colnames(outcome_anat_experiment2))])

In [97]:
## let's see what nodesize looks the best for exp2: 
for (nodesize in c(1:3)){
    nodesize_outcome = outcome_anat_experiment2[which(outcome_anat_experiment2$node_size==nodesize),]
    print(paste('summary for nodesize: ', as.character(nodesize)))
    print(summary(nodesize_outcome[,grep('classerror', colnames(nodesize_outcome))]))
}

[1] "summary for nodesize:  1"
 tr_out0_classerror tr_out1_classerror tr_out2_classerror tr_out3_classerror
 Min.   :0.3333     Min.   :0.6000     Min.   :0.4000     Min.   :0.1667    
 1st Qu.:0.6667     1st Qu.:0.8889     1st Qu.:0.5882     1st Qu.:0.4405    
 Median :0.7500     Median :1.0000     Median :0.6667     Median :0.5263    
 Mean   :0.7558     Mean   :0.9368     Mean   :0.6850     Mean   :0.5445    
 3rd Qu.:0.8462     3rd Qu.:1.0000     3rd Qu.:0.7714     3rd Qu.:0.6667    
 Max.   :1.0000     Max.   :1.0000     Max.   :1.0000     Max.   :1.0000    
[1] "summary for nodesize:  2"
 tr_out0_classerror tr_out1_classerror tr_out2_classerror tr_out3_classerror
 Min.   :0.3636     Min.   :0.6000     Min.   :0.3846     Min.   :0.1304    
 1st Qu.:0.6667     1st Qu.:0.8889     1st Qu.:0.5714     1st Qu.:0.4545    
 Median :0.7500     Median :1.0000     Median :0.6667     Median :0.5294    
 Mean   :0.7602     Mean   :0.9358     Mean   :0.6794     Mean   :0.5531    
 3rd Qu.:0.866

In [98]:
## let's see what mtry looks the best for exp2: 
for (mtry in c(2:4)){
    mtry_outcome = outcome_anat_experiment2[which(outcome_anat_experiment2$mtry==mtry),]
    print(paste('summary for mtry: ', as.character(mtry)))
    print(summary(mtry_outcome[,grep('classerror', colnames(mtry_outcome))]))
}

## looks like mtry=4 is significantly better

[1] "summary for mtry:  2"
 tr_out0_classerror tr_out1_classerror tr_out2_classerror tr_out3_classerror
 Min.   :0.3333     Min.   :0.6250     Min.   :0.4000     Min.   :0.1304    
 1st Qu.:0.7273     1st Qu.:0.9091     1st Qu.:0.6000     1st Qu.:0.3869    
 Median :0.8182     Median :1.0000     Median :0.6667     Median :0.4762    
 Mean   :0.8153     Mean   :0.9530     Mean   :0.6956     Mean   :0.5015    
 3rd Qu.:0.9167     3rd Qu.:1.0000     3rd Qu.:0.7857     3rd Qu.:0.6111    
 Max.   :1.0000     Max.   :1.0000     Max.   :1.0000     Max.   :0.9333    
[1] "summary for mtry:  3"
 tr_out0_classerror tr_out1_classerror tr_out2_classerror tr_out3_classerror
 Min.   :0.4545     Min.   :0.600      Min.   :0.3846     Min.   :0.2174    
 1st Qu.:0.6667     1st Qu.:0.875      1st Qu.:0.5714     1st Qu.:0.4675    
 Median :0.7500     Median :1.000      Median :0.6667     Median :0.5500    
 Mean   :0.7501     Mean   :0.930      Mean   :0.6748     Mean   :0.5652    
 3rd Qu.:0.8333     3r

### b. Use all diffusion features: 

In [100]:
outcome_perf_exp = outcome_experiment_template

In [104]:
paste(diffu1000_fts, collapse = "+")

In [105]:
## let's begin by running experiments for anatomic features only: 
## for each experiment, we'll run the random forest with a variety of parameters 
expnum = 1
for (i in 1:100){
    set.seed(i)
    
    ## define features and outcome: 
    outcome = "tumor_cell_evaluation"
    features.addsign <- paste(diffu1000_fts, collapse = "+")
    rf_formula = as.formula(paste(outcome, features.addsign, sep = "~"))
    
    ## define training and testing (from the training) basically for x-val: 
    uniqute_tnums = unique(recgli_train$t_number)
    train_tnums = unique_tnums[sample(c(1:length(unique_tnums)), size = round(length(unique_tnums)*.75))]
    test_tnums = unique_tnums[! unique_tnums %in% train_tnums]
    
    recgli_exp_train= recgli_train[recgli_train$t_number %in% train_tnums, ]
    recgli_exp_cv = recgli_train[! recgli_train$t_number %in% train_tnums, ]
    
    for (nodesize in c(1, 2, 3)){
        for (mtry in c(2, 3, 4)){
            rf = randomForest(formula = rf_formula, 
                 data = recgli_exp_train, nodesize = nodesize, mtry = mtry)
            
            cv_preds = predict(rf, recgli_exp_cv)
            
            cv_conf_mat = confusionMatrix(cv_preds, recgli_exp_cv$tumor_cell_evaluation)
            
            outcome_perf_exp[expnum,'expnum'] = expnum
            outcome_perf_exp[expnum,'node_size'] = nodesize
            outcome_perf_exp[expnum,'mtry'] = mtry
            outcome_perf_exp[expnum,'tr_out0_pred0'] = rf$confusion['0', '0']
            outcome_perf_exp[expnum,'tr_out0_pred1'] = rf$confusion['0', '1']
            outcome_perf_exp[expnum,'tr_out0_pred2'] = rf$confusion['0', '2']
            outcome_perf_exp[expnum,'tr_out0_pred3'] = rf$confusion['0', '3']
            outcome_perf_exp[expnum,'tr_out1_pred0'] = rf$confusion['1', '0']
            outcome_perf_exp[expnum,'tr_out1_pred1'] = rf$confusion['1', '1']
            outcome_perf_exp[expnum,'tr_out1_pred2'] = rf$confusion['1', '2']
            outcome_perf_exp[expnum,'tr_out1_pred3'] = rf$confusion['1', '3']
            outcome_perf_exp[expnum,'tr_out2_pred0'] = rf$confusion['2', '0']
            outcome_perf_exp[expnum,'tr_out2_pred1'] = rf$confusion['2', '1']
            outcome_perf_exp[expnum,'tr_out2_pred2'] = rf$confusion['2', '2']
            outcome_perf_exp[expnum,'tr_out2_pred3'] = rf$confusion['2', '3']
            outcome_perf_exp[expnum,'tr_out3_pred0'] = rf$confusion['3', '0']
            outcome_perf_exp[expnum,'tr_out3_pred1'] = rf$confusion['3', '1']
            outcome_perf_exp[expnum,'tr_out3_pred2'] = rf$confusion['3', '2']
            outcome_perf_exp[expnum,'tr_out3_pred3'] = rf$confusion['3', '3']
            outcome_perf_exp[expnum,'tr_out0_classerror'] = rf$confusion["0", 'class.error']
            outcome_perf_exp[expnum,'tr_out1_classerror'] = rf$confusion["1", 'class.error']
            outcome_perf_exp[expnum,'tr_out2_classerror'] = rf$confusion["2", 'class.error']
            outcome_perf_exp[expnum,'tr_out3_classerror'] = rf$confusion["3", 'class.error']
            
            outcome_perf_exp[expnum,'cv_out0_pred0'] = cv_conf_mat$table['0', '0']
            outcome_perf_exp[expnum,'cv_out0_pred1'] = cv_conf_mat$table['1', '0']
            outcome_perf_exp[expnum,'cv_out0_pred2'] = cv_conf_mat$table['2', '0']
            outcome_perf_exp[expnum,'cv_out0_pred3'] = cv_conf_mat$table['3', '0']
            outcome_perf_exp[expnum,'cv_out1_pred0'] = cv_conf_mat$table['0', '1']
            outcome_perf_exp[expnum,'cv_out1_pred1'] = cv_conf_mat$table['1', '1']
            outcome_perf_exp[expnum,'cv_out1_pred2'] = cv_conf_mat$table['2', '1']
            outcome_perf_exp[expnum,'cv_out1_pred3'] = cv_conf_mat$table['3', '1']
            outcome_perf_exp[expnum,'cv_out2_pred0'] = cv_conf_mat$table['0', '2']
            outcome_perf_exp[expnum,'cv_out2_pred1'] = cv_conf_mat$table['1', '2']
            outcome_perf_exp[expnum,'cv_out2_pred2'] = cv_conf_mat$table['2', '2']
            outcome_perf_exp[expnum,'cv_out2_pred3'] = cv_conf_mat$table['3', '2']
            outcome_perf_exp[expnum,'cv_out3_pred0'] = cv_conf_mat$table['0', '3']
            outcome_perf_exp[expnum,'cv_out3_pred1'] = cv_conf_mat$table['1', '3']
            outcome_perf_exp[expnum,'cv_out3_pred2'] = cv_conf_mat$table['2', '3']
            outcome_perf_exp[expnum,'cv_out3_pred3'] = cv_conf_mat$table['3', '3']
            
            expnum = expnum + 1
        }
    }
}

In [121]:
head(outcome_perf_exp)

expnum,node_size,mtry,tr_out0_pred0,tr_out0_pred1,tr_out0_pred2,tr_out0_pred3,tr_out1_pred0,tr_out1_pred1,tr_out1_pred2,tr_out1_pred3,tr_out2_pred0,tr_out2_pred1,tr_out2_pred2,tr_out2_pred3,tr_out3_pred0,tr_out3_pred1,tr_out3_pred2,tr_out3_pred3,tr_out0_classerror,tr_out1_classerror,tr_out2_classerror,tr_out3_classerror,cv_out0_pred0,cv_out0_pred1,cv_out0_pred2,cv_out0_pred3,cv_out1_pred0,cv_out1_pred1,cv_out1_pred2,cv_out1_pred3,cv_out2_pred0,cv_out2_pred1,cv_out2_pred2,cv_out2_pred3,cv_out3_pred0,cv_out3_pred1,cv_out3_pred2,cv_out3_pred3
1,1,2,6,1,3,6,2,2,2,2,2,1,4,10,6,1,9,0,0.625,0.75,0.7647059,1.0,0,0,0,0,1,2,0,1,0,0,2,1,2,3,0,3
2,1,3,6,1,2,7,2,2,2,2,2,0,6,9,6,1,9,0,0.625,0.75,0.6470588,1.0,0,0,0,0,1,2,0,1,0,0,2,1,1,3,0,4
3,1,4,6,1,2,7,1,2,2,3,2,1,5,9,4,1,11,0,0.625,0.75,0.7058824,1.0,0,0,0,0,1,2,0,1,0,0,1,2,1,3,0,4
4,2,2,6,1,2,7,2,2,2,2,2,2,5,8,6,1,9,0,0.625,0.75,0.7058824,1.0,0,0,0,0,1,2,1,0,0,0,2,1,2,3,0,3
5,2,3,6,1,3,6,2,2,2,2,1,1,6,9,4,1,10,1,0.625,0.75,0.6470588,0.9375,0,0,0,0,1,2,0,1,0,0,1,2,2,3,0,3
6,2,4,6,1,3,6,2,2,2,2,2,1,6,8,5,1,10,0,0.625,0.75,0.6470588,1.0,0,0,0,0,1,2,0,1,0,0,2,1,2,3,0,3


In [122]:
outcome_perf_exp['cv_out0_classerror'] = 1-(outcome_perf_exp$cv_out0_pred0)/rowSums(outcome_perf_exp[,grep('cv_out0', colnames(outcome_perf_exp))])
outcome_perf_exp['cv_out1_classerror'] = 1-(outcome_perf_exp$cv_out1_pred1)/rowSums(outcome_perf_exp[,grep('cv_out1', colnames(outcome_perf_exp))])
outcome_perf_exp['cv_out2_classerror'] = 1-(outcome_perf_exp$cv_out2_pred2)/rowSums(outcome_perf_exp[,grep('cv_out2', colnames(outcome_perf_exp))])
outcome_perf_exp['cv_out3_classerror'] = 1-(outcome_perf_exp$cv_out3_pred3)/rowSums(outcome_perf_exp[,grep('cv_out3', colnames(outcome_perf_exp))])

In [123]:
for (nodesize in c(1:3)){
    nodesize_outcome = outcome_perf_exp[which(outcome_perf_exp$node_size==nodesize),]
    print(paste('summary for nodesize: ', as.character(nodesize)))
    print(summary(nodesize_outcome[,grep('classerror', colnames(nodesize_outcome))]))
}

[1] "summary for nodesize:  1"
 tr_out0_classerror tr_out1_classerror tr_out2_classerror tr_out3_classerror
 Min.   :0.2857     Min.   :0.4286     Min.   :0.3529     Min.   :0.3500    
 1st Qu.:0.6154     1st Qu.:0.7000     1st Qu.:0.5833     1st Qu.:0.5714    
 Median :0.7000     Median :0.8000     Median :0.6667     Median :0.7000    
 Mean   :0.7132     Mean   :0.8027     Mean   :0.6710     Mean   :0.6907    
 3rd Qu.:0.8462     3rd Qu.:0.9000     3rd Qu.:0.7500     3rd Qu.:0.7895    
 Max.   :1.0000     Max.   :1.0000     Max.   :1.0000     Max.   :1.0000    
                                                                            
 cv_out0_classerror cv_out1_classerror cv_out2_classerror cv_out3_classerror
 Min.   :0.0000     Min.   :0.0000     Min.   :0.0000     Min.   :0.0000    
 1st Qu.:0.6786     1st Qu.:0.6667     1st Qu.:0.5556     1st Qu.:0.6000    
 Median :0.8333     Median :1.0000     Median :0.6667     Median :0.7500    
 Mean   :0.7770     Mean   :0.7813     Mean  

In [125]:
## let's see what mtry looks the best for exp2: 
for (mtry in c(2:4)){
    mtry_outcome = outcome_perf_exp[which(outcome_perf_exp$mtry==mtry),]
    print(paste('summary for mtry: ', as.character(mtry)))
    print(summary(mtry_outcome[,grep('classerror', colnames(mtry_outcome))]))
}

## looks like mtry=4 is significantly better

[1] "summary for mtry:  2"
 tr_out0_classerror tr_out1_classerror tr_out2_classerror tr_out3_classerror
 Min.   :0.2857     Min.   :0.4286     Min.   :0.3529     Min.   :0.3500    
 1st Qu.:0.6000     1st Qu.:0.7000     1st Qu.:0.5882     1st Qu.:0.5909    
 Median :0.7000     Median :0.8000     Median :0.6667     Median :0.7059    
 Mean   :0.7065     Mean   :0.8015     Mean   :0.6815     Mean   :0.6988    
 3rd Qu.:0.8333     3rd Qu.:0.9000     3rd Qu.:0.7857     3rd Qu.:0.7917    
 Max.   :1.0000     Max.   :1.0000     Max.   :1.0000     Max.   :1.0000    
                                                                            
 cv_out0_classerror cv_out1_classerror cv_out2_classerror cv_out3_classerror
 Min.   :0.0000     Min.   :0.0000     Min.   :0.0000     Min.   :0.0000    
 1st Qu.:0.6667     1st Qu.:0.6667     1st Qu.:0.5556     1st Qu.:0.6000    
 Median :0.8333     Median :1.0000     Median :0.7143     Median :0.7500    
 Mean   :0.7672     Mean   :0.7793     Mean   :0.

### d. Use all the perfusion: 

In [129]:
outcome_perf_exp = outcome_experiment_template

In [130]:
## let's begin by running experiments for anatomic features only: 
## for each experiment, we'll run the random forest with a variety of parameters 
expnum = 1
for (i in 1:100){
    set.seed(i)
    
    ## define features and outcome: 
    outcome = "tumor_cell_evaluation"
    features.addsign <- paste(perf_fts, collapse = "+")
    rf_formula = as.formula(paste(outcome, features.addsign, sep = "~"))
    
    ## define training and testing (from the training) basically for x-val: 
    uniqute_tnums = unique(recgli_train$t_number)
    train_tnums = unique_tnums[sample(c(1:length(unique_tnums)), size = round(length(unique_tnums)*.75))]
    test_tnums = unique_tnums[! unique_tnums %in% train_tnums]
    
    recgli_exp_train= recgli_train[recgli_train$t_number %in% train_tnums, ]
    recgli_exp_cv = recgli_train[! recgli_train$t_number %in% train_tnums, ]
    
    for (nodesize in c(1, 2, 3)){
        for (mtry in c(2, 3, 4)){
            rf = randomForest(formula = rf_formula, 
                 data = recgli_exp_train, nodesize = nodesize, mtry = mtry)
            
            cv_preds = predict(rf, recgli_exp_cv)
            
            cv_conf_mat = confusionMatrix(cv_preds, recgli_exp_cv$tumor_cell_evaluation)
            
            outcome_perf_exp[expnum,'expnum'] = expnum
            outcome_perf_exp[expnum,'node_size'] = nodesize
            outcome_perf_exp[expnum,'mtry'] = mtry
            outcome_perf_exp[expnum,'tr_out0_pred0'] = rf$confusion['0', '0']
            outcome_perf_exp[expnum,'tr_out0_pred1'] = rf$confusion['0', '1']
            outcome_perf_exp[expnum,'tr_out0_pred2'] = rf$confusion['0', '2']
            outcome_perf_exp[expnum,'tr_out0_pred3'] = rf$confusion['0', '3']
            outcome_perf_exp[expnum,'tr_out1_pred0'] = rf$confusion['1', '0']
            outcome_perf_exp[expnum,'tr_out1_pred1'] = rf$confusion['1', '1']
            outcome_perf_exp[expnum,'tr_out1_pred2'] = rf$confusion['1', '2']
            outcome_perf_exp[expnum,'tr_out1_pred3'] = rf$confusion['1', '3']
            outcome_perf_exp[expnum,'tr_out2_pred0'] = rf$confusion['2', '0']
            outcome_perf_exp[expnum,'tr_out2_pred1'] = rf$confusion['2', '1']
            outcome_perf_exp[expnum,'tr_out2_pred2'] = rf$confusion['2', '2']
            outcome_perf_exp[expnum,'tr_out2_pred3'] = rf$confusion['2', '3']
            outcome_perf_exp[expnum,'tr_out3_pred0'] = rf$confusion['3', '0']
            outcome_perf_exp[expnum,'tr_out3_pred1'] = rf$confusion['3', '1']
            outcome_perf_exp[expnum,'tr_out3_pred2'] = rf$confusion['3', '2']
            outcome_perf_exp[expnum,'tr_out3_pred3'] = rf$confusion['3', '3']
            outcome_perf_exp[expnum,'tr_out0_classerror'] = rf$confusion["0", 'class.error']
            outcome_perf_exp[expnum,'tr_out1_classerror'] = rf$confusion["1", 'class.error']
            outcome_perf_exp[expnum,'tr_out2_classerror'] = rf$confusion["2", 'class.error']
            outcome_perf_exp[expnum,'tr_out3_classerror'] = rf$confusion["3", 'class.error']
            
            outcome_perf_exp[expnum,'cv_out0_pred0'] = cv_conf_mat$table['0', '0']
            outcome_perf_exp[expnum,'cv_out0_pred1'] = cv_conf_mat$table['1', '0']
            outcome_perf_exp[expnum,'cv_out0_pred2'] = cv_conf_mat$table['2', '0']
            outcome_perf_exp[expnum,'cv_out0_pred3'] = cv_conf_mat$table['3', '0']
            outcome_perf_exp[expnum,'cv_out1_pred0'] = cv_conf_mat$table['0', '1']
            outcome_perf_exp[expnum,'cv_out1_pred1'] = cv_conf_mat$table['1', '1']
            outcome_perf_exp[expnum,'cv_out1_pred2'] = cv_conf_mat$table['2', '1']
            outcome_perf_exp[expnum,'cv_out1_pred3'] = cv_conf_mat$table['3', '1']
            outcome_perf_exp[expnum,'cv_out2_pred0'] = cv_conf_mat$table['0', '2']
            outcome_perf_exp[expnum,'cv_out2_pred1'] = cv_conf_mat$table['1', '2']
            outcome_perf_exp[expnum,'cv_out2_pred2'] = cv_conf_mat$table['2', '2']
            outcome_perf_exp[expnum,'cv_out2_pred3'] = cv_conf_mat$table['3', '2']
            outcome_perf_exp[expnum,'cv_out3_pred0'] = cv_conf_mat$table['0', '3']
            outcome_perf_exp[expnum,'cv_out3_pred1'] = cv_conf_mat$table['1', '3']
            outcome_perf_exp[expnum,'cv_out3_pred2'] = cv_conf_mat$table['2', '3']
            outcome_perf_exp[expnum,'cv_out3_pred3'] = cv_conf_mat$table['3', '3']
            
            expnum = expnum + 1
        }
    }
}

In [133]:
head(outcome_perf_exp)

expnum,node_size,mtry,tr_out0_pred0,tr_out0_pred1,tr_out0_pred2,tr_out0_pred3,tr_out1_pred0,tr_out1_pred1,tr_out1_pred2,tr_out1_pred3,tr_out2_pred0,tr_out2_pred1,tr_out2_pred2,tr_out2_pred3,tr_out3_pred0,tr_out3_pred1,tr_out3_pred2,tr_out3_pred3,tr_out0_classerror,tr_out1_classerror,tr_out2_classerror,tr_out3_classerror,cv_out0_pred0,cv_out0_pred1,cv_out0_pred2,cv_out0_pred3,cv_out1_pred0,cv_out1_pred1,cv_out1_pred2,cv_out1_pred3,cv_out2_pred0,cv_out2_pred1,cv_out2_pred2,cv_out2_pred3,cv_out3_pred0,cv_out3_pred1,cv_out3_pred2,cv_out3_pred3,cv_out0_classerror,cv_out1_classerror,cv_out2_classerror,cv_out3_classerror
1,1,2,5,1,7,3,1,0,4,3,6,2,5,4,2,1,7,6,0.6875,1,0.7058824,0.625,0,0,0,0,2,0,2,0,0,0,1,2,3,2,3,0,,1,0.6666667,1
2,1,3,5,1,8,2,1,0,4,3,5,3,5,4,3,1,6,6,0.6875,1,0.7058824,0.625,0,0,0,0,2,0,1,1,0,0,1,2,3,2,3,0,,1,0.6666667,1
3,1,4,5,2,7,2,1,0,4,3,6,3,4,4,3,1,7,5,0.6875,1,0.7647059,0.6875,0,0,0,0,1,0,3,0,0,0,1,2,3,2,3,0,,1,0.6666667,1
4,2,2,6,2,5,3,1,0,4,3,7,4,4,2,2,1,7,6,0.625,1,0.7647059,0.625,0,0,0,0,2,0,2,0,0,0,1,2,3,2,3,0,,1,0.6666667,1
5,2,3,5,2,7,2,1,0,4,3,6,3,3,5,4,2,6,4,0.6875,1,0.8235294,0.75,0,0,0,0,2,0,2,0,0,0,1,2,3,2,3,0,,1,0.6666667,1
6,2,4,5,2,7,2,1,0,5,2,6,4,3,4,3,1,7,5,0.6875,1,0.8235294,0.6875,0,0,0,0,2,0,1,1,0,0,1,2,3,2,3,0,,1,0.6666667,1


In [132]:
outcome_perf_exp['cv_out0_classerror'] = 1-(outcome_perf_exp$cv_out0_pred0)/rowSums(outcome_perf_exp[,grep('cv_out0', colnames(outcome_perf_exp))])
outcome_perf_exp['cv_out1_classerror'] = 1-(outcome_perf_exp$cv_out1_pred1)/rowSums(outcome_perf_exp[,grep('cv_out1', colnames(outcome_perf_exp))])
outcome_perf_exp['cv_out2_classerror'] = 1-(outcome_perf_exp$cv_out2_pred2)/rowSums(outcome_perf_exp[,grep('cv_out2', colnames(outcome_perf_exp))])
outcome_perf_exp['cv_out3_classerror'] = 1-(outcome_perf_exp$cv_out3_pred3)/rowSums(outcome_perf_exp[,grep('cv_out3', colnames(outcome_perf_exp))])

In [135]:
summary(outcome_perf_exp[,grep('classerror', colnames(outcome_perf_exp))])

 tr_out0_classerror tr_out1_classerror tr_out2_classerror tr_out3_classerror
 Min.   :0.5000     Min.   :0.4000     Min.   :0.3846     Min.   :0.3182    
 1st Qu.:0.6875     1st Qu.:0.8182     1st Qu.:0.6667     1st Qu.:0.5263    
 Median :0.7778     Median :0.9000     Median :0.7500     Median :0.6111    
 Mean   :0.7726     Mean   :0.8927     Mean   :0.7454     Mean   :0.6069    
 3rd Qu.:0.8333     3rd Qu.:1.0000     3rd Qu.:0.8333     3rd Qu.:0.6875    
 Max.   :1.0000     Max.   :1.0000     Max.   :1.0000     Max.   :0.9412    
                                                                            
 cv_out0_classerror cv_out1_classerror cv_out2_classerror cv_out3_classerror
 Min.   :0.0000     Min.   :0.0000     Min.   :0.2000     Min.   :0.0000    
 1st Qu.:0.6667     1st Qu.:0.8571     1st Qu.:0.6000     1st Qu.:0.4444    
 Median :0.8000     Median :1.0000     Median :0.7143     Median :0.6250    
 Mean   :0.7254     Mean   :0.8985     Mean   :0.7217     Mean   :0.5895    

### e. Use all the spectroscopy: 

In [141]:
outcome_spec_exp = outcome_experiment_template

In [142]:
## let's begin by running experiments for anatomic features only: 
## for each experiment, we'll run the random forest with a variety of parameters 
expnum = 1
for (i in 1:100){
    set.seed(i)
    
    ## define features and outcome: 
    outcome = "tumor_cell_evaluation"
    features.addsign <- paste(perf_fts, collapse = "+")
    rf_formula = as.formula(paste(outcome, features.addsign, sep = "~"))
    
    ## define training and testing (from the training) basically for x-val: 
    uniqute_tnums = unique(recgli_train$t_number)
    train_tnums = unique_tnums[sample(c(1:length(unique_tnums)), size = round(length(unique_tnums)*.75))]
    test_tnums = unique_tnums[! unique_tnums %in% train_tnums]
    
    recgli_exp_train= recgli_train[recgli_train$t_number %in% train_tnums, ]
    recgli_exp_cv = recgli_train[! recgli_train$t_number %in% train_tnums, ]
    
    for (nodesize in c(1, 2, 3)){
        for (mtry in c(2, 3, 4)){
            rf = randomForest(formula = rf_formula, 
                 data = recgli_exp_train, nodesize = nodesize, mtry = mtry)
            
            cv_preds = predict(rf, recgli_exp_cv)
            
            cv_conf_mat = confusionMatrix(cv_preds, recgli_exp_cv$tumor_cell_evaluation)
            
            outcome_spec_exp[expnum,'expnum'] = expnum
            outcome_spec_exp[expnum,'node_size'] = nodesize
            outcome_spec_exp[expnum,'mtry'] = mtry
            outcome_spec_exp[expnum,'tr_out0_pred0'] = rf$confusion['0', '0']
            outcome_spec_exp[expnum,'tr_out0_pred1'] = rf$confusion['0', '1']
            outcome_spec_exp[expnum,'tr_out0_pred2'] = rf$confusion['0', '2']
            outcome_spec_exp[expnum,'tr_out0_pred3'] = rf$confusion['0', '3']
            outcome_spec_exp[expnum,'tr_out1_pred0'] = rf$confusion['1', '0']
            outcome_spec_exp[expnum,'tr_out1_pred1'] = rf$confusion['1', '1']
            outcome_spec_exp[expnum,'tr_out1_pred2'] = rf$confusion['1', '2']
            outcome_spec_exp[expnum,'tr_out1_pred3'] = rf$confusion['1', '3']
            outcome_spec_exp[expnum,'tr_out2_pred0'] = rf$confusion['2', '0']
            outcome_spec_exp[expnum,'tr_out2_pred1'] = rf$confusion['2', '1']
            outcome_spec_exp[expnum,'tr_out2_pred2'] = rf$confusion['2', '2']
            outcome_spec_exp[expnum,'tr_out2_pred3'] = rf$confusion['2', '3']
            outcome_spec_exp[expnum,'tr_out3_pred0'] = rf$confusion['3', '0']
            outcome_spec_exp[expnum,'tr_out3_pred1'] = rf$confusion['3', '1']
            outcome_spec_exp[expnum,'tr_out3_pred2'] = rf$confusion['3', '2']
            outcome_spec_exp[expnum,'tr_out3_pred3'] = rf$confusion['3', '3']
            outcome_spec_exp[expnum,'tr_out0_classerror'] = rf$confusion["0", 'class.error']
            outcome_spec_exp[expnum,'tr_out1_classerror'] = rf$confusion["1", 'class.error']
            outcome_spec_exp[expnum,'tr_out2_classerror'] = rf$confusion["2", 'class.error']
            outcome_spec_exp[expnum,'tr_out3_classerror'] = rf$confusion["3", 'class.error']
            
            outcome_spec_exp[expnum,'cv_out0_pred0'] = cv_conf_mat$table['0', '0']
            outcome_spec_exp[expnum,'cv_out0_pred1'] = cv_conf_mat$table['1', '0']
            outcome_spec_exp[expnum,'cv_out0_pred2'] = cv_conf_mat$table['2', '0']
            outcome_spec_exp[expnum,'cv_out0_pred3'] = cv_conf_mat$table['3', '0']
            outcome_spec_exp[expnum,'cv_out1_pred0'] = cv_conf_mat$table['0', '1']
            outcome_spec_exp[expnum,'cv_out1_pred1'] = cv_conf_mat$table['1', '1']
            outcome_spec_exp[expnum,'cv_out1_pred2'] = cv_conf_mat$table['2', '1']
            outcome_spec_exp[expnum,'cv_out1_pred3'] = cv_conf_mat$table['3', '1']
            outcome_spec_exp[expnum,'cv_out2_pred0'] = cv_conf_mat$table['0', '2']
            outcome_spec_exp[expnum,'cv_out2_pred1'] = cv_conf_mat$table['1', '2']
            outcome_spec_exp[expnum,'cv_out2_pred2'] = cv_conf_mat$table['2', '2']
            outcome_spec_exp[expnum,'cv_out2_pred3'] = cv_conf_mat$table['3', '2']
            outcome_spec_exp[expnum,'cv_out3_pred0'] = cv_conf_mat$table['0', '3']
            outcome_spec_exp[expnum,'cv_out3_pred1'] = cv_conf_mat$table['1', '3']
            outcome_spec_exp[expnum,'cv_out3_pred2'] = cv_conf_mat$table['2', '3']
            outcome_spec_exp[expnum,'cv_out3_pred3'] = cv_conf_mat$table['3', '3']
            
            expnum = expnum + 1
        }
    }
}

In [143]:
outcome_spec_exp['cv_out0_classerror'] = 1-(outcome_spec_exp$cv_out0_pred0)/rowSums(outcome_spec_exp[,grep('cv_out0', colnames(outcome_spec_exp))])
outcome_spec_exp['cv_out1_classerror'] = 1-(outcome_spec_exp$cv_out1_pred1)/rowSums(outcome_spec_exp[,grep('cv_out1', colnames(outcome_spec_exp))])
outcome_spec_exp['cv_out2_classerror'] = 1-(outcome_spec_exp$cv_out2_pred2)/rowSums(outcome_spec_exp[,grep('cv_out2', colnames(outcome_spec_exp))])
outcome_spec_exp['cv_out3_classerror'] = 1-(outcome_spec_exp$cv_out3_pred3)/rowSums(outcome_spec_exp[,grep('cv_out3', colnames(outcome_spec_exp))])

In [144]:
summary(outcome_spec_exp[,grep('classerror', colnames(outcome_perf_exp))])

 tr_out0_classerror tr_out1_classerror tr_out2_classerror tr_out3_classerror
 Min.   :0.5000     Min.   :0.4000     Min.   :0.3846     Min.   :0.3182    
 1st Qu.:0.6875     1st Qu.:0.8182     1st Qu.:0.6667     1st Qu.:0.5263    
 Median :0.7778     Median :0.9000     Median :0.7500     Median :0.6111    
 Mean   :0.7726     Mean   :0.8927     Mean   :0.7454     Mean   :0.6069    
 3rd Qu.:0.8333     3rd Qu.:1.0000     3rd Qu.:0.8333     3rd Qu.:0.6875    
 Max.   :1.0000     Max.   :1.0000     Max.   :1.0000     Max.   :0.9412    
                                                                            
 cv_out0_classerror cv_out1_classerror cv_out2_classerror cv_out3_classerror
 Min.   :0.0000     Min.   :0.0000     Min.   :0.2000     Min.   :0.0000    
 1st Qu.:0.6667     1st Qu.:0.8571     1st Qu.:0.6000     1st Qu.:0.4444    
 Median :0.8000     Median :1.0000     Median :0.7143     Median :0.6250    
 Mean   :0.7254     Mean   :0.8985     Mean   :0.7217     Mean   :0.5895    