PR addressing Issue #43 (#191)

Homogenised way in which `tune.mint.splsda()` and `perf.mint.splsda()` calculate BER, such that now both use weighted average of BER across studies. Added unit tests to ensure this homogenity
mixOmicsTeam · Dec 8, 2022 · 5b1c57a · 5b1c57a
1 parent 8cf27e0
commit 5b1c57a
Show file tree

Hide file tree

Showing 3 changed files with 41 additions and 16 deletions.
diff --git a/R/LOGOCV.R b/R/LOGOCV.R
@@ -263,8 +263,12 @@ LOGOCV <- function(X,
                 })
             }
 
-            # average BER over the study
-            error.mean[[ijk]] = apply(error, 2, mean)
+            # weighted average BER over the studies
+            error.mean[[ijk]] = apply(error, 2, function(x) {
+                sum(x * table(study)/length(study))
+            })
+
+
             keepX.opt[[ijk]] =
                 which(error.mean[[ijk]] ==  min(error.mean[[ijk]]))[1]
 

diff --git a/R/perf.mint.splsda.R b/R/perf.mint.splsda.R
@@ -218,6 +218,10 @@ perf.mint.plsda <- function (object,
                 auc.mean.study[[comp]][[study_i]] = statauc(data)
             }
 
+            # average of ER and BER across studies, weighted by study sample size
+            global$BER[comp,] <- global$BER[comp,] + study.specific[[study_i]]$BER[comp, ] * table(study)[study_i]/length(study)
+            global$overall[comp,] <- global$overall[comp,] + study.specific[[study_i]]$overall[comp, ] * table(study)[study_i]/length(study)
+
         } # end study_i 1:M (M folds)
 
         for (ijk in dist)
@@ -227,20 +231,6 @@ perf.mint.plsda <- function (object,
         }
         prediction.all[[comp]] = prediction.comp
 
-
-        # global results
-        #BER
-        global$BER[comp,] = sapply(class.comp, function(x){
-            conf = get.confusion_matrix(truth = factor(Y), predicted = x)
-            get.BER(conf)
-        })
-
-        #overall
-        global$overall[comp,] = sapply(class.comp, function(x){
-            conf = get.confusion_matrix(truth = factor(Y), predicted = x)
-            out = sum(apply(conf, 1, sum) - diag(conf)) / length(Y)
-        })
-
         #classification for each level of Y
         temp = lapply(class.comp, function(x){
             conf = get.confusion_matrix(truth = factor(Y), predicted = x)

diff --git a/tests/testthat/test-perf.mint.splsda.R b/tests/testthat/test-perf.mint.splsda.R
@@ -32,3 +32,34 @@ test_that("perf.mint.splsda works with custom alpha", code = {
     expect_true(all(out$choice.ncomp == 1))
 
 })
+
+test_that("perf.mint.splsda and tune.mint.splsda yield the same error rates for all measures", code = {
+    data(stemcells)
+    X = stemcells$gene
+    Y = stemcells$celltype
+    study <- stemcells$study
+
+    metricsC1 <- matrix(0, nrow = 2, ncol = 3)
+    colnames(metricsC1) <- c('max.dist', 'centroids.dist', 'mahalanobis.dist')
+    rownames(metricsC1) <- c("overall", "BER")
+
+    metricsC2 <- metricsC1
+
+    for (dist in c('max.dist', 'centroids.dist', 'mahalanobis.dist')) {
+        for (measure in c("overall", "BER")) {
+            tune.mint = tune.mint.splsda(X = X, Y = Y, study = study, ncomp = 2, test.keepX = seq(1, 51, 5),
+                                         dist = dist, progressBar = FALSE, measure = measure)
+
+            mint.splsda.res = mint.splsda(X = X, Y = Y, study = study, ncomp = 2,
+                                          keepX = tune.mint$choice.keepX)
+
+            perf.mint = perf(mint.splsda.res, progressBar = FALSE, dist = dist)
+
+            metricsC1[measure, dist] <- tune.mint$error.rate[which(rownames(tune.mint$error.rate) == tune.mint$choice.keepX[1]), "comp1"]
+            metricsC2[measure, dist] <- tune.mint$error.rate[which(rownames(tune.mint$error.rate) == tune.mint$choice.keepX[2]), "comp2"]
+
+            expect_equal(round(perf.mint$global.error[[measure]]["comp1",], 4), round(metricsC1[[measure, dist]], 4))
+            expect_equal(round(perf.mint$global.error[[measure]]["comp2",], 4), round(metricsC2[[measure, dist]], 4))
+        }
+    }
+})