Skip to content

Commit

Permalink
PR addressing Issue #43 (#191)
Browse files Browse the repository at this point in the history
Homogenised way in which `tune.mint.splsda()` and `perf.mint.splsda()` calculate BER, such that now both use weighted average of BER across studies. Added unit tests to ensure this homogenity
  • Loading branch information
Max-Bladen committed Dec 8, 2022
1 parent 8cf27e0 commit 5b1c57a
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 16 deletions.
8 changes: 6 additions & 2 deletions R/LOGOCV.R
Original file line number Diff line number Diff line change
Expand Up @@ -263,8 +263,12 @@ LOGOCV <- function(X,
})
}

# average BER over the study
error.mean[[ijk]] = apply(error, 2, mean)
# weighted average BER over the studies
error.mean[[ijk]] = apply(error, 2, function(x) {
sum(x * table(study)/length(study))
})


keepX.opt[[ijk]] =
which(error.mean[[ijk]] == min(error.mean[[ijk]]))[1]

Expand Down
18 changes: 4 additions & 14 deletions R/perf.mint.splsda.R
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,10 @@ perf.mint.plsda <- function (object,
auc.mean.study[[comp]][[study_i]] = statauc(data)
}

# average of ER and BER across studies, weighted by study sample size
global$BER[comp,] <- global$BER[comp,] + study.specific[[study_i]]$BER[comp, ] * table(study)[study_i]/length(study)
global$overall[comp,] <- global$overall[comp,] + study.specific[[study_i]]$overall[comp, ] * table(study)[study_i]/length(study)

} # end study_i 1:M (M folds)

for (ijk in dist)
Expand All @@ -227,20 +231,6 @@ perf.mint.plsda <- function (object,
}
prediction.all[[comp]] = prediction.comp


# global results
#BER
global$BER[comp,] = sapply(class.comp, function(x){
conf = get.confusion_matrix(truth = factor(Y), predicted = x)
get.BER(conf)
})

#overall
global$overall[comp,] = sapply(class.comp, function(x){
conf = get.confusion_matrix(truth = factor(Y), predicted = x)
out = sum(apply(conf, 1, sum) - diag(conf)) / length(Y)
})

#classification for each level of Y
temp = lapply(class.comp, function(x){
conf = get.confusion_matrix(truth = factor(Y), predicted = x)
Expand Down
31 changes: 31 additions & 0 deletions tests/testthat/test-perf.mint.splsda.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,34 @@ test_that("perf.mint.splsda works with custom alpha", code = {
expect_true(all(out$choice.ncomp == 1))

})

test_that("perf.mint.splsda and tune.mint.splsda yield the same error rates for all measures", code = {
data(stemcells)
X = stemcells$gene
Y = stemcells$celltype
study <- stemcells$study

metricsC1 <- matrix(0, nrow = 2, ncol = 3)
colnames(metricsC1) <- c('max.dist', 'centroids.dist', 'mahalanobis.dist')
rownames(metricsC1) <- c("overall", "BER")

metricsC2 <- metricsC1

for (dist in c('max.dist', 'centroids.dist', 'mahalanobis.dist')) {
for (measure in c("overall", "BER")) {
tune.mint = tune.mint.splsda(X = X, Y = Y, study = study, ncomp = 2, test.keepX = seq(1, 51, 5),
dist = dist, progressBar = FALSE, measure = measure)

mint.splsda.res = mint.splsda(X = X, Y = Y, study = study, ncomp = 2,
keepX = tune.mint$choice.keepX)

perf.mint = perf(mint.splsda.res, progressBar = FALSE, dist = dist)

metricsC1[measure, dist] <- tune.mint$error.rate[which(rownames(tune.mint$error.rate) == tune.mint$choice.keepX[1]), "comp1"]
metricsC2[measure, dist] <- tune.mint$error.rate[which(rownames(tune.mint$error.rate) == tune.mint$choice.keepX[2]), "comp2"]

expect_equal(round(perf.mint$global.error[[measure]]["comp1",], 4), round(metricsC1[[measure, dist]], 4))
expect_equal(round(perf.mint$global.error[[measure]]["comp2",], 4), round(metricsC2[[measure, dist]], 4))
}
}
})

0 comments on commit 5b1c57a

Please sign in to comment.