Skip to content

Commit

Permalink
Merge ceeb839 into c3b2aa6
Browse files Browse the repository at this point in the history
  • Loading branch information
asardaes committed Jul 5, 2017
2 parents c3b2aa6 + ceeb839 commit 11f7fac
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 118 deletions.
79 changes: 79 additions & 0 deletions pkg/caret/R/misc.R
Original file line number Diff line number Diff line change
Expand Up @@ -634,3 +634,82 @@ fill_failed_prob <- function(index, lev, submod) {
probValues
}

optimism_boot <- function(ctrl, dat, iter, lev, method, mod_rec, predicted, submod, loop) {
indexExtra <- ctrl$indexExtra[[iter]]

if(is.null(indexExtra) || model_failed(mod_rec) || inherits(predicted, "try-error"))
return (NULL)

predictedExtra <- lapply(indexExtra, function(index) {
pred <- rec_pred(method = method,
object = mod_rec,
newdata = subset_x(dat, index),
param = submod)
trim_values(pred, ctrl, is.null(lev))
})

if(ctrl$classProbs)
probValuesExtra <- lapply(indexExtra, function(index) {
rec_prob(method = method,
object = mod_rec,
newdata = subset_x(dat, index),
param = submod)
})
else
probValuesExtra <- lapply(indexExtra, function(index) {
fill_failed_prob(index, lev, submod)
})

if(!is.null(submod)) {
allParam <- expandParameters(loop, submod)
allParam <- allParam[complete.cases(allParam),, drop = FALSE]

predictedExtra <- Map(predictedExtra, indexExtra, f = function(predicted, holdoutIndex) {
lapply(predicted, function(x) {
x <- outcome_conversion(x, lv = lev)
dat <- holdout_rec(mod_rec, dat, holdoutIndex)
dat$pred <- x
dat
})
})

if(ctrl$classProbs)
predictedExtra <- Map(predictedExtra, probValuesExtra, f = function(predicted, probValues) {
Map(cbind, predicted, probValues)
})

thisResampleExtra <- lapply(predictedExtra, function(predicted) {
lapply(predicted,
ctrl$summaryFunction,
lev = lev,
model = method)
})
thisResampleExtra[[1L]] <- lapply(thisResampleExtra[[1L]], function(res) {
names(res) <- paste0(names(res), "Orig")
res
})
thisResampleExtra[[2L]] <- lapply(thisResampleExtra[[2L]], function(res) {
names(res) <- paste0(names(res), "Boot")
res
})
thisResampleExtra <- do.call(cbind, lapply(thisResampleExtra, function(x) do.call(rbind, x)))
thisResampleExtra <- cbind(allParam, thisResampleExtra)

} else {
thisResampleExtra <- Map(predictedExtra, indexExtra, probValuesExtra,
f = function(predicted, holdoutIndex, probValues) {
tmp <- holdout_rec(mod_rec, dat, holdoutIndex)
tmp$pred <- outcome_conversion(predicted, lv = lev)
if(ctrl$classProbs) tmp <- cbind(tmp, probValues)
tmp <- merge(tmp, loop, all = TRUE)
ctrl$summaryFunction(tmp, lev = lev, model = method)
})
names(thisResampleExtra[[1L]]) <- paste0(names(thisResampleExtra[[1L]]), "Orig")
names(thisResampleExtra[[2L]]) <- paste0(names(thisResampleExtra[[2L]]), "Boot")
thisResampleExtra <- unlist(unname(thisResampleExtra), recursive = FALSE)
thisResampleExtra <- cbind(as.data.frame(t(thisResampleExtra)), loop)
}

# return
thisResampleExtra
}
133 changes: 16 additions & 117 deletions pkg/caret/R/recipes.R
Original file line number Diff line number Diff line change
Expand Up @@ -950,13 +950,15 @@ train_rec <- function(rec, dat, info, method, ctrl, lev, testing = FALSE, ...) {
if(!is.null(method$library)) pkgs <- c(pkgs, method$library)

is_regression <- is.null(lev)

export <- c("optimism_boot")

result <- foreach(iter = seq(along = resampleIndex), .combine = "c", .verbose = FALSE, .packages = pkgs, .errorhandling = "stop") %:%
foreach(parm = 1:nrow(info$loop), .combine = "c", .verbose = FALSE, .packages = pkgs, .errorhandling = "stop") %op% {
result <- foreach(iter = seq(along = resampleIndex), .combine = "c", .packages = pkgs, .export = export) %:%
foreach(parm = 1L:nrow(info$loop), .combine = "c", .packages = pkgs, .export = export) %op% {

testing <- FALSE

if(!(length(ctrl$seeds) == 1 && is.na(ctrl$seeds)))
if(!(length(ctrl$seeds) == 1L && is.na(ctrl$seeds)))
set.seed(ctrl$seeds[[iter]][parm])

loadNamespace("caret")
Expand All @@ -973,12 +975,6 @@ train_rec <- function(rec, dat, info, method, ctrl, lev, testing = FALSE, ...) {
holdoutIndex <- modelIndex
}

extraIndex <-
if (is.null(ctrl$indexExtra))
NULL
else
ctrl$indexExtra[[iter]]

if(testing) cat("pre-model\n")

if(!is.null(info$submodels[[parm]]) && nrow(info$submodels[[parm]]) > 0) {
Expand All @@ -995,11 +991,8 @@ train_rec <- function(rec, dat, info, method, ctrl, lev, testing = FALSE, ...) {
sampling = ctrl$sampling,
...),
silent = TRUE)


if(testing) print(mod_rec)

predictedExtra <- NULL
if(!model_failed(mod_rec)) {
predicted <- try(
rec_pred(method = method,
Expand All @@ -1016,13 +1009,6 @@ train_rec <- function(rec, dat, info, method, ctrl, lev, testing = FALSE, ...) {
verb = ctrl$verboseIter)

predicted <- fill_failed_pred(index = holdoutIndex, lev = lev, submod)
} else if(!is.null(extraIndex)) {
predictedExtra <- lapply(extraIndex, function(idx) {
rec_pred(method = method,
object = mod_rec,
newdata = subset_x(dat, idx),
param = submod)
})
}
} else {
fail_warning(settings = printed[parm,,drop = FALSE],
Expand All @@ -1034,36 +1020,18 @@ train_rec <- function(rec, dat, info, method, ctrl, lev, testing = FALSE, ...) {
}

if(testing) print(head(predicted))
probValuesExtra <- NULL
if(ctrl$classProbs) {
if(!model_failed(mod_rec)) {
probValues <- rec_prob(method = method,
object = mod_rec,
newdata = subset_x(dat, holdoutIndex),
param = submod)

if (!is.null(extraIndex))
probValuesExtra <- lapply(extraIndex, function(index) {
rec_prob(method = method,
object = mod_rec,
newdata = subset_x(dat, index),
param = submod)
})
} else {
probValues <- fill_failed_prob(holdoutIndex, lev, submod)
}
if(testing) print(head(probValues))
}

if(is.null(probValuesExtra)) {
probValuesExtra <- as.data.frame(matrix(NA, nrow = nrow(dat), ncol = length(lev)))
colnames(probValuesExtra) <- lev
if(!is.null(submod)) {
probValuesExtra <- rep(list(probValuesExtra), nrow(info$submodels[[parm]]) + 1L)
}
probValuesExtra <- rep(list(probValuesExtra), 2L)
}

##################################

predicted <- trim_values(predicted, ctrl, is_regression)
Expand All @@ -1076,7 +1044,7 @@ train_rec <- function(rec, dat, info, method, ctrl, lev, testing = FALSE, ...) {

if(!is.null(submod)) {
## merge the fixed and seq parameter values together
allParam <- expandParameters(info$loop[parm,,drop = FALSE], info$submodels[[parm]])
allParam <- expandParameters(info$loop[parm,,drop = FALSE], submod)
allParam <- allParam[complete.cases(allParam),, drop = FALSE]

## collate the predictions across all the sub-models
Expand All @@ -1088,36 +1056,12 @@ train_rec <- function(rec, dat, info, method, ctrl, lev, testing = FALSE, ...) {
},
lv = lev,
dat = ho_data)

if(!is.null(predictedExtra))
predictedExtra <- mapply(predictedExtra, extraIndex,
SIMPLIFY = FALSE, USE.NAMES = FALSE,
FUN = function(pred, rows) {
lapply(pred, function(x) {
y <- y[rows]
wts <- wts[rows]

x <- outcome_conversion(x, lv = lev)
out <- data.frame(pred = x, obs = y, stringsAsFactors = FALSE)
if(!is.null(wts)) out$weights <- wts
out$rowIndex <- rows
out
})
})
if(testing) print(head(predicted))

## same for the class probabilities
if(ctrl$classProbs) {
predicted <- mapply(cbind, predicted, probValues, SIMPLIFY = FALSE)
if (!is.null(predictedExtra))
predictedExtra <- mapply(predictedExtra, probValuesExtra,
SIMPLIFY = FALSE,
FUN = function(predEx, probEx) {
mapply(cbind, predEx, probEx, SIMPLIFY = FALSE)
})
}
if(ctrl$classProbs) predicted <- mapply(cbind, predicted, probValues, SIMPLIFY = FALSE)

if(keep_pred || (ctrl$method == "boot_all" && names(resampleIndex)[iter] == "AllData")) {
if(keep_pred) {
tmpPred <- predicted
for(modIndex in seq(along = tmpPred)) {
tmpPred[[modIndex]] <- merge(tmpPred[[modIndex]],
Expand All @@ -1135,25 +1079,6 @@ train_rec <- function(rec, dat, info, method, ctrl, lev, testing = FALSE, ...) {
model = method)
if(testing) print(head(thisResample))

if(!is.null(predictedExtra)) {
thisResampleExtra <- lapply(predictedExtra, function(predicted) {
lapply(predicted,
ctrl$summaryFunction,
lev = lev,
model = method)
})
thisResampleExtra[[1L]] <- lapply(thisResampleExtra[[1L]], function(res) {
names(res) <- paste0(names(res), "Orig")
res
})
thisResampleExtra[[2L]] <- lapply(thisResampleExtra[[2L]], function(res) {
names(res) <- paste0(names(res), "Boot")
res
})
thisResampleExtra <- do.call(cbind, lapply(thisResampleExtra, function(x) do.call(rbind, x)))
thisResampleExtra <- cbind(allParam, thisResampleExtra)
} else thisResampleExtra <- NULL

## for classification, add the cell counts
if(length(lev) > 1 && length(lev) <= 50) {
cells <- lapply(predicted,
Expand All @@ -1171,8 +1096,7 @@ train_rec <- function(rec, dat, info, method, ctrl, lev, testing = FALSE, ...) {
if(ctrl$classProbs) tmp <- cbind(tmp, probValues)
tmp <- merge(tmp, info$loop[parm,,drop = FALSE], all = TRUE)

## not sure what the story is here
if(keep_pred || (ctrl$method == "boot_all" && names(resampleIndex)[iter] == "AllData")) {
if(keep_pred) {
tmpPred <- tmp
tmpPred$rowIndex <- holdoutIndex
tmpPred <- merge(tmpPred, info$loop[parm,,drop = FALSE],
Expand All @@ -1191,37 +1115,12 @@ train_rec <- function(rec, dat, info, method, ctrl, lev, testing = FALSE, ...) {
thisResample <- c(thisResample, flatTable(tmp$pred, tmp$obs))
thisResample <- as.data.frame(t(thisResample))
thisResample <- cbind(thisResample, info$loop[parm,,drop = FALSE])

## for optimism bootstrap
if(!is.null(predictedExtra)) {
thisResampleExtra <- mapply(predictedExtra, extraIndex, probValuesExtra,
SIMPLIFY = FALSE, USE.NAMES = FALSE,
FUN = function(predicted, holdoutIndex, probValues) {
if(is.factor(y)) predicted <- outcome_conversion(predicted, lv = lev)
tmp <- data.frame(pred = predicted,
obs = y[holdoutIndex],
stringsAsFactors = FALSE)
## Sometimes the code above does not coerce the first
## columnn to be named "pred" so force it
names(tmp)[1] <- "pred"
if(!is.null(wts)) tmp$weights <- wts[holdoutIndex]
if(ctrl$classProbs) tmp <- cbind(tmp, probValues)
tmp$rowIndex <- holdoutIndex
ctrl$summaryFunction(tmp, lev = lev, model = method)
})

names(thisResampleExtra[[1L]]) <- paste0(names(thisResampleExtra[[1L]]), "Orig")
names(thisResampleExtra[[2L]]) <- paste0(names(thisResampleExtra[[2L]]), "Boot")

thisResampleExtra <- unlist(thisResampleExtra, recursive = FALSE)

thisResampleExtra <- cbind(as.data.frame(t(thisResampleExtra)), info$loop[parm, , drop = FALSE])

} else thisResampleExtra <- NULL

}
thisResample$Resample <- names(resampleIndex)[iter]

thisResampleExtra <- optimism_boot(ctrl, dat, iter, lev, method, mod_rec, predicted,
submod, info$loop[parm,, drop = FALSE])

if(ctrl$verboseIter)
progress(printed[parm,,drop = FALSE],
names(resampleIndex), iter, FALSE)
Expand All @@ -1238,11 +1137,11 @@ train_rec <- function(rec, dat, info, method, ctrl, lev, testing = FALSE, ...) {
perfNames <- perfNames[!(perfNames %in% c("Resample", as.character(method$parameters$parameter)))]
perfNames <- perfNames[!grepl("^\\.cell[0-9]", perfNames)]
apparent <- subset(resamples, Resample == "AllData")
apparent <- apparent[,!grepl("^\\.cell|Resample", colnames(apparent)),drop = FALSE]
apparent <- apparent[,!grepl("^\\.cell|Resample", colnames(apparent)), drop = FALSE]
names(apparent)[which(names(apparent) %in% perfNames)] <-
paste(names(apparent)[which(names(apparent) %in% perfNames)], "Apparent", sep = "")
names(apparent) <- gsub("^\\.", "", names(apparent))
if(any(!complete.cases(apparent[,!grepl("^cell|Resample", colnames(apparent)),drop = FALSE])))
if(any(!complete.cases(apparent[,!grepl("^cell|Resample", colnames(apparent)), drop = FALSE])))
warning("There were missing values in the apparent performance measures.")

resamples <- subset(resamples, Resample != "AllData")
Expand All @@ -1253,10 +1152,10 @@ train_rec <- function(rec, dat, info, method, ctrl, lev, testing = FALSE, ...) {
}
names(resamples) <- gsub("^\\.", "", names(resamples))

if(any(!complete.cases(resamples[,!grepl("^cell|Resample", colnames(resamples)),drop = FALSE])))
if(any(!complete.cases(resamples[,!grepl("^cell|Resample", colnames(resamples)), drop = FALSE])))
warning("There were missing values in resampled performance measures.")

out <- ddply(resamples[,!grepl("^cell|Resample", colnames(resamples)),drop = FALSE],
out <- ddply(resamples[,!grepl("^cell|Resample", colnames(resamples)), drop = FALSE],
## TODO check this for seq models
gsub("^\\.", "", colnames(info$loop)),
MeanSD,
Expand Down
2 changes: 1 addition & 1 deletion pkg/caret/R/workflows.R
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,7 @@ nominalTrainWorkflow <- function(x, y, wts, info, method, ppOpts, ctrl, lev, tes
if(ctrl$classProbs) tmp <- cbind(tmp, probValues)
tmp$rowIndex <- holdoutIndex

if(keep_pred || (ctrl$method == "boot_all" && names(resampleIndex)[iter] == "AllData"))
if(keep_pred)
{
tmpPred <- tmp
tmpPred$rowIndex <- holdoutIndex
Expand Down

0 comments on commit 11f7fac

Please sign in to comment.