#' @title Create a stacked learner object.
#'
#' @description A stacked learner uses predictions of several base learners and fits
#' a super learner using these predictions as features in order to predict the outcome.
#' The following stacking methods are available:
#'
#' \describe{
#' \item{`average`}{Averaging of base learner predictions without weights.}
#' \item{`stack.nocv`}{Fits the super learner on the in-sample predictions of the base learners.}
#' \item{`stack.cv`}{Fits the super learner on cross-validated predictions of the base learners
#' (the resampling strategy can be set via the `resampling` argument).}
#' \item{`hill.climb`}{Selects a subset of base learner predictions with a hill climbing algorithm.}
#' \item{`compress`}{Trains a neural network that compresses the ensemble of base learners into a single model.}
#' }
#'
#' @param base.learners ((list of) [Learner])\cr
#' A list of learners created with `makeLearner`.
#' @param super.learner ([Learner] | `character(1)`)\cr
#' The super learner that makes the final prediction based on the base learners.
#' If you pass a string, the super learner will be created via `makeLearner`.
#' Not used for `method = 'average'`. Default is `NULL`.
#' @param predict.type (`character(1)`)\cr
#' Sets the type of the final prediction for `method = 'average'`.
#' For other methods, the predict type should be set within `super.learner`.
#' If the type of the base learner prediction, which is set up within `base.learners`, is
#' \describe{
#' \item{`"prob"`}{then `predict.type = 'prob'` will use the average of all
#' bease learner predictions and `predict.type = 'response'` will use
#' the class with highest probability as final prediction.}
#' \item{`"response"`}{then, for classification tasks with `predict.type = 'prob'`,
#' the final prediction will be the relative frequency based on the predicted base learner classes
#' and classification tasks with `predict.type = 'response'` will use majority vote of the base
#' learner predictions to determine the final prediction.
#' For regression tasks, the final prediction will be the average of the base learner predictions.}
#' }
#'
#' @param method (`character(1)`)\cr
#' \dQuote{average} for averaging the predictions of the base learners,
#' \dQuote{stack.nocv} for building a super learner using the in-sample predictions of the base learners,
#' \dQuote{stack.cv} for building a super learner using cross-validated predictions of the base learners,
#' \dQuote{hill.climb} for averaging the predictions of the base learners with weights learned by
#' a hill climbing algorithm, and
#' \dQuote{compress} for compressing the model to mimic the predictions of a collection of base learners
#' while speeding up prediction and reducing the size of the model.
#' Default is \dQuote{stack.nocv}.
#' @param use.feat (`logical(1)`)\cr
#' Whether the original features should also be passed to the super learner.
#' Not used for `method = 'average'`.
#' Default is `FALSE`.
#' @param resampling ([ResampleDesc])\cr
#' Resampling strategy for `method = 'stack.cv'`.
#' Currently only CV is allowed for resampling.
#' The default `NULL` uses 5-fold CV.
#' @param parset (`list`)\cr
#' The parameters for the `hill.climb` method, including:
#' \describe{
#' \item{`replace`}{Whether a base learner can be selected more than once.}
#' \item{`init`}{Number of the best models to include before the selection algorithm starts.}
#' \item{`bagprob`}{The proportion of models considered in one round of selection.}
#' \item{`bagtime`}{The number of rounds of the bagging selection.}
#' \item{`metric`}{The evaluation metric, a function of the two parameters `pred` and `true`;
#' the smaller the score the better.}
#' }
#' and the parameters for the `compress` method, including:
#' \describe{
#' \item{`k`}{The size multiplier of the generated data.}
#' \item{`prob`}{The probability of exchanging values.}
#' \item{`s`}{The standard deviation of each numerical feature.}
#' }
#' @examples
#' # Classification
#' data(iris)
#' tsk = makeClassifTask(data = iris, target = "Species")
#' base = c("classif.rpart", "classif.lda", "classif.svm")
#' lrns = lapply(base, makeLearner)
#' lrns = lapply(lrns, setPredictType, "prob")
#' m = makeStackedLearner(base.learners = lrns,
#' predict.type = "prob", method = "hill.climb")
#' tmp = train(m, tsk)
#' res = predict(tmp, tsk)
#'
#' # Regression
#' data(BostonHousing, package = "mlbench")
#' tsk = makeRegrTask(data = BostonHousing, target = "medv")
#' base = c("regr.rpart", "regr.svm")
#' lrns = lapply(base, makeLearner)
#' m = makeStackedLearner(base.learners = lrns,
#' predict.type = "response", method = "compress")
#' tmp = train(m, tsk)
#' res = predict(tmp, tsk)
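#'
#' # Stacking via a super learner trained on cross-validated base learner
#' # predictions (a minimal sketch; "regr.lm" is just one possible super learner)
#' m = makeStackedLearner(base.learners = lrns, super.learner = "regr.lm",
#' method = "stack.cv")
#' tmp = train(m, tsk)
#' res = predict(tmp, tsk)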
#' @noMd
#' @export
makeStackedLearner = function(base.learners, super.learner = NULL, predict.type = NULL,
method = "stack.nocv", use.feat = FALSE, resampling = NULL, parset = list()) {
if (is.character(base.learners)) base.learners = lapply(base.learners, checkLearner)
if (is.null(super.learner) && method == "compress") {
super.learner = makeLearner(stri_paste(base.learners[[1]]$type, ".nnet"))
}
if (!is.null(super.learner)) {
super.learner = checkLearner(super.learner)
if (!is.null(predict.type)) super.learner = setPredictType(super.learner, predict.type)
}
base.type = unique(extractSubList(base.learners, "type"))
if (!is.null(resampling) && method != "stack.cv") {
stop("A custom 'resampling' can only be passed for method = 'stack.cv'.")
}
if (is.null(resampling)) {
resampling = makeResampleDesc("CV", iters = 5L,
stratify = base.type == "classif")
}
assertChoice(method, c("average", "stack.nocv", "stack.cv", "hill.climb", "compress"))
assertClass(resampling, "ResampleDesc")
pts = unique(extractSubList(base.learners, "predict.type"))
if ("se" %in% pts || (!is.null(predict.type) && predict.type == "se") ||
(!is.null(super.learner) && super.learner$predict.type == "se")) {
stop("Predicting standard errors currently not supported.")
}
if (length(pts) > 1L) {
stop("Base learner must all have the same predict type!")
}
if ((method == "average" || method == "hill.climb") & (!is.null(super.learner) || is.null(predict.type))) {
stop("No super learner needed for this method or the 'predict.type' is not specified.")
}
if (method != "average" & method != "hill.climb" & is.null(super.learner)) {
stop("You have to specify a super learner for this method.")
}
# if (method != "average" & !is.null(predict.type))
# stop("Predict type has to be specified within the super learner.")
if ((method == "average" || method == "hill.climb") & use.feat) {
stop("The original features can not be used for this method")
}
if (!inherits(resampling, "CVDesc")) {
stop("Currently only CV is allowed for resampling!")
}
# lrn$predict.type is "response" by default; change it using setPredictType
lrn = makeBaseEnsemble(
id = "stack",
base.learners = base.learners,
cl = "StackedLearner"
)
# get predict.type from super learner or from predict.type
if (!is.null(super.learner)) {
lrn = setPredictType(lrn, predict.type = super.learner$predict.type)
} else {
lrn = setPredictType(lrn, predict.type = predict.type)
}
lrn$fix.factors.prediction = TRUE
lrn$use.feat = use.feat
lrn$method = method
lrn$super.learner = super.learner
lrn$resampling = resampling
lrn$parset = parset
return(lrn)
}
# FIXME: see FIXME in predictLearner.StackedLearner; I don't know how to make it better.
#'
#' @title Returns the predictions for each base learner.
#'
#' @description Returns the predictions for each base learner.
#'
#' @param model ([WrappedModel])\cr Wrapped model, result of train.
#' @param newdata ([data.frame])\cr
#' New observations, for which the predictions using the specified base learners should be returned.
#' Default is `NULL` and extracts the base learner predictions that were made during the training.
#'
#' @details None.
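#'
#' @examples
#' # A minimal sketch: train a stacked learner, then inspect the base learner
#' # predictions stored during training (learner choices here are illustrative)
#' tsk = makeClassifTask(data = iris, target = "Species")
#' lrns = lapply(c("classif.rpart", "classif.lda"), makeLearner)
#' m = makeStackedLearner(base.learners = lrns, super.learner = "classif.rpart",
#' method = "stack.nocv")
#' tmp = train(m, tsk)
#' head(getStackedBaseLearnerPredictions(tmp)[[1]])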
#'
#' @export
getStackedBaseLearnerPredictions = function(model, newdata = NULL) {
# get base learner and predict type
bms = model$learner.model$base.models
method = model$learner.model$method
if (is.null(newdata)) {
probs = model$learner.model$pred.train
} else {
# if (model == "stack.cv") warning("Crossvalidated predictions for new data is not possible for this method.")
# predict prob vectors with each base model
probs = vector("list", length(bms))
for (i in seq_along(bms)) {
pred = predict(bms[[i]], newdata = newdata)
probs[[i]] = getResponse(pred, full.matrix = ifelse(method %in% c("average", "hill.climb"), TRUE, FALSE))
}
names(probs) = sapply(bms, function(X) X$learner$id) # names(.learner$base.learners)
}
return(probs)
}
#' @export
trainLearner.StackedLearner = function(.learner, .task, .subset, ...) {
# reduce to subset we want to train ensemble on
.task = subsetTask(.task, subset = .subset)
switch(.learner$method,
average = averageBaseLearners(.learner, .task),
stack.nocv = stackNoCV(.learner, .task),
stack.cv = stackCV(.learner, .task),
# hill.climb = hillclimbBaseLearners(.learner, .task, ...)
hill.climb = do.call(hillclimbBaseLearners, c(list(.learner, .task), .learner$parset)),
compress = compressBaseLearners(.learner, .task, .learner$parset)
)
}
# FIXME: if newdata is the same data that was also used for training, then getStackedBaseLearnerPredictions
# won't use the crossvalidated predictions (for method = "stack.cv").
#' @export
predictLearner.StackedLearner = function(.learner, .model, .newdata, ...) {
use.feat = .model$learner$use.feat
# get predict.type from learner and super model (if available)
sm.pt = .model$learner$predict.type
sm = .model$learner.model$super.model
# get base learner and predict type
bms.pt = unique(extractSubList(.model$learner$base.learners, "predict.type"))
# get task information (classif)
td = .model$task.desc
type = ifelse(td$type == "regr", "regr",
ifelse(length(td$class.levels) == 2L, "classif", "multiclassif"))
# predict prob vectors with each base model
if (.learner$method != "compress") {
probs = getStackedBaseLearnerPredictions(model = .model, newdata = .newdata)
} else {
probs = .newdata
}
if (.learner$method %in% c("average", "hill.climb")) {
if (.learner$method == "hill.climb") {
model.weight = .model$learner.model$weights
} else {
model.weight = rep(1 / length(probs), length(probs))
}
if (bms.pt == "prob") {
# if base learner predictions are probabilities for classification
for (i in seq_along(probs)) {
probs[[i]] = probs[[i]] * model.weight[i]
}
prob = Reduce("+", probs)
if (sm.pt == "prob") {
# if super learner predictions should be probabilities
return(as.matrix(prob))
} else {
# if super learner predictions should be responses
return(factor(colnames(prob)[max.col(prob)], td$class.levels))
}
} else {
probs = as.data.frame(probs)
# if base learner predictions are responses
if (type == "classif" || type == "multiclassif") {
# if base learner predictions are responses for classification
if (sm.pt == "prob") {
# if super learner predictions should be probabilities, iter over rows to get proportions
# FIXME: this is very slow + CUMBERSOME. we also do it in more places
# we need a bbmisc fun for counting proportions in rows or cols
# probs = apply(probs, 1L, function(x) (table(factor(x, td$class.levels))/length(x)))
# return(setColNames(t(probs), td$class.levels))
probs = rowiseRatio(probs, td$class.levels, model.weight)
return(probs)
} else {
# if super learner predictions should be responses
return(factor(apply(probs, 1L, computeMode), td$class.levels))
}
}
if (type == "regr") {
# if base learner predictions are responses for regression
prob = Reduce("+", probs) / length(probs) # rowMeans(probs)
return(prob)
}
}
} else if (.learner$method == "compress") {
probs = as.data.frame(probs)
pred = predict(sm, newdata = probs)
if (sm.pt == "prob") {
return(as.matrix(getPredictionProbabilities(pred, cl = td$class.levels)))
} else {
return(pred$data$response)
}
} else {
probs = as.data.frame(probs)
# feed probs into super model and we are done
feat = .newdata[, colnames(.newdata) %nin% td$target, drop = FALSE]
if (use.feat) {
pred.data = cbind(probs, feat)
} else {
pred.data = probs
}
pred = predict(sm, newdata = pred.data)
if (sm.pt == "prob") {
return(as.matrix(getPredictionProbabilities(pred, cl = td$class.levels)))
} else {
return(pred$data$response)
}
}
}
# Sets the predict.type for the super learner of a stacked learner
#' @export
setPredictType.StackedLearner = function(learner, predict.type) {
lrn = setPredictType.Learner(learner, predict.type)
lrn$predict.type = predict.type
if ("super.learner" %in% names(lrn)) lrn$super.learner$predict.type = predict.type
return(lrn)
}
### helpers to implement different ensemble types ###
# Simple unweighted averaging of base learner predictions. We should beat this.
averageBaseLearners = function(learner, task) {
bls = learner$base.learners
base.models = probs = vector("list", length(bls))
for (i in seq_along(bls)) {
bl = bls[[i]]
model = train(bl, task)
base.models[[i]] = model
pred = predict(model, task = task)
probs[[i]] = getResponse(pred, full.matrix = TRUE)
}
names(probs) = names(bls)
list(method = "average", base.models = base.models, super.model = NULL,
pred.train = probs)
}
# stacking where we predict the training set in-sample, then super-learn on that
stackNoCV = function(learner, task) {
td = getTaskDesc(task)
type = ifelse(td$type == "regr", "regr",
ifelse(length(td$class.levels) == 2L, "classif", "multiclassif"))
bls = learner$base.learners
use.feat = learner$use.feat
base.models = probs = vector("list", length(bls))
for (i in seq_along(bls)) {
bl = bls[[i]]
model = train(bl, task)
base.models[[i]] = model
pred = predict(model, task = task)
probs[[i]] = getResponse(pred, full.matrix = FALSE)
}
names(probs) = names(bls)
pred.train = probs
if (type == "regr" || type == "classif") {
probs = as.data.frame(probs)
} else {
probs = as.data.frame(lapply(probs, function(X) X)) # X[, -ncol(X)]))
}
# now fit the super learner for predicted_probs --> target
probs[[td$target]] = getTaskTargets(task)
if (use.feat) {
# add data with normal features
feat = getTaskData(task)
feat = feat[, colnames(feat) %nin% td$target, drop = FALSE]
probs = cbind(probs, feat)
super.task = makeSuperLearnerTask(learner, data = probs,
target = td$target)
} else {
super.task = makeSuperLearnerTask(learner, data = probs, target = td$target)
}
super.model = train(learner$super.learner, super.task)
list(method = "stack.no.cv", base.models = base.models,
super.model = super.model, pred.train = pred.train)
}
# stacking where we crossval the training set with the base learners, then super-learn on that
stackCV = function(learner, task) {
td = getTaskDesc(task)
type = ifelse(td$type == "regr", "regr",
ifelse(length(td$class.levels) == 2L, "classif", "multiclassif"))
bls = learner$base.learners
use.feat = learner$use.feat
# cross-validate all base learners and get a prob vector for the whole dataset for each learner
base.models = probs = vector("list", length(bls))
rin = makeResampleInstance(learner$resampling, task = task)
for (i in seq_along(bls)) {
bl = bls[[i]]
r = resample(bl, task, rin, show.info = FALSE)
probs[[i]] = getResponse(r$pred, full.matrix = FALSE)
# also fit all base models again on the complete original data set
base.models[[i]] = train(bl, task)
}
names(probs) = names(bls)
if (type == "regr" || type == "classif") {
probs = as.data.frame(probs)
} else {
probs = as.data.frame(lapply(probs, function(X) X)) # X[, -ncol(X)]))
}
# add true target column IN CORRECT ORDER
tn = getTaskTargetNames(task)
test.inds = unlist(rin$test.inds)
pred.train = as.list(probs[order(test.inds), , drop = FALSE])
probs[[tn]] = getTaskTargets(task)[test.inds]
# now fit the super learner for predicted_probs --> target
probs = probs[order(test.inds), , drop = FALSE]
if (use.feat) {
# add data with normal features IN CORRECT ORDER
feat = getTaskData(task) # [test.inds, ]
feat = feat[, !colnames(feat) %in% tn, drop = FALSE]
pred.data = cbind(probs, feat)
super.task = makeSuperLearnerTask(learner, data = pred.data, target = tn)
} else {
super.task = makeSuperLearnerTask(learner, data = probs, target = tn)
}
super.model = train(learner$super.learner, super.task)
list(method = "stack.cv", base.models = base.models,
super.model = super.model, pred.train = pred.train)
}
hillclimbBaseLearners = function(learner, task, replace = TRUE, init = 0, bagprob = 1, bagtime = 1,
metric = NULL, ...) {
assertFlag(replace)
assertInt(init, lower = 0)
assertNumber(bagprob, lower = 0, upper = 1)
assertInt(bagtime, lower = 1)
td = getTaskDesc(task)
type = ifelse(td$type == "regr", "regr",
ifelse(length(td$class.levels) == 2L, "classif", "multiclassif"))
if (is.null(metric)) {
if (type == "regr") {
metric = function(pred, true) mean((pred - true)^2)
} else {
metric = function(pred, true) {
pred = colnames(pred)[max.col(pred)]
tb = table(pred, true)
return(1 - sum(diag(tb)) / sum(tb))
}
}
}
assertFunction(metric)
bls = learner$base.learners
if (type != "regr") {
for (i in seq_along(bls)) {
if (bls[[i]]$predict.type == "response") {
stop("Hill climbing algorithm only takes probability predict type for classification.")
}
}
}
# cross-validate all base learners and get a prob vector for the whole dataset for each learner
base.models = probs = vector("list", length(bls))
rin = makeResampleInstance(learner$resampling, task = task)
for (i in seq_along(bls)) {
bl = bls[[i]]
r = resample(bl, task, rin, show.info = FALSE)
if (type == "regr") {
probs[[i]] = matrix(getResponse(r$pred), ncol = 1)
} else {
probs[[i]] = getResponse(r$pred, full.matrix = TRUE)
colnames(probs[[i]]) = task$task.desc$class.levels
}
# also fit all base models again on the complete original data set
base.models[[i]] = train(bl, task)
}
names(probs) = names(bls)
# add true target column IN CORRECT ORDER
tn = getTaskTargetNames(task)
test.inds = unlist(rin$test.inds)
# now start the hill climbing
probs = lapply(probs, function(x) x[order(test.inds), , drop = FALSE])
probs[[tn]] = getTaskTargets(task)[test.inds]
probs[[tn]] = probs[[tn]][order(test.inds)]
# probs = probs[order(test.inds), , drop = FALSE]
m = length(bls)
weights = rep(0, m)
flag = TRUE
for (bagind in 1:bagtime) {
# bagging of models
bagsize = ceiling(m * bagprob)
bagmodel = sample(1:m, bagsize)
weight = rep(0, m) # length m so that model indices drawn into 'bagmodel' index it correctly
# Initial selection of strongest learners
inds = NULL
if (init > 0) {
score = rep(Inf, m)
for (i in bagmodel) {
score[i] = metric(probs[[i]], probs[[tn]])
}
inds = order(score)[1:init]
weight[inds] = 1
}
selection.size = init
selection.ind = inds
# current.prob = rep(0, nrow(probs))
current.prob = matrix(0, nrow(probs[[1]]), ncol(probs[[1]]))
old.score = Inf
if (selection.size > 0) {
current.prob = Reduce("+", probs[selection.ind])
old.score = metric(current.prob / selection.size, probs[[tn]])
}
flag = TRUE
while (flag) {
score = rep(Inf, m)
for (i in bagmodel) {
score[i] = metric((probs[[i]] + current.prob) / (selection.size + 1), probs[[tn]])
}
inds = order(score)
if (!replace) {
ind = setdiff(inds, selection.ind)[1]
} else {
ind = inds[1]
}
new.score = score[ind]
if (old.score - new.score < 1e-8) {
flag = FALSE
} else {
current.prob = current.prob + probs[[ind]]
weights[ind] = weights[ind] + 1
selection.ind = c(selection.ind, ind)
selection.size = selection.size + 1
old.score = new.score
}
}
weights = weights + weight
}
weights = weights / sum(weights)
list(method = "hill.climb", base.models = base.models, super.model = NULL,
pred.train = probs, weights = weights)
}
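# Example (illustrative): a custom metric can be supplied through 'parset',
# e.g. RMSE instead of the default mean squared error for regression:
#   makeStackedLearner(base.learners = lrns, predict.type = "response",
#     method = "hill.climb",
#     parset = list(metric = function(pred, true) sqrt(mean((pred - true)^2))))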
compressBaseLearners = function(learner, task, parset = list()) {
lrn = learner
lrn$method = "hill.climb"
ensemble.model = train(lrn, task)
data = getTaskData(task, target.extra = TRUE)
data = data[[1]]
pseudo.data = do.call(getPseudoData, c(list(data), parset))
pseudo.target = predict(ensemble.model, newdata = pseudo.data)
pseudo.data = data.frame(pseudo.data, target = pseudo.target$data$response)
td = ensemble.model$task.desc
type = ifelse(td$type == "regr", "regr",
ifelse(length(td$class.levels) == 2L, "classif", "multiclassif"))
if (type == "regr") {
new.task = makeRegrTask(data = pseudo.data, target = "target")
if (is.null(learner$super.learner)) {
m = makeLearner("regr.nnet", predict.type = ) # nolint
} else {
m = learner$super.learner
}
} else {
new.task = makeClassifTask(data = pseudo.data, target = "target")
if (is.null(learner$super.learner)) {
m = makeLearner("classif.nnet", predict.type = "")
} else {
m = learner$super.learner
}
}
super.model = train(m, new.task)
list(method = "compress", base.learners = lrn$base.learners, super.model = super.model,
pred.train = pseudo.data)
}
### other helpers ###
# Returns the predictions in the form needed by stackNoCV, stackCV and predictLearner
getResponse = function(pred, full.matrix = TRUE) {
# if classification with probabilities
if (pred$predict.type == "prob") {
if (full.matrix) {
# return matrix of probabilities
td = pred$task.desc
pred.return = pred$data[, stri_paste("prob", td$class.levels, sep = ".")]
colnames(pred.return) = td$class.levels
return(pred.return)
} else {
# return only vector of probabilities for binary classification
return(getPredictionProbabilities(pred))
}
} else {
# if regression task
pred$data$response
}
}
# Create a super learner task
makeSuperLearnerTask = function(learner, data, target) {
if (learner$super.learner$type == "classif") {
makeClassifTask(data = data, target = target)
} else {
makeRegrTask(data = data, target = target)
}
}
# Compute weighted row-wise class proportions from response predictions
rowiseRatio = function(probs, levels, model.weight = NULL) {
m = length(levels)
p = ncol(probs)
if (is.null(model.weight)) {
model.weight = rep(1 / p, p)
}
mat = matrix(0, nrow(probs), m)
for (i in 1:m) {
ids = matrix(probs == levels[i], nrow(probs), p)
for (j in 1:p) {
ids[, j] = ids[, j] * model.weight[j]
}
mat[, i] = rowSums(ids)
}
colnames(mat) = levels
return(mat)
}
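# Example (illustrative): two equally weighted models predicting levels
# c("a", "b") yield the per-row class proportions, e.g.
#   rowiseRatio(data.frame(m1 = c("a", "a"), m2 = c("a", "b")), c("a", "b"))
# returns the rows (1, 0) and (0.5, 0.5).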
getPseudoData = function(.data, k = 3, prob = 0.1, s = NULL, ...) {
res = NULL
n = nrow(.data)
ori.names = names(.data)
feat.class = sapply(.data, class)
ind2 = which(feat.class == "factor")
ind1 = setdiff(seq_len(ncol(.data)), ind2)
if (length(ind2) > 0) {
# levels() is NULL for non-factor columns, so ori.labels can be indexed by column position below
ori.labels = lapply(.data, levels)
}
.data = lapply(.data, as.numeric)
.data = as.data.frame(.data)
# Normalization
mn = rep(0, ncol(.data))
mx = rep(0, ncol(.data))
for (i in ind1) {
mn[i] = min(.data[, i])
mx[i] = max(.data[, i])
.data[, i] = (.data[, i] - mn[i]) / (mx[i] - mn[i])
}
if (is.null(s)) {
s = rep(0, ncol(.data))
for (i in ind1) {
s[i] = sd(.data[, i])
}
}
testNumeric(s, len = ncol(.data), lower = 0)
# Func to calc dist
hamming = function(mat) {
n = nrow(mat)
m = ncol(mat)
res = matrix(0, n, n)
for (j in 1:m) {
unq = unique(mat[, j])
p = length(unq)
for (i in 1:p) {
ind = which(mat[, j] == unq[i])
res[ind, -ind] = res[ind, -ind] + 1
}
}
return(res)
}
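# Example (illustrative): for each pair of rows, hamming() counts the columns
# in which the two rows differ, e.g. for the rows (1, 1), (1, 2) and (2, 2):
#   hamming(matrix(c(1, 1, 2, 1, 2, 2), nrow = 3))
# yields pairwise distances d(1,2) = 1, d(1,3) = 2 and d(2,3) = 1.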
one.nn = function(mat, ind1, ind2) {
n = nrow(mat)
dist.mat.1 = matrix(0, n, n)
dist.mat.2 = matrix(0, n, n)
if (length(ind1) > 0) {
dist.mat.1 = as.matrix(stats::dist(mat[, ind1, drop = FALSE]))
}
if (length(ind2) > 0) {
dist.mat.2 = hamming(mat[, ind2, drop = FALSE])
}
dist.mat = dist.mat.1 + dist.mat.2
neighbour = max.col(-dist.mat - diag(Inf, n))
return(neighbour)
}
# Get the neighbour
neighbour = one.nn(.data, ind1, ind2)
# Start the loop
p = ncol(.data)
for (loop in 1:k) {
data = .data
prob.mat = matrix(sample(c(0, 1), n * p, replace = TRUE, prob = c(prob, 1 - prob)), n, p)
prob.mat = prob.mat == 0
for (i in 1:n) {
e = as.numeric(data[i, ])
ee = as.numeric(data[neighbour[i], ])
# continuous
for (j in ind1) {
if (prob.mat[i, j]) {
current.sd = abs(e[j] - ee[j]) / s[j]
tmp1 = rnorm(1, ee[j], current.sd)
tmp2 = rnorm(1, e[j], current.sd)
e[j] = tmp1
ee[j] = tmp2
}
}
for (j in ind2) {
if (prob.mat[i, j]) {
tmp = e[j]
e[j] = ee[j]
ee[j] = tmp
}
}
data[i, ] = ee
data[neighbour[i], ] = e
}
res = rbind(res, data)
}
for (i in ind1) {
res[, i] = res[, i] * (mx[i] - mn[i]) + mn[i]
}
res = data.frame(res)
names(res) = ori.names
for (i in ind2) {
res[[i]] = factor(res[[i]], labels = ori.labels[[i]])
}
return(res)
}
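# Example (illustrative): generate a pseudo data set three times the size of
# the original, perturbing each value with probability 0.1:
#   pseudo = getPseudoData(iris[, 1:4], k = 3, prob = 0.1)
# Column names and factor levels of the original data are preserved.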
# FIXMEs:
# - document + test + export
# - benchmark stuff on openml
# - allow base.learners to be character of learners (not only list of learners)
# - rename 'probs' in code into 'preds'
# - allow option to remove predictions for one class in multiclass tasks (to avoid collinearity)
# - DONE: return predictions from each single base learner
# - DONE: allow predict.type = "response" for classif using majority vote (for super learner predict type "response")
# and using average for super learner predict type "prob".
# - DONE: add option to use normal features in super learner
# - DONE: super learner can also return predicted probabilities
# - DONE: allow regression as well