Skip to content

Commit

Permalink
#744 Use stringi internally (#904)
Browse files Browse the repository at this point in the history
  • Loading branch information
masongallo authored and berndbischl committed Jun 1, 2016
1 parent 12cae17 commit 3f6666e
Show file tree
Hide file tree
Showing 58 changed files with 123 additions and 115 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Expand Up @@ -32,7 +32,8 @@ Depends:
BBmisc (>= 1.9),
ggplot2,
ParamHelpers (>= 1.7),
stats
stats,
stringi
Imports:
checkmate (>= 1.7.1),
data.table,
Expand All @@ -42,7 +43,6 @@ Imports:
plyr,
reshape2,
shiny,
stringi,
survival
Suggests:
ada,
Expand Down
2 changes: 1 addition & 1 deletion R/BaggingWrapper.R
Expand Up @@ -62,7 +62,7 @@ makeBaggingWrapper = function(learner, bw.iters = 10L, bw.replace = TRUE, bw.siz
}
if (learner$predict.type != "response")
stop("Predict type of the basic learner must be 'response'.")
id = paste(learner$id, "bagged", sep = ".")
id = stri_paste(learner$id, "bagged", sep = ".")
packs = learner$package
ps = makeParamSet(
makeIntegerLearnerParam(id = "bw.iters", lower = 1L, default = 10L),
Expand Down
8 changes: 4 additions & 4 deletions R/BaseEnsemble_operators.R
@@ -1,8 +1,8 @@
# Find the base learner a fully-qualified param name belongs to.
# Param names follow the scheme <learnerid>.<paramid>: we match the
# learner-id prefix against the ensemble's base learners and strip it.
#
# @param ensemble [BaseEnsemble] object holding $base.learners (named list).
# @param pn [character(1)] fully qualified param name.
# @return [list] with index (position of the matching base learner in
#   $base.learners) and par.id (param name with the learner prefix removed).
matchBaseEnsembleLearner = function(ensemble, pn) {
  # one anchored regex per base learner: "^<learnerid>\."
  patterns = stri_paste("^", names(ensemble$base.learners), "\\.")
  # index of the learner whose prefix matches the given param name
  j = which(vlapply(patterns, stri_detect_regex, str = pn))
  # strip the matched learner prefix to recover the bare param id
  par.id = stri_replace_first(pn, "", regex = patterns[j])
  list(index = j, par.id = par.id)
}

Expand All @@ -11,7 +11,7 @@ getHyperPars.BaseEnsemble = function(learner, for.fun = c("train", "predict", "b
pvs = lapply(learner$base.learners, function(lrn) {
xs = getHyperPars.Learner(lrn, for.fun = for.fun)
if (length(xs) > 0L)
names(xs) = paste0(lrn$id, ".", names(xs))
names(xs) = stri_paste(lrn$id, ".", names(xs))
return(xs)
})
# if we dont do this, R prefixes the list names again.
Expand Down
4 changes: 2 additions & 2 deletions R/BaseWrapper.R
print.BaseWrapper = function(x, ...) {
  # Build a "WrapperA->WrapperB-> BaseLearnerClass" chain description by
  # walking down the chain of wrapped learners.
  s = ""
  y = x
  while (inherits(y, "BaseWrapper")) {
    s = stri_paste(s, class(y)[1L], "->", sep = "")
    y = y$next.learner
  }
  s = stri_paste(s, class(y)[1L], sep = " ")
  # NOTE(review): 's' is constructed but not printed in the visible code —
  # confirm whether print.Learner consumes it or this is leftover work.
  print.Learner(x)
}

Expand Down
2 changes: 1 addition & 1 deletion R/ClassifTask.R
Expand Up @@ -47,7 +47,7 @@ makeTaskDesc.ClassifTask = function(task, id, target, positive) {
td$positive = positive
td$negative = NA_character_
if (length(td$class.levels) == 1L)
td$negative = paste0("not_", positive)
td$negative = stri_paste("not_", positive)
else if (length(td$class.levels) == 2L)
td$negative = setdiff(td$class.levels, positive)
return(addClasses(td, c("TaskDescClassif", "TaskDescSupervised")))
Expand Down
2 changes: 1 addition & 1 deletion R/CostSensClassifWrapper.R
Expand Up @@ -19,7 +19,7 @@
# Wrap a classification learner for cost-sensitive learning.
# The base learner must produce plain responses, so predict.type is
# forced to "response" before wrapping.
#
# @param learner [Learner | character(1)] classification learner (or its id).
# @return [CostSensClassifWrapper] the wrapped learner.
makeCostSensClassifWrapper = function(learner) {
  learner = checkLearnerClassif(learner)
  learner = setPredictType(learner, "response")
  # id scheme: "costsens.<learnerid>"
  id = stri_paste("costsens", learner$id, sep = ".")
  makeBaseWrapper(id, "costsens", learner, package = learner$package,
    learner.subclass = "CostSensClassifWrapper", model.subclass = "CostSensClassifModel")
}
Expand Down
2 changes: 1 addition & 1 deletion R/CostSensRegrWrapper.R
# Wrap a regression learner for cost-sensitive learning.
#
# @param learner [Learner | character(1)] regression learner (or its id).
# @return [CostSensRegrWrapper] the wrapped learner.
makeCostSensRegrWrapper = function(learner) {
  learner = checkLearnerRegr(learner)
  # we cannot make use of 'se' here
  learner = setPredictType(learner, "response")
  # id scheme: "costsens.<learnerid>"
  id = stri_paste("costsens", learner$id, sep = ".")
  makeHomogeneousEnsemble(id, type = "costsens", learner, package = learner$package,
    learner.subclass = "CostSensRegrWrapper", model.subclass = "CostSensRegrModel")
}
Expand Down
2 changes: 1 addition & 1 deletion R/CostSensTask.R
Expand Up @@ -16,7 +16,7 @@ makeCostSensTask = function(id = deparse(substitute(data)), data, costs, blockin
if (is.data.frame(costs))
costs = as.matrix(costs)
if (is.null(colnames(costs)))
colnames(costs) = paste0("y", seq_col(costs))
colnames(costs) = stri_paste("y", seq_col(costs))
}
task = makeSupervisedTask("costsens", data, target, weights, blocking, fixup.data = fixup.data, check.data = check.data)
task$env$costs = costs
Expand Down
2 changes: 1 addition & 1 deletion R/CostSensWeightedPairsWrapper.R
Expand Up @@ -22,7 +22,7 @@
# Wrap a classification learner for cost-sensitive learning via weighted
# pairwise comparison. The base learner must support observation weights
# and is forced to plain "response" predictions.
#
# @param learner [Learner | character(1)] classification learner (or its id).
# @return [CostSensWeightedPairsWrapper] the wrapped learner.
makeCostSensWeightedPairsWrapper = function(learner) {
  learner = checkLearnerClassif(learner, weights = TRUE)
  learner = setPredictType(learner, "response")
  # id scheme: "costsens.<learnerid>"
  id = stri_paste("costsens", learner$id, sep = ".")
  makeHomogeneousEnsemble(id, "costsens", learner, package = learner$package,
    learner.subclass = "CostSensWeightedPairsWrapper", model.subclass = "CostSensWeightedPairsModel")
}
Expand Down
2 changes: 1 addition & 1 deletion R/DownsampleWrapper.R
Expand Up @@ -29,7 +29,7 @@ makeDownsampleWrapper = function(learner, dw.perc = 1, dw.stratify = FALSE) {
assertFlag(dw.stratify)
pv$dw.stratify = dw.stratify
}
id = paste(learner$id, "downsampled", sep = ".")
id = stri_paste(learner$id, "downsampled", sep = ".")
ps = makeParamSet(
makeNumericLearnerParam(id = "dw.perc", lower = 0, upper = 1, default = 1),
makeLogicalLearnerParam(id = "dw.stratify", default = FALSE)
Expand Down
2 changes: 1 addition & 1 deletion R/FeatSelWrapper.R
Expand Up @@ -44,7 +44,7 @@ makeFeatSelWrapper = function(learner, resampling, measures, bit.names, bits.to.
}
assertClass(control, classes = "FeatSelControl")
assertFlag(show.info)
id = paste(learner$id, "featsel", sep = ".")
id = stri_paste(learner$id, "featsel", sep = ".")
x = makeOptWrapper(id, learner, resampling, measures, makeParamSet(), bit.names,
bits.to.features, control, show.info, "FeatSelWrapper", "FeatSelModel")
# checkVarselParset(learner, par.set, bit.names, control)
Expand Down
6 changes: 3 additions & 3 deletions R/Filter.R
Expand Up @@ -61,7 +61,7 @@ listFilterMethods = function(desc = TRUE, tasks = FALSE, features = FALSE) {
# Convert a list of tag vectors into an indicator data.frame:
# one row per element of 'tags', one column per distinct tag value,
# with column names optionally prefixed.
#
# @param tags [list of character] tag sets, one per row of the result.
# @param prefix [character(1)] string prepended to every column name.
# @return [data.frame] indicator table with row i / column t marking
#   whether tag t occurs in tags[[i]].
tag2df = function(tags, prefix = "") {
  unique.tags = sort(unique(unlist(tags)))
  # for each tag set: which of the unique tags it contains (row-wise matrix)
  res = asMatrixRows(lapply(tags, "%in%", x = unique.tags))
  colnames(res) = stri_paste(prefix, unique.tags)
  rownames(res) = NULL
  as.data.frame(res)
}
Expand Down Expand Up @@ -344,7 +344,7 @@ makeFilter(
fun = function(task, nselect, ...) {
data = getTaskData(task)
sapply(getTaskFeatureNames(task), function(feat.name) {
f = as.formula(paste0(feat.name,"~",getTaskTargetNames(task)))
f = as.formula(stri_paste(feat.name,"~",getTaskTargetNames(task)))
aov.t = aov(f, data = data)
summary(aov.t)[[1]][1,'F value']
})
Expand All @@ -360,7 +360,7 @@ makeFilter(
fun = function(task, nselect, ...) {
data = getTaskData(task)
sapply(getTaskFeatureNames(task), function(feat.name) {
f = as.formula(paste0(feat.name,"~", getTaskTargetNames(task)))
f = as.formula(stri_paste(feat.name,"~", getTaskTargetNames(task)))
t = kruskal.test(f, data = data)
unname(t$statistic)
})
Expand Down
2 changes: 1 addition & 1 deletion R/FilterWrapper.R
Expand Up @@ -53,7 +53,7 @@ makeFilterWrapper = function(learner, fw.method = "rf.importance", fw.perc = NUL
assertList(ddd, names = "named")

lrn = makeBaseWrapper(
id = paste(learner$id, "filtered", sep = "."),
id = stri_paste(learner$id, "filtered", sep = "."),
type = learner$type,
next.learner = learner,
package = filter$pkg,
Expand Down
6 changes: 3 additions & 3 deletions R/ModelMultiplexerParamSet.R
Expand Up @@ -56,13 +56,13 @@ makeModelMultiplexerParamSet = function(multiplexer, ..., .check = TRUE) {
pid = p$id
# end of param name we need to find
long.pid.end = sprintf("\\.%s$", pid)
found = grep(long.pid.end, all.par.ids)
found = stri_subset_regex(all.par.ids, long.pid.end)
if (length(found) == 0L)
stopf("No param of id '%s' in any base learner!", pid)
if (length(found) > 1L)
stopf("Multiple params of id '%s' found in base learners, pass correctly grouped param sets!", pid)
# get the learner that is referenced from prefix of found string + add param to correct parset
for.learner = gsub(long.pid.end, "", all.par.ids[[found]])
for.learner = stri_replace(found, "", regex = long.pid.end)
for.pars = pss[[for.learner]]$pars
for.pars[[pid]] = p
pss[[for.learner]]$pars = for.pars
Expand All @@ -88,7 +88,7 @@ makeModelMultiplexerParamSet = function(multiplexer, ..., .check = TRUE) {
pid = p$id
if (.check && (pid %nin% getParamIds(bl$par.set)))
stopf("No param of id '%s' in base learner '%s'!", pid, bl$id)
p$id = paste(bl$id, pid, sep = ".")
p$id = stri_paste(bl$id, pid, sep = ".")
p$requires = asQuoted(sprintf("selected.learner == '%s'", bl$id))
ps$pars[[j]] = p
}
Expand Down
2 changes: 1 addition & 1 deletion R/MulticlassWrapper.R
Expand Up @@ -38,7 +38,7 @@ makeMulticlassWrapper = function(learner, mcw.method = "onevsrest") {
checkFunction(mcw.method, args = "task")
)
pv = list(mcw.method = mcw.method)
id = paste(learner$id, "multiclass", sep = ".")
id = stri_paste(learner$id, "multiclass", sep = ".")

x = makeHomogeneousEnsemble(id = id, type = "classif", next.learner = learner,
package = learner$package, par.set = ps, par.vals = pv,
Expand Down
2 changes: 1 addition & 1 deletion R/MultilabelBinaryRelevanceWrapper.R
Expand Up @@ -42,7 +42,7 @@
#' }
makeMultilabelBinaryRelevanceWrapper = function(learner) {
learner = checkLearner(learner, type = "classif")
id = paste("multilabel", learner$id, sep = ".")
id = stri_paste("multilabel", learner$id, sep = ".")
packs = learner$package
x = makeHomogeneousEnsemble(id, learner$type, learner, packs,
learner.subclass = "MultilabelBinaryRelevanceWrapper", model.subclass = "MultilabelBinaryRelevanceModel")
Expand Down
2 changes: 1 addition & 1 deletion R/OverBaggingWrapper.R
Expand Up @@ -61,7 +61,7 @@ makeOverBaggingWrapper = function(learner, obw.iters = 10L, obw.rate = 1, obw.ma

if (learner$predict.type != "response")
stop("Predict type of the basic learner must be response.")
id = paste(learner$id, "overbagged", sep = ".")
id = stri_paste(learner$id, "overbagged", sep = ".")
packs = learner$package
ps = makeParamSet(
makeIntegerLearnerParam(id = "obw.iters", lower = 1L, default = 10L),
Expand Down
4 changes: 2 additions & 2 deletions R/OverUndersampleWrapper.R
Expand Up @@ -39,7 +39,7 @@ makeUndersampleWrapper = function(learner, usw.rate = 1, usw.cl = NULL) {
assertString(usw.cl)
pv$usw.cl = usw.cl
}
id = paste(learner$id, "undersampled", sep = ".")
id = stri_paste(learner$id, "undersampled", sep = ".")
ps = makeParamSet(
makeNumericLearnerParam(id = "usw.rate", lower = 0, upper = 1),
makeUntypedLearnerParam(id = "usw.cl", default = NULL, tunable = FALSE)
Expand All @@ -61,7 +61,7 @@ makeOversampleWrapper = function(learner, osw.rate = 1, osw.cl = NULL) {
assertString(osw.cl)
pv$osw.cl = osw.cl
}
id = paste(learner$id, "oversampled", sep = ".")
id = stri_paste(learner$id, "oversampled", sep = ".")
ps = makeParamSet (
makeNumericLearnerParam(id = "osw.rate", lower = 1),
makeUntypedLearnerParam(id = "osw.cl", default = NULL, tunable = FALSE)
Expand Down
6 changes: 3 additions & 3 deletions R/Prediction.R
Expand Up @@ -65,9 +65,9 @@ makePrediction.TaskDescClassif = function(task.desc, row.names, id, truth, predi
data$prob = y
data = as.data.frame(filterNull(data))
# fix columnnames for prob if strange chars are in factor levels
i = grep("prob.", names(data), fixed = TRUE)
if (length(i))
names(data)[i] = paste0("prob.", colnames(y))
indices = stri_detect_fixed(names(data), "prob.")
if (sum(indices) > 0)
names(data)[indices] = stri_paste("prob.", colnames(y))
}

p = makeS3Obj(c("PredictionClassif", "Prediction"),
Expand Down
8 changes: 4 additions & 4 deletions R/Prediction_operators.R
Expand Up @@ -49,14 +49,14 @@ getPredictionProbabilities = function(pred, cl) {
stop("Probabilities not present in Prediction object!")
cns = colnames(pred$data)
if (ttype %in% c("classif", "multilabel")) {
cl2 = paste("prob", cl, sep = ".")
cl2 = stri_paste("prob", cl, sep = ".")
if (!all(cl2 %in% cns))
stopf("Trying to get probabilities for nonexistant classes: %s", collapse(cl))
y = pred$data[, cl2]
if (length(cl) > 1L)
colnames(y) = cl
} else if (ttype == "cluster") {
y = pred$data[, grepl("prob\\.", cns)]
y = pred$data[, stri_detect_regex(cns, "prob\\.")]
colnames(y) = seq_col(y)
}
return(y)
Expand Down Expand Up @@ -111,7 +111,7 @@ getPredictionResponse.default = function(pred) {

#' @export
getPredictionResponse.PredictionMultilabel = function(pred) {
  # select the "response.<label>" columns of the prediction data and
  # return them as a matrix named after the task's class levels
  i = stri_detect_regex(colnames(pred$data), "^response\\.")
  m = as.matrix(pred$data[, i])
  setColNames(m, pred$task.desc$class.levels)
}
Expand Down Expand Up @@ -151,7 +151,7 @@ getPredictionTruth.PredictionSurv = function(pred) {

#' @export
getPredictionTruth.PredictionMultilabel = function(pred) {
  # select the "truth.<label>" columns of the prediction data and
  # return them as a matrix named after the task's class levels
  i = stri_detect_regex(colnames(pred$data), "^truth\\.")
  m = as.matrix(pred$data[, i])
  setColNames(m, pred$task.desc$class.levels)
}
2 changes: 1 addition & 1 deletion R/PreprocWrapper.R
Expand Up @@ -40,7 +40,7 @@ makePreprocWrapper = function(learner, train, predict, par.set = makeParamSet(),
if (!isProperlyNamed(par.vals))
stop("'par.vals' must be a properly named list!")

id = paste(learner$id, "preproc", sep = ".")
id = stri_paste(learner$id, "preproc", sep = ".")
x = makeBaseWrapper(id, type = learner$type, next.learner = learner, par.set = par.set,
par.vals = par.vals, learner.subclass = "PreprocWrapper", model.subclass = "PreprocModel")
x$train = train
Expand Down
2 changes: 1 addition & 1 deletion R/RLearner.R
Expand Up @@ -60,7 +60,7 @@ makeRLearnerInternal = function(id, type, package, par.set, par.vals, properties
# must do that before accessing par.set
# one case where lazy eval is actually helpful...
assertCharacter(package, any.missing = FALSE)
requirePackages(package, why = paste("learner", id), default.method = "load")
requirePackages(package, why = stri_paste("learner", id, sep = " "), default.method = "load")

assertString(id)
assertChoice(type, choices = c("classif", "regr", "multilabel", "surv", "cluster", "costsens"))
Expand Down
8 changes: 4 additions & 4 deletions R/RLearner_classif_avNNet.R
Expand Up @@ -31,11 +31,11 @@ trainLearner.classif.avNNet = function(.learner, .task, .subset, .weights = NULL
bag = FALSE

nms = names(.learner$par.vals)
ind = grep('repeats',nms)
if (length(ind)>0)
ind = stri_detect_regex(nms, "repeats")
if (sum(ind)>0)
repeats = .learner$par.vals[[ind]]
ind = grep('bag',nms)
if (length(ind)>0)
ind = stri_detect_regex(nms, "bag")
if (sum(ind)>0)
bag = .learner$par.vals[[ind]]

assertInt(repeats, lower = 1)
Expand Down
6 changes: 3 additions & 3 deletions R/RLearner_classif_dcSVM.R
Expand Up @@ -32,19 +32,19 @@ trainLearner.classif.dcSVM = function(.learner, .task, .subset, .weights = NULL,
pars = list(...)
m.flag = FALSE
max.levels.flag = FALSE
if (!any(grepl('m', names(pars)))) {
if (!any(stri_detect_regex(names(pars), 'm'))) {
m = 800
m.flag = TRUE
} else {
m = pars$m
}
if (!any(grepl('max.levels', names(pars)))) {
if (!any(stri_detect_regex(names(pars), 'max.levels'))) {
max.levels = 1
max.levels.flag = TRUE
} else {
max.levels = pars$max.levels
}
if (!any(grepl('k', names(pars)))) {
if (!any(stri_detect_regex(names(pars), 'k'))) {
k = 4
} else {
k = pars$k
Expand Down
5 changes: 3 additions & 2 deletions R/RLearner_classif_glmboost.R
Expand Up @@ -17,9 +17,10 @@ makeRLearner.classif.glmboost = function() {
properties = c("twoclass", "numerics", "factors", "prob", "weights"),
name = "Boosting for GLMs",
short.name = "glmbst",
note = paste(
note = stri_paste(
"`family` has been set to `Binomial()` by default.",
"Maximum number of boosting iterations is set via `mstop`, the actual number used for prediction is controlled by `m`."
"Maximum number of boosting iterations is set via `mstop`, the actual number used for prediction is controlled by `m`.",
sep = " "
)
)
}
Expand Down
8 changes: 5 additions & 3 deletions R/RLearner_classif_neuralnet.R
Expand Up @@ -54,10 +54,12 @@ trainLearner.classif.neuralnet = function(.learner, .task, .subset, .weights = N
if (!all(taskdat[[formula_head]]== 0 | taskdat[[formula_head]] == 1)){
taskdat[[formula_head]] = taskdat[[formula_head]]-1
}
if (sum(grepl('\\.',cf))>0){
if (sum(stri_detect_regex(cf, '\\.')) > 0){
varnames = nms[nms!=formula_head]
formula_head = paste('as.numeric(',formula_head,')~')
formula_expand = paste(formula_head, paste(varnames, collapse = "+"))
formula_head = stri_paste('as.numeric(',formula_head,')~', sep = " ")
formula_expand = stri_paste(formula_head,
stri_paste(varnames, collapse = "+", sep = " "),
sep = " ")
formula_expand = as.formula(formula_expand)
f = formula_expand
}
Expand Down
8 changes: 4 additions & 4 deletions R/RLearner_regr_avNNet.R
Expand Up @@ -35,11 +35,11 @@ trainLearner.regr.avNNet = function(.learner, .task, .subset, .weights = NULL, .
bag = FALSE

nms = names(.learner$par.vals)
ind = grep('repeats',nms)
if (length(ind)>0)
ind = stri_detect_regex(nms, "repeats")
if (sum(ind)>0)
repeats = .learner$par.vals[[ind]]
ind = grep('bag',nms)
if (length(ind)>0)
ind = stri_detect_regex(nms, "bag")
if (sum(ind)>0)
bag = .learner$par.vals[[ind]]

assertInt(repeats, lower = 1)
Expand Down
2 changes: 1 addition & 1 deletion R/RLearner_regr_mob.R
Expand Up @@ -37,7 +37,7 @@ trainLearner.regr.mob = function(.learner, .task, .subset, .weights = NULL, alph
term.feats = feats

target = getTaskTargetNames(.task)
f = as.formula(paste(target, "~", collapse(term.feats, sep = " + "), "|", collapse(part.feats, sep = " + ")))
f = as.formula(stri_paste(target, "~", collapse(term.feats, sep = " + "), "|", collapse(part.feats, sep = " + "), sep = " "))

if (is.null(.weights)) {
model = party::mob(f, data = getTaskData(.task, .subset), control = cntrl, ...)
Expand Down

0 comments on commit 3f6666e

Please sign in to comment.