Skip to content

Commit

Permalink
#744 Use stringi internally (#904)
Browse files Browse the repository at this point in the history
  • Loading branch information
masongallo authored and berndbischl committed Jun 1, 2016
1 parent 12cae17 commit 3f6666e
Show file tree
Hide file tree
Showing 58 changed files with 123 additions and 115 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Expand Up @@ -32,7 +32,8 @@ Depends:
BBmisc (>= 1.9),
ggplot2,
ParamHelpers (>= 1.7),
stats
stats,
stringi
Imports:
checkmate (>= 1.7.1),
data.table,
Expand All @@ -42,7 +43,6 @@ Imports:
plyr,
reshape2,
shiny,
stringi,
survival
Suggests:
ada,
Expand Down
2 changes: 1 addition & 1 deletion R/BaggingWrapper.R
Expand Up @@ -62,7 +62,7 @@ makeBaggingWrapper = function(learner, bw.iters = 10L, bw.replace = TRUE, bw.siz
}
if (learner$predict.type != "response")
stop("Predict type of the basic learner must be 'response'.")
id = paste(learner$id, "bagged", sep = ".")
id = stri_paste(learner$id, "bagged", sep = ".")
packs = learner$package
ps = makeParamSet(
makeIntegerLearnerParam(id = "bw.iters", lower = 1L, default = 10L),
Expand Down
8 changes: 4 additions & 4 deletions R/BaseEnsemble_operators.R
@@ -1,8 +1,8 @@
# Find the base learner a fully-qualified param name belongs to.
# Param names follow the scheme <learnerid>.<paramid>: we match the
# learner-id prefix against the ensemble's base learners and strip it.
#
# @param ensemble [BaseEnsemble] object holding $base.learners (named list).
# @param pn [character(1)] fully qualified param name.
# @return [list] with index (position of the matching base learner in
#   $base.learners) and par.id (param name with the learner prefix removed).
matchBaseEnsembleLearner = function(ensemble, pn) {
  # one anchored regex per base learner: "^<learnerid>\."
  patterns = stri_paste("^", names(ensemble$base.learners), "\\.")
  # index of the learner whose prefix matches the given param name
  j = which(vlapply(patterns, stri_detect_regex, str = pn))
  # strip the matched learner prefix to recover the bare param id
  par.id = stri_replace_first(pn, "", regex = patterns[j])
  list(index = j, par.id = par.id)
}

Expand All @@ -11,7 +11,7 @@ getHyperPars.BaseEnsemble = function(learner, for.fun = c("train", "predict", "b
pvs = lapply(learner$base.learners, function(lrn) {
xs = getHyperPars.Learner(lrn, for.fun = for.fun)
if (length(xs) > 0L)
names(xs) = paste0(lrn$id, ".", names(xs))
names(xs) = stri_paste(lrn$id, ".", names(xs))
return(xs)
})
# if we dont do this, R prefixes the list names again.
Expand Down
4 changes: 2 additions & 2 deletions R/BaseWrapper.R
print.BaseWrapper = function(x, ...) {
  # Build a "WrapperA->WrapperB-> BaseLearnerClass" chain description by
  # walking down the chain of wrapped learners.
  s = ""
  y = x
  while (inherits(y, "BaseWrapper")) {
    s = stri_paste(s, class(y)[1L], "->", sep = "")
    y = y$next.learner
  }
  s = stri_paste(s, class(y)[1L], sep = " ")
  # NOTE(review): 's' is constructed but not printed in the visible code —
  # confirm whether print.Learner consumes it or this is leftover work.
  print.Learner(x)
}

Expand Down
2 changes: 1 addition & 1 deletion R/ClassifTask.R
Expand Up @@ -47,7 +47,7 @@ makeTaskDesc.ClassifTask = function(task, id, target, positive) {
td$positive = positive
td$negative = NA_character_
if (length(td$class.levels) == 1L)
td$negative = paste0("not_", positive)
td$negative = stri_paste("not_", positive)
else if (length(td$class.levels) == 2L)
td$negative = setdiff(td$class.levels, positive)
return(addClasses(td, c("TaskDescClassif", "TaskDescSupervised")))
Expand Down
2 changes: 1 addition & 1 deletion R/CostSensClassifWrapper.R
Expand Up @@ -19,7 +19,7 @@
# Wrap a classification learner for cost-sensitive learning.
# The base learner must produce plain responses, so predict.type is
# forced to "response" before wrapping.
#
# @param learner [Learner | character(1)] classification learner (or its id).
# @return [CostSensClassifWrapper] the wrapped learner.
makeCostSensClassifWrapper = function(learner) {
  learner = checkLearnerClassif(learner)
  learner = setPredictType(learner, "response")
  # id scheme: "costsens.<learnerid>"
  id = stri_paste("costsens", learner$id, sep = ".")
  makeBaseWrapper(id, "costsens", learner, package = learner$package,
    learner.subclass = "CostSensClassifWrapper", model.subclass = "CostSensClassifModel")
}
Expand Down
2 changes: 1 addition & 1 deletion R/CostSensRegrWrapper.R
# Wrap a regression learner for cost-sensitive learning.
#
# @param learner [Learner | character(1)] regression learner (or its id).
# @return [CostSensRegrWrapper] the wrapped learner.
makeCostSensRegrWrapper = function(learner) {
  learner = checkLearnerRegr(learner)
  # we cannot make use of 'se' here
  learner = setPredictType(learner, "response")
  # id scheme: "costsens.<learnerid>"
  id = stri_paste("costsens", learner$id, sep = ".")
  makeHomogeneousEnsemble(id, type = "costsens", learner, package = learner$package,
    learner.subclass = "CostSensRegrWrapper", model.subclass = "CostSensRegrModel")
}
Expand Down
2 changes: 1 addition & 1 deletion R/CostSensTask.R
Expand Up @@ -16,7 +16,7 @@ makeCostSensTask = function(id = deparse(substitute(data)), data, costs, blockin
if (is.data.frame(costs))
costs = as.matrix(costs)
if (is.null(colnames(costs)))
colnames(costs) = paste0("y", seq_col(costs))
colnames(costs) = stri_paste("y", seq_col(costs))
}
task = makeSupervisedTask("costsens", data, target, weights, blocking, fixup.data = fixup.data, check.data = check.data)
task$env$costs = costs
Expand Down
2 changes: 1 addition & 1 deletion R/CostSensWeightedPairsWrapper.R
Expand Up @@ -22,7 +22,7 @@
# Wrap a classification learner for cost-sensitive learning via weighted
# pairwise comparison. The base learner must support observation weights
# and is forced to plain "response" predictions.
#
# @param learner [Learner | character(1)] classification learner (or its id).
# @return [CostSensWeightedPairsWrapper] the wrapped learner.
makeCostSensWeightedPairsWrapper = function(learner) {
  learner = checkLearnerClassif(learner, weights = TRUE)
  learner = setPredictType(learner, "response")
  # id scheme: "costsens.<learnerid>"
  id = stri_paste("costsens", learner$id, sep = ".")
  makeHomogeneousEnsemble(id, "costsens", learner, package = learner$package,
    learner.subclass = "CostSensWeightedPairsWrapper", model.subclass = "CostSensWeightedPairsModel")
}
Expand Down
2 changes: 1 addition & 1 deletion R/DownsampleWrapper.R
Expand Up @@ -29,7 +29,7 @@ makeDownsampleWrapper = function(learner, dw.perc = 1, dw.stratify = FALSE) {
assertFlag(dw.stratify)
pv$dw.stratify = dw.stratify
}
id = paste(learner$id, "downsampled", sep = ".")
id = stri_paste(learner$id, "downsampled", sep = ".")
ps = makeParamSet(
makeNumericLearnerParam(id = "dw.perc", lower = 0, upper = 1, default = 1),
makeLogicalLearnerParam(id = "dw.stratify", default = FALSE)
Expand Down
2 changes: 1 addition & 1 deletion R/FeatSelWrapper.R
Expand Up @@ -44,7 +44,7 @@ makeFeatSelWrapper = function(learner, resampling, measures, bit.names, bits.to.
}
assertClass(control, classes = "FeatSelControl")
assertFlag(show.info)
id = paste(learner$id, "featsel", sep = ".")
id = stri_paste(learner$id, "featsel", sep = ".")
x = makeOptWrapper(id, learner, resampling, measures, makeParamSet(), bit.names,
bits.to.features, control, show.info, "FeatSelWrapper", "FeatSelModel")
# checkVarselParset(learner, par.set, bit.names, control)
Expand Down
6 changes: 3 additions & 3 deletions R/Filter.R
Expand Up @@ -61,7 +61,7 @@ listFilterMethods = function(desc = TRUE, tasks = FALSE, features = FALSE) {
# Convert a list of tag vectors into an indicator data.frame:
# one row per element of 'tags', one column per distinct tag value,
# with column names optionally prefixed.
#
# @param tags [list of character] tag sets, one per row of the result.
# @param prefix [character(1)] string prepended to every column name.
# @return [data.frame] indicator table with row i / column t marking
#   whether tag t occurs in tags[[i]].
tag2df = function(tags, prefix = "") {
  unique.tags = sort(unique(unlist(tags)))
  # for each tag set: which of the unique tags it contains (row-wise matrix)
  res = asMatrixRows(lapply(tags, "%in%", x = unique.tags))
  colnames(res) = stri_paste(prefix, unique.tags)
  rownames(res) = NULL
  as.data.frame(res)
}
Expand Down Expand Up @@ -344,7 +344,7 @@ makeFilter(
fun = function(task, nselect, ...) {
data = getTaskData(task)
sapply(getTaskFeatureNames(task), function(feat.name) {
f = as.formula(paste0(feat.name,"~",getTaskTargetNames(task)))
f = as.formula(stri_paste(feat.name,"~",getTaskTargetNames(task)))
aov.t = aov(f, data = data)
summary(aov.t)[[1]][1,'F value']
})
Expand All @@ -360,7 +360,7 @@ makeFilter(
fun = function(task, nselect, ...) {
data = getTaskData(task)
sapply(getTaskFeatureNames(task), function(feat.name) {
f = as.formula(paste0(feat.name,"~", getTaskTargetNames(task)))
f = as.formula(stri_paste(feat.name,"~", getTaskTargetNames(task)))
t = kruskal.test(f, data = data)
unname(t$statistic)
})
Expand Down
2 changes: 1 addition & 1 deletion R/FilterWrapper.R
Expand Up @@ -53,7 +53,7 @@ makeFilterWrapper = function(learner, fw.method = "rf.importance", fw.perc = NUL
assertList(ddd, names = "named")

lrn = makeBaseWrapper(
id = paste(learner$id, "filtered", sep = "."),
id = stri_paste(learner$id, "filtered", sep = "."),
type = learner$type,
next.learner = learner,
package = filter$pkg,
Expand Down
6 changes: 3 additions & 3 deletions R/ModelMultiplexerParamSet.R
Expand Up @@ -56,13 +56,13 @@ makeModelMultiplexerParamSet = function(multiplexer, ..., .check = TRUE) {
pid = p$id
# end of param name we need to find
long.pid.end = sprintf("\\.%s$", pid)
found = grep(long.pid.end, all.par.ids)
found = stri_subset_regex(all.par.ids, long.pid.end)
if (length(found) == 0L)
stopf("No param of id '%s' in any base learner!", pid)
if (length(found) > 1L)
stopf("Multiple params of id '%s' found in base learners, pass correctly grouped param sets!", pid)
# get the learner that is referenced from prefix of found string + add param to correct parset
for.learner = gsub(long.pid.end, "", all.par.ids[[found]])
for.learner = stri_replace(found, "", regex = long.pid.end)
for.pars = pss[[for.learner]]$pars
for.pars[[pid]] = p
pss[[for.learner]]$pars = for.pars
Expand All @@ -88,7 +88,7 @@ makeModelMultiplexerParamSet = function(multiplexer, ..., .check = TRUE) {
pid = p$id
if (.check && (pid %nin% getParamIds(bl$par.set)))
stopf("No param of id '%s' in base learner '%s'!", pid, bl$id)
p$id = paste(bl$id, pid, sep = ".")
p$id = stri_paste(bl$id, pid, sep = ".")
p$requires = asQuoted(sprintf("selected.learner == '%s'", bl$id))
ps$pars[[j]] = p
}
Expand Down
2 changes: 1 addition & 1 deletion R/MulticlassWrapper.R
Expand Up @@ -38,7 +38,7 @@ makeMulticlassWrapper = function(learner, mcw.method = "onevsrest") {
checkFunction(mcw.method, args = "task")
)
pv = list(mcw.method = mcw.method)
id = paste(learner$id, "multiclass", sep = ".")
id = stri_paste(learner$id, "multiclass", sep = ".")

x = makeHomogeneousEnsemble(id = id, type = "classif", next.learner = learner,
package = learner$package, par.set = ps, par.vals = pv,
Expand Down
2 changes: 1 addition & 1 deletion R/MultilabelBinaryRelevanceWrapper.R
Expand Up @@ -42,7 +42,7 @@
#' }
makeMultilabelBinaryRelevanceWrapper = function(learner) {
learner = checkLearner(learner, type = "classif")
id = paste("multilabel", learner$id, sep = ".")
id = stri_paste("multilabel", learner$id, sep = ".")
packs = learner$package
x = makeHomogeneousEnsemble(id, learner$type, learner, packs,
learner.subclass = "MultilabelBinaryRelevanceWrapper", model.subclass = "MultilabelBinaryRelevanceModel")
Expand Down
2 changes: 1 addition & 1 deletion R/OverBaggingWrapper.R
Expand Up @@ -61,7 +61,7 @@ makeOverBaggingWrapper = function(learner, obw.iters = 10L, obw.rate = 1, obw.ma

if (learner$predict.type != "response")
stop("Predict type of the basic learner must be response.")
id = paste(learner$id, "overbagged", sep = ".")
id = stri_paste(learner$id, "overbagged", sep = ".")
packs = learner$package
ps = makeParamSet(
makeIntegerLearnerParam(id = "obw.iters", lower = 1L, default = 10L),
Expand Down
4 changes: 2 additions & 2 deletions R/OverUndersampleWrapper.R
Expand Up @@ -39,7 +39,7 @@ makeUndersampleWrapper = function(learner, usw.rate = 1, usw.cl = NULL) {
assertString(usw.cl)
pv$usw.cl = usw.cl
}
id = paste(learner$id, "undersampled", sep = ".")
id = stri_paste(learner$id, "undersampled", sep = ".")
ps = makeParamSet(
makeNumericLearnerParam(id = "usw.rate", lower = 0, upper = 1),
makeUntypedLearnerParam(id = "usw.cl", default = NULL, tunable = FALSE)
Expand All @@ -61,7 +61,7 @@ makeOversampleWrapper = function(learner, osw.rate = 1, osw.cl = NULL) {
assertString(osw.cl)
pv$osw.cl = osw.cl
}
id = paste(learner$id, "oversampled", sep = ".")
id = stri_paste(learner$id, "oversampled", sep = ".")
ps = makeParamSet (
makeNumericLearnerParam(id = "osw.rate", lower = 1),
makeUntypedLearnerParam(id = "osw.cl", default = NULL, tunable = FALSE)
Expand Down
6 changes: 3 additions & 3 deletions R/Prediction.R
Expand Up @@ -65,9 +65,9 @@ makePrediction.TaskDescClassif = function(task.desc, row.names, id, truth, predi
data$prob = y
data = as.data.frame(filterNull(data))
# fix columnnames for prob if strange chars are in factor levels
i = grep("prob.", names(data), fixed = TRUE)
if (length(i))
names(data)[i] = paste0("prob.", colnames(y))
indices = stri_detect_fixed(names(data), "prob.")
if (sum(indices) > 0)
names(data)[indices] = stri_paste("prob.", colnames(y))
}

p = makeS3Obj(c("PredictionClassif", "Prediction"),
Expand Down
8 changes: 4 additions & 4 deletions R/Prediction_operators.R
Expand Up @@ -49,14 +49,14 @@ getPredictionProbabilities = function(pred, cl) {
stop("Probabilities not present in Prediction object!")
cns = colnames(pred$data)
if (ttype %in% c("classif", "multilabel")) {
cl2 = paste("prob", cl, sep = ".")
cl2 = stri_paste("prob", cl, sep = ".")
if (!all(cl2 %in% cns))
stopf("Trying to get probabilities for nonexistant classes: %s", collapse(cl))
y = pred$data[, cl2]
if (length(cl) > 1L)
colnames(y) = cl
} else if (ttype == "cluster") {
y = pred$data[, grepl("prob\\.", cns)]
y = pred$data[, stri_detect_regex(cns, "prob\\.")]
colnames(y) = seq_col(y)
}
return(y)
Expand Down Expand Up @@ -111,7 +111,7 @@ getPredictionResponse.default = function(pred) {

#' @export
getPredictionResponse.PredictionMultilabel = function(pred) {
  # select the "response.<label>" columns of the prediction data and
  # return them as a matrix named after the task's class levels
  i = stri_detect_regex(colnames(pred$data), "^response\\.")
  m = as.matrix(pred$data[, i])
  setColNames(m, pred$task.desc$class.levels)
}
Expand Down Expand Up @@ -151,7 +151,7 @@ getPredictionTruth.PredictionSurv = function(pred) {

#' @export
getPredictionTruth.PredictionMultilabel = function(pred) {
  # select the "truth.<label>" columns of the prediction data and
  # return them as a matrix named after the task's class levels
  i = stri_detect_regex(colnames(pred$data), "^truth\\.")
  m = as.matrix(pred$data[, i])
  setColNames(m, pred$task.desc$class.levels)
}
2 changes: 1 addition & 1 deletion R/PreprocWrapper.R
Expand Up @@ -40,7 +40,7 @@ makePreprocWrapper = function(learner, train, predict, par.set = makeParamSet(),
if (!isProperlyNamed(par.vals))
stop("'par.vals' must be a properly named list!")

id = paste(learner$id, "preproc", sep = ".")
id = stri_paste(learner$id, "preproc", sep = ".")
x = makeBaseWrapper(id, type = learner$type, next.learner = learner, par.set = par.set,
par.vals = par.vals, learner.subclass = "PreprocWrapper", model.subclass = "PreprocModel")
x$train = train
Expand Down
2 changes: 1 addition & 1 deletion R/RLearner.R
Expand Up @@ -60,7 +60,7 @@ makeRLearnerInternal = function(id, type, package, par.set, par.vals, properties
# must do that before accessing par.set
# one case where lazy eval is actually helpful...
assertCharacter(package, any.missing = FALSE)
requirePackages(package, why = paste("learner", id), default.method = "load")
requirePackages(package, why = stri_paste("learner", id, sep = " "), default.method = "load")

assertString(id)
assertChoice(type, choices = c("classif", "regr", "multilabel", "surv", "cluster", "costsens"))
Expand Down
8 changes: 4 additions & 4 deletions R/RLearner_classif_avNNet.R
Expand Up @@ -31,11 +31,11 @@ trainLearner.classif.avNNet = function(.learner, .task, .subset, .weights = NULL
bag = FALSE

nms = names(.learner$par.vals)
ind = grep('repeats',nms)
if (length(ind)>0)
ind = stri_detect_regex(nms, "repeats")
if (sum(ind)>0)
repeats = .learner$par.vals[[ind]]
ind = grep('bag',nms)
if (length(ind)>0)
ind = stri_detect_regex(nms, "bag")
if (sum(ind)>0)
bag = .learner$par.vals[[ind]]

assertInt(repeats, lower = 1)
Expand Down
6 changes: 3 additions & 3 deletions R/RLearner_classif_dcSVM.R
Expand Up @@ -32,19 +32,19 @@ trainLearner.classif.dcSVM = function(.learner, .task, .subset, .weights = NULL,
pars = list(...)
m.flag = FALSE
max.levels.flag = FALSE
if (!any(grepl('m', names(pars)))) {
if (!any(stri_detect_regex(names(pars), 'm'))) {
m = 800
m.flag = TRUE
} else {
m = pars$m
}
if (!any(grepl('max.levels', names(pars)))) {
if (!any(stri_detect_regex(names(pars), 'max.levels'))) {
max.levels = 1
max.levels.flag = TRUE
} else {
max.levels = pars$max.levels
}
if (!any(grepl('k', names(pars)))) {
if (!any(stri_detect_regex(names(pars), 'k'))) {
k = 4
} else {
k = pars$k
Expand Down
5 changes: 3 additions & 2 deletions R/RLearner_classif_glmboost.R
Expand Up @@ -17,9 +17,10 @@ makeRLearner.classif.glmboost = function() {
properties = c("twoclass", "numerics", "factors", "prob", "weights"),
name = "Boosting for GLMs",
short.name = "glmbst",
note = paste(
note = stri_paste(
"`family` has been set to `Binomial()` by default.",
"Maximum number of boosting iterations is set via `mstop`, the actual number used for prediction is controlled by `m`."
"Maximum number of boosting iterations is set via `mstop`, the actual number used for prediction is controlled by `m`.",
sep = " "
)
)
}
Expand Down
8 changes: 5 additions & 3 deletions R/RLearner_classif_neuralnet.R
Expand Up @@ -54,10 +54,12 @@ trainLearner.classif.neuralnet = function(.learner, .task, .subset, .weights = N
if (!all(taskdat[[formula_head]]== 0 | taskdat[[formula_head]] == 1)){
taskdat[[formula_head]] = taskdat[[formula_head]]-1
}
if (sum(grepl('\\.',cf))>0){
if (sum(stri_detect_regex(cf, '\\.')) > 0){
varnames = nms[nms!=formula_head]
formula_head = paste('as.numeric(',formula_head,')~')
formula_expand = paste(formula_head, paste(varnames, collapse = "+"))
formula_head = stri_paste('as.numeric(',formula_head,')~', sep = " ")
formula_expand = stri_paste(formula_head,
stri_paste(varnames, collapse = "+", sep = " "),
sep = " ")
formula_expand = as.formula(formula_expand)
f = formula_expand
}
Expand Down
8 changes: 4 additions & 4 deletions R/RLearner_regr_avNNet.R
Expand Up @@ -35,11 +35,11 @@ trainLearner.regr.avNNet = function(.learner, .task, .subset, .weights = NULL, .
bag = FALSE

nms = names(.learner$par.vals)
ind = grep('repeats',nms)
if (length(ind)>0)
ind = stri_detect_regex(nms, "repeats")
if (sum(ind)>0)
repeats = .learner$par.vals[[ind]]
ind = grep('bag',nms)
if (length(ind)>0)
ind = stri_detect_regex(nms, "bag")
if (sum(ind)>0)
bag = .learner$par.vals[[ind]]

assertInt(repeats, lower = 1)
Expand Down
2 changes: 1 addition & 1 deletion R/RLearner_regr_mob.R
Expand Up @@ -37,7 +37,7 @@ trainLearner.regr.mob = function(.learner, .task, .subset, .weights = NULL, alph
term.feats = feats

target = getTaskTargetNames(.task)
f = as.formula(paste(target, "~", collapse(term.feats, sep = " + "), "|", collapse(part.feats, sep = " + ")))
f = as.formula(stri_paste(target, "~", collapse(term.feats, sep = " + "), "|", collapse(part.feats, sep = " + "), sep = " "))

if (is.null(.weights)) {
model = party::mob(f, data = getTaskData(.task, .subset), control = cntrl, ...)
Expand Down

0 comments on commit 3f6666e

Please sign in to comment.