Skip to content

Commit

Permalink
fix h2o classif predictions with numeric classes (fixes #1787) (#1790)
Browse files Browse the repository at this point in the history
* fix h2o classif predictions with numeric classes (fixes #1787)

* ...
  • Loading branch information
larskotthoff committed May 3, 2017
1 parent ebed05e commit b3f31ae
Show file tree
Hide file tree
Showing 8 changed files with 64 additions and 20 deletions.
8 changes: 3 additions & 5 deletions R/RLearner_classif_h2odeeplearning.R
Original file line number Diff line number Diff line change
Expand Up @@ -254,11 +254,9 @@ predictLearner.classif.h2o.deeplearning = function(.learner, .model, .newdata, .

# check if class names are integers. if yes, colnames of p.df need to be adapted
int = stri_detect_regex(p.df$predict, "^[[:digit:]]+$")
if (any(int)) {
pcol = stri_detect_regex("^p[[:digit:]]+$", colnames(p.df))
if (any(pcol))
colnames(p.df)[pcol] = stri_sub(colnames(p.df)[pcol], 2L)
}
pcol = stri_detect_regex(colnames(p.df), "^p[[:digit:]]+$")
if (any(int) && any(pcol))
colnames(p.df)[pcol] = stri_sub(colnames(p.df)[pcol], 2L)

if (.learner$predict.type == "response") {
return(p.df$predict)
Expand Down
8 changes: 3 additions & 5 deletions R/RLearner_classif_h2ogbm.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,9 @@ predictLearner.classif.h2o.gbm = function(.learner, .model, .newdata, ...) {

# check if class names are integers. if yes, colnames of p.df need to be adapted
int = stri_detect_regex(p.df$predict, "^[[:digit:]]+$")
if (any(int)) {
pcol = stri_detect_regex("^p[[:digit:]]+$", colnames(p.df))
if (any(pcol))
colnames(p.df)[pcol] = stri_sub(colnames(p.df)[pcol], 2L)
}
pcol = stri_detect_regex(colnames(p.df), "^p[[:digit:]]+$")
if (any(int) && any(pcol))
colnames(p.df)[pcol] = stri_sub(colnames(p.df)[pcol], 2L)

if (.learner$predict.type == "response") {
return(p.df$predict)
Expand Down
8 changes: 3 additions & 5 deletions R/RLearner_classif_h2oglm.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,9 @@ predictLearner.classif.h2o.glm = function(.learner, .model, .newdata, ...) {

# check if class names are integers. if yes, colnames of p.df need to be adapted
int = stri_detect_regex(p.df$predict, "^[[:digit:]]+$")
if (any(int)) {
pcol = stri_detect_regex("^p[[:digit:]]+$", colnames(p.df))
if (any(pcol))
colnames(p.df)[pcol] = stri_sub(colnames(p.df)[pcol], 2L)
}
pcol = stri_detect_regex(colnames(p.df), "^p[[:digit:]]+$")
if (any(int) && any(pcol))
colnames(p.df)[pcol] = stri_sub(colnames(p.df)[pcol], 2L)

if (.learner$predict.type == "response") {
return(p.df$predict)
Expand Down
8 changes: 3 additions & 5 deletions R/RLearner_classif_h2orandomForest.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,9 @@ predictLearner.classif.h2o.randomForest = function(.learner, .model, .newdata, .

# check if class names are integers. if yes, colnames of p.df need to be adapted
int = stri_detect_regex(p.df$predict, "^[[:digit:]]+$")
if (any(int)) {
pcol = stri_detect_regex("^p[[:digit:]]+$", colnames(p.df))
if (any(pcol))
colnames(p.df)[pcol] = stri_sub(colnames(p.df)[pcol], 2L)
}
pcol = stri_detect_regex(colnames(p.df), "^p[[:digit:]]+$")
if (any(int) && any(pcol))
colnames(p.df)[pcol] = stri_sub(colnames(p.df)[pcol], 2L)

if (.learner$predict.type == "response") {
return(p.df$predict)
Expand Down
13 changes: 13 additions & 0 deletions tests/testthat/test_classif_h2odeeplearning.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,16 @@ test_that("classif_h2odeeplearning", {

testProbParsets("classif.h2o.deeplearning", binaryclass.df, binaryclass.target, binaryclass.train.inds, old.probs.list, parset.list)
})

# Regression test for #1787: probability prediction must work when the class
# labels look like integers ("0"/"1").
test_that("class names are integers and probabilities predicted (#1787)", {
  n.obs = 100L
  n.feats = 9L
  # Draw n.obs * n.feats values so that every feature column is independent;
  # supplying only n.obs values would make matrix() silently recycle them and
  # produce nine identical columns.
  df = data.frame(matrix(runif(n.obs * n.feats, 0, 1), n.obs, n.feats))
  # Factor target whose levels are the integer-like strings "0" and "1".
  classx = factor(sample(c(0, 1), n.obs, replace = TRUE))
  df = cbind(classx, df)

  classif.task = makeClassifTask(id = "example", data = df, target = "classx")
  lrn = makeLearner("classif.h2o.deeplearning", predict.type = "prob")
  rdesc = makeResampleDesc("CV", iters = 3, stratify = TRUE)
  rin = makeResampleInstance(rdesc, task = classif.task)
  r = resample(lrn, classif.task, rin)
  # The resampling must complete and yield a prediction object.
  expect_false(is.null(r$pred))
})
13 changes: 13 additions & 0 deletions tests/testthat/test_classif_h2ogbm.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,16 @@ test_that("classif_h2ogbm", {

testProbParsets("classif.h2o.gbm", binaryclass.df, binaryclass.target, binaryclass.train.inds, old.probs.list, parset.list)
})

# Regression test for #1787: probability prediction must work when the class
# labels look like integers ("0"/"1").
test_that("class names are integers and probabilities predicted (#1787)", {
  n.obs = 100L
  n.feats = 9L
  # Draw n.obs * n.feats values so that every feature column is independent;
  # supplying only n.obs values would make matrix() silently recycle them and
  # produce nine identical columns.
  df = data.frame(matrix(runif(n.obs * n.feats, 0, 1), n.obs, n.feats))
  # Factor target whose levels are the integer-like strings "0" and "1".
  classx = factor(sample(c(0, 1), n.obs, replace = TRUE))
  df = cbind(classx, df)

  classif.task = makeClassifTask(id = "example", data = df, target = "classx")
  gb.lrn = makeLearner("classif.h2o.gbm", predict.type = "prob")
  rdesc = makeResampleDesc("CV", iters = 3, stratify = TRUE)
  rin = makeResampleInstance(rdesc, task = classif.task)
  r = resample(gb.lrn, classif.task, rin)
  # The resampling must complete and yield a prediction object.
  expect_false(is.null(r$pred))
})
13 changes: 13 additions & 0 deletions tests/testthat/test_classif_h2oglm.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,16 @@ test_that("classif_h2oglm", {

testProbParsets("classif.h2o.glm", binaryclass.df, binaryclass.target, binaryclass.train.inds, old.probs.list, parset.list)
})

# Regression test for #1787: probability prediction must work when the class
# labels look like integers ("0"/"1").
test_that("class names are integers and probabilities predicted (#1787)", {
  n.obs = 100L
  n.feats = 9L
  # Draw n.obs * n.feats values so that every feature column is independent;
  # supplying only n.obs values would make matrix() silently recycle them and
  # produce nine identical columns.
  df = data.frame(matrix(runif(n.obs * n.feats, 0, 1), n.obs, n.feats))
  # Factor target whose levels are the integer-like strings "0" and "1".
  classx = factor(sample(c(0, 1), n.obs, replace = TRUE))
  df = cbind(classx, df)

  classif.task = makeClassifTask(id = "example", data = df, target = "classx")
  lrn = makeLearner("classif.h2o.glm", predict.type = "prob")
  rdesc = makeResampleDesc("CV", iters = 3, stratify = TRUE)
  rin = makeResampleInstance(rdesc, task = classif.task)
  r = resample(lrn, classif.task, rin)
  # The resampling must complete and yield a prediction object.
  expect_false(is.null(r$pred))
})
13 changes: 13 additions & 0 deletions tests/testthat/test_classif_h2orandomForest.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,16 @@ test_that("classif_h2orandomForest", {
testProbParsets("classif.h2o.randomForest", binaryclass.df, binaryclass.target,
binaryclass.train.inds, old.probs.list, parset.list)
})

# Regression test for #1787: probability prediction must work when the class
# labels look like integers ("0"/"1").
test_that("class names are integers and probabilities predicted (#1787)", {
  n.obs = 100L
  n.feats = 9L
  # Draw n.obs * n.feats values so that every feature column is independent;
  # supplying only n.obs values would make matrix() silently recycle them and
  # produce nine identical columns.
  df = data.frame(matrix(runif(n.obs * n.feats, 0, 1), n.obs, n.feats))
  # Factor target whose levels are the integer-like strings "0" and "1".
  classx = factor(sample(c(0, 1), n.obs, replace = TRUE))
  df = cbind(classx, df)

  classif.task = makeClassifTask(id = "example", data = df, target = "classx")
  lrn = makeLearner("classif.h2o.randomForest", predict.type = "prob")
  rdesc = makeResampleDesc("CV", iters = 3, stratify = TRUE)
  rin = makeResampleInstance(rdesc, task = classif.task)
  r = resample(lrn, classif.task, rin)
  # The resampling must complete and yield a prediction object.
  expect_false(is.null(r$pred))
})

3 comments on commit b3f31ae

@lintr-bot
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

R/RLearner_classif_rknn.R:25:37: style: Do not place spaces around code in parentheses or square brackets.

​  c(list(data = z$data, y = z$target ), list(...))
                                    ^

R/RLearner_regr_GPfit.R:42:116: style: Do not place spaces around code in parentheses or square brackets.

res = GPfit::GP_fit(d$data[, not.const], d$target, corr = list(type = type, power = power, nu = matern_nu_k + 0.5 ), ...)
                                                                                                                   ^

R/RLearner_regr_glm.R:59:69: style: Do not place spaces around code in parentheses or square brackets.

m = stats::glm(f, data = d, control = ctrl, family = family, ... )
                                                                    ^

R/RLearner_regr_glm.R:61:89: style: Do not place spaces around code in parentheses or square brackets.

m = stats::glm(f, data = d, control = ctrl, weights = .weights, family = family, ... )
                                                                                        ^

R/RLearner_regr_randomForest.R:149:44: style: Do not place spaces around code in parentheses or square brackets.

bist = ((1 / se.ntree) - (1 / ntree)) / ( se.boot * se.ntree * (se.ntree - 1)) * bias
                                           ^

R/RLearner_regr_rknn.R:25:37: style: Do not place spaces around code in parentheses or square brackets.

​  c(list(data = z$data, y = z$target ), list(...))
                                    ^

R/StackedLearner.R:556:50: style: Do not place spaces around code in parentheses or square brackets.

m = makeLearner("regr.nnet", predict.type = )
                                                 ^

R/StackedLearner.R:678:25: style: Do not place spaces around code in parentheses or square brackets.

neighbour = max.col( -dist.mat - diag(Inf, n))
                        ^

R/selectFeaturesRandom.R:15:30: style: Do not place spaces around code in parentheses or square brackets.

return(rbinom(n, 1, prob) )
                             ^

tests/testthat/test_base_learnerArgsToControl.R:15:22: style: Do not place spaces around code in parentheses or square brackets.

arg1 = quote(expr = )
                     ^

tests/testthat/test_base_learnerArgsToControl.R:16:22: style: Do not place spaces around code in parentheses or square brackets.

arg2 = quote(expr = )
                     ^

tests/testthat/test_base_learnerArgsToControl.R:41:24: style: Do not place spaces around code in parentheses or square brackets.

devmax = quote(expr = )
                       ^

tests/testthat/test_base_measures.R:448:58: style: Do not place spaces around code in parentheses or square brackets.

qsr.test = 1 - mean(rowSums((pred.probs - model.matrix( ~ . + 0, data = as.data.frame(tar.classif)))^2))
                                                         ^

tests/testthat/test_base_tuning.R:25:46: style: Do not place spaces around code in parentheses or square brackets.

j = which(pp$cp == cp & pp$minsplit == ms )
                                             ^

tests/testthat/test_classif_logreg.R:22:98: style: Do not place spaces around code in parentheses or square brackets.

​  testCV("classif.logreg", binaryclass.df, binaryclass.target, tune.train = tt, tune.predict = tp )
                                                                                                 ^

tests/testthat/test_classif_multinom.R:21:98: style: Do not place spaces around code in parentheses or square brackets.

​  testCV("classif.multinom", multiclass.df, multiclass.target, tune.train = tt, tune.predict = tp )
                                                                                                 ^

tests/testthat/test_classif_naiveBayes.R:16:100: style: Do not place spaces around code in parentheses or square brackets.

​  testCV("classif.naiveBayes", multiclass.df, multiclass.target, tune.train = tt, tune.predict = tp )
                                                                                                   ^

tests/testthat/test_classif_probit.R:19:98: style: Do not place spaces around code in parentheses or square brackets.

​  testCV("classif.probit", binaryclass.df, binaryclass.target, tune.train = tt, tune.predict = tp )
                                                                                                 ^

tests/testthat/test_classif_qda.R:18:93: style: Do not place spaces around code in parentheses or square brackets.

​  testCV("classif.qda", multiclass.df, multiclass.target, tune.train = tt, tune.predict = tp )
                                                                                            ^

@lintr-bot
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

R/RLearner_classif_rknn.R:25:37: style: Do not place spaces around code in parentheses or square brackets.

​  c(list(data = z$data, y = z$target ), list(...))
                                    ^

R/RLearner_regr_GPfit.R:42:116: style: Do not place spaces around code in parentheses or square brackets.

res = GPfit::GP_fit(d$data[, not.const], d$target, corr = list(type = type, power = power, nu = matern_nu_k + 0.5 ), ...)
                                                                                                                   ^

R/RLearner_regr_glm.R:59:69: style: Do not place spaces around code in parentheses or square brackets.

m = stats::glm(f, data = d, control = ctrl, family = family, ... )
                                                                    ^

R/RLearner_regr_glm.R:61:89: style: Do not place spaces around code in parentheses or square brackets.

m = stats::glm(f, data = d, control = ctrl, weights = .weights, family = family, ... )
                                                                                        ^

R/RLearner_regr_randomForest.R:149:44: style: Do not place spaces around code in parentheses or square brackets.

bist = ((1 / se.ntree) - (1 / ntree)) / ( se.boot * se.ntree * (se.ntree - 1)) * bias
                                           ^

R/RLearner_regr_rknn.R:25:37: style: Do not place spaces around code in parentheses or square brackets.

​  c(list(data = z$data, y = z$target ), list(...))
                                    ^

R/StackedLearner.R:556:50: style: Do not place spaces around code in parentheses or square brackets.

m = makeLearner("regr.nnet", predict.type = )
                                                 ^

R/StackedLearner.R:678:25: style: Do not place spaces around code in parentheses or square brackets.

neighbour = max.col( -dist.mat - diag(Inf, n))
                        ^

R/selectFeaturesRandom.R:15:30: style: Do not place spaces around code in parentheses or square brackets.

return(rbinom(n, 1, prob) )
                             ^

tests/testthat/test_base_learnerArgsToControl.R:15:22: style: Do not place spaces around code in parentheses or square brackets.

arg1 = quote(expr = )
                     ^

tests/testthat/test_base_learnerArgsToControl.R:16:22: style: Do not place spaces around code in parentheses or square brackets.

arg2 = quote(expr = )
                     ^

tests/testthat/test_base_learnerArgsToControl.R:41:24: style: Do not place spaces around code in parentheses or square brackets.

devmax = quote(expr = )
                       ^

tests/testthat/test_base_measures.R:448:58: style: Do not place spaces around code in parentheses or square brackets.

qsr.test = 1 - mean(rowSums((pred.probs - model.matrix( ~ . + 0, data = as.data.frame(tar.classif)))^2))
                                                         ^

tests/testthat/test_base_tuning.R:25:46: style: Do not place spaces around code in parentheses or square brackets.

j = which(pp$cp == cp & pp$minsplit == ms )
                                             ^

tests/testthat/test_classif_logreg.R:22:98: style: Do not place spaces around code in parentheses or square brackets.

​  testCV("classif.logreg", binaryclass.df, binaryclass.target, tune.train = tt, tune.predict = tp )
                                                                                                 ^

tests/testthat/test_classif_multinom.R:21:98: style: Do not place spaces around code in parentheses or square brackets.

​  testCV("classif.multinom", multiclass.df, multiclass.target, tune.train = tt, tune.predict = tp )
                                                                                                 ^

tests/testthat/test_classif_naiveBayes.R:16:100: style: Do not place spaces around code in parentheses or square brackets.

​  testCV("classif.naiveBayes", multiclass.df, multiclass.target, tune.train = tt, tune.predict = tp )
                                                                                                   ^

tests/testthat/test_classif_probit.R:19:98: style: Do not place spaces around code in parentheses or square brackets.

​  testCV("classif.probit", binaryclass.df, binaryclass.target, tune.train = tt, tune.predict = tp )
                                                                                                 ^

tests/testthat/test_classif_qda.R:18:93: style: Do not place spaces around code in parentheses or square brackets.

​  testCV("classif.qda", multiclass.df, multiclass.target, tune.train = tt, tune.predict = tp )
                                                                                            ^

@lintr-bot
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

R/RLearner_classif_rknn.R:25:37: style: Do not place spaces around code in parentheses or square brackets.

​  c(list(data = z$data, y = z$target ), list(...))
                                    ^

R/RLearner_regr_GPfit.R:42:116: style: Do not place spaces around code in parentheses or square brackets.

res = GPfit::GP_fit(d$data[, not.const], d$target, corr = list(type = type, power = power, nu = matern_nu_k + 0.5 ), ...)
                                                                                                                   ^

R/RLearner_regr_glm.R:59:69: style: Do not place spaces around code in parentheses or square brackets.

m = stats::glm(f, data = d, control = ctrl, family = family, ... )
                                                                    ^

R/RLearner_regr_glm.R:61:89: style: Do not place spaces around code in parentheses or square brackets.

m = stats::glm(f, data = d, control = ctrl, weights = .weights, family = family, ... )
                                                                                        ^

R/RLearner_regr_randomForest.R:149:44: style: Do not place spaces around code in parentheses or square brackets.

bist = ((1 / se.ntree) - (1 / ntree)) / ( se.boot * se.ntree * (se.ntree - 1)) * bias
                                           ^

R/RLearner_regr_rknn.R:25:37: style: Do not place spaces around code in parentheses or square brackets.

​  c(list(data = z$data, y = z$target ), list(...))
                                    ^

R/StackedLearner.R:556:50: style: Do not place spaces around code in parentheses or square brackets.

m = makeLearner("regr.nnet", predict.type = )
                                                 ^

R/StackedLearner.R:678:25: style: Do not place spaces around code in parentheses or square brackets.

neighbour = max.col( -dist.mat - diag(Inf, n))
                        ^

R/selectFeaturesRandom.R:15:30: style: Do not place spaces around code in parentheses or square brackets.

return(rbinom(n, 1, prob) )
                             ^

tests/testthat/test_base_learnerArgsToControl.R:15:22: style: Do not place spaces around code in parentheses or square brackets.

arg1 = quote(expr = )
                     ^

tests/testthat/test_base_learnerArgsToControl.R:16:22: style: Do not place spaces around code in parentheses or square brackets.

arg2 = quote(expr = )
                     ^

tests/testthat/test_base_learnerArgsToControl.R:41:24: style: Do not place spaces around code in parentheses or square brackets.

devmax = quote(expr = )
                       ^

tests/testthat/test_base_measures.R:448:58: style: Do not place spaces around code in parentheses or square brackets.

qsr.test = 1 - mean(rowSums((pred.probs - model.matrix( ~ . + 0, data = as.data.frame(tar.classif)))^2))
                                                         ^

tests/testthat/test_base_tuning.R:25:46: style: Do not place spaces around code in parentheses or square brackets.

j = which(pp$cp == cp & pp$minsplit == ms )
                                             ^

tests/testthat/test_classif_logreg.R:22:98: style: Do not place spaces around code in parentheses or square brackets.

​  testCV("classif.logreg", binaryclass.df, binaryclass.target, tune.train = tt, tune.predict = tp )
                                                                                                 ^

tests/testthat/test_classif_multinom.R:21:98: style: Do not place spaces around code in parentheses or square brackets.

​  testCV("classif.multinom", multiclass.df, multiclass.target, tune.train = tt, tune.predict = tp )
                                                                                                 ^

tests/testthat/test_classif_naiveBayes.R:16:100: style: Do not place spaces around code in parentheses or square brackets.

​  testCV("classif.naiveBayes", multiclass.df, multiclass.target, tune.train = tt, tune.predict = tp )
                                                                                                   ^

tests/testthat/test_classif_probit.R:19:98: style: Do not place spaces around code in parentheses or square brackets.

​  testCV("classif.probit", binaryclass.df, binaryclass.target, tune.train = tt, tune.predict = tp )
                                                                                                 ^

tests/testthat/test_classif_qda.R:18:93: style: Do not place spaces around code in parentheses or square brackets.

​  testCV("classif.qda", multiclass.df, multiclass.target, tune.train = tt, tune.predict = tp )
                                                                                            ^

Please sign in to comment.