From bd101e80182963e3fbbe155e95f447d97dfa2f34 Mon Sep 17 00:00:00 2001
From: EmilHvitfeldt <emilhhvitfeldt@gmail.com>
Date: Wed, 23 Jan 2019 14:35:47 -0800
Subject: [PATCH 1/6] adding null model

---
 NAMESPACE                       |   5 +
 R/nullmodel.R                   | 165 ++++++++++++++++++++++++++++++++
 R/nullmodel_data.R              |  58 +++++++++++
 man/null_model.Rd               |  69 +++++++++++++
 man/nullmodel.Rd                |  47 +++++++++
 tests/testthat/test-nullmodel.R | 121 +++++++++++++++++++++++
 6 files changed, 465 insertions(+)
 create mode 100644 R/nullmodel.R
 create mode 100644 R/nullmodel_data.R
 create mode 100644 man/null_model.Rd
 create mode 100644 man/nullmodel.Rd
 create mode 100644 tests/testthat/test-nullmodel.R

diff --git a/NAMESPACE b/NAMESPACE
index 8ce771ce7..d201d151e 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -9,10 +9,12 @@ S3method(multi_predict,"_lognet")
 S3method(multi_predict,"_multnet")
 S3method(multi_predict,"_xgb.Booster")
 S3method(multi_predict,default)
+S3method(null_model,default)
 S3method(predict,"_elnet")
 S3method(predict,"_lognet")
 S3method(predict,"_multnet")
 S3method(predict,model_fit)
+S3method(predict,null_model)
 S3method(predict_class,"_lognet")
 S3method(predict_class,model_fit)
 S3method(predict_classprob,"_lognet")
@@ -37,6 +39,7 @@ S3method(print,model_fit)
 S3method(print,model_spec)
 S3method(print,multinom_reg)
 S3method(print,nearest_neighbor)
+S3method(print,null_model)
 S3method(print,rand_forest)
 S3method(print,surv_reg)
 S3method(print,svm_poly)
@@ -95,6 +98,8 @@ export(model_printer)
 export(multi_predict)
 export(multinom_reg)
 export(nearest_neighbor)
+export(null_model)
+export(nullmodel)
 export(predict.model_fit)
 export(predict_class)
 export(predict_class.model_fit)
diff --git a/R/nullmodel.R b/R/nullmodel.R
new file mode 100644
index 000000000..5c564210c
--- /dev/null
+++ b/R/nullmodel.R
@@ -0,0 +1,165 @@
+#' Fit a simple, non-informative model
+#'
+#' Fit a single mean or largest class model
+#'
+#' \code{null_model} emulates other model building functions, but returns the
+#' simplest model possible given a training set: a single mean for numeric
+#' outcomes and the most prevalent class for factor outcomes. When class
+#' probabilities are requested, the percentage of the training set samples with
+#' the most prevalent class is returned.
+#'
+#' @aliases null_model null_model.default predict.null_model
+#' @param x An optional matrix or data frame of predictors. These values are
+#' not used in the model fit
+#' @param y A numeric vector (for regression) or factor (for classification) of
+#' outcomes
+#' @param \dots Optional arguments (not yet used)
+#' @param object An object of class \code{null_model}
+#' @param newdata A matrix or data frame of predictors (only used to determine
+#' the number of predictions to return)
+#' @param type Either "raw" (for regression), "class" or "prob" (for
+#' classification)
+#' @return The output of \code{null_model} is a list of class \code{null_model}
+#' with elements \item{call }{the function call} \item{value }{the mean of
+#' \code{y} or the most prevalent class} \item{levels }{when \code{y} is a
+#' factor, a vector of levels. \code{NULL} otherwise} \item{pct }{when \code{y}
+#' is a factor, a data frame with a column for each class (\code{NULL}
+#' otherwise). The column for the most prevalent class has the proportion of
+#' the training samples with that class (the other columns are zero). } \item{n
+#' }{the number of elements in \code{y}}
+#'
+#' \code{predict.null_model} returns a either a factor or numeric vector
+#' depending on the class of \code{y}. All predictions are always the same.
+#' @keywords models
+#' @examples
+#'
+#' outcome <- factor(sample(letters[1:2],
+#'                          size = 100,
+#'                          prob = c(.1, .9),
+#'                          replace = TRUE))
+#' useless <- null_model(y = outcome)
+#' useless
+#' predict(useless, matrix(NA, nrow = 10))
+#'
+#' @export
+null_model <- function (x, ...) UseMethod("null_model")
+
+#' @export
+#' @rdname null_model
+null_model.default <- function(x = NULL, y, ...)
+{
+
+  if(is.factor(y))
+  {
+    lvls <- levels(y)
+    tab <- table(y)
+    value <- names(tab)[which.max(tab)]
+    pct <- tab/sum(tab)
+  } else {
+    lvls <- NULL
+    pct <- NULL
+    value <- mean(y, na.rm = TRUE)
+  }
+  structure(
+    list(call = match.call(),
+         value = value,
+         levels = lvls,
+         pct = pct,
+         n = length(y)),
+    class = "null_model")
+}
+
+#' @export
+#' @rdname null_model
+print.null_model <- function(x, ...)
+{
+  cat("Null",
+      ifelse(is.null(x$levels), "Classification", "Regression"),
+      "Model\n")
+  x$call
+
+  cat("Predicted Value:",
+      ifelse(is.null(x$levels), format(x$value), x$value),
+      "\n")
+}
+
+#' @export
+#' @rdname null_model
+predict.null_model <- function (object, newdata = NULL, type  = NULL, ...)
+{
+  if(is.null(type))
+  {
+    type <- if(is.null(object$levels)) "raw" else "class"
+  }
+
+  n <- if(is.null(newdata)) object$n else nrow(newdata)
+  if(!is.null(object$levels))
+  {
+    if(type == "prob")
+    {
+      out <- matrix(rep(object$pct, n), nrow = n, byrow = TRUE)
+      colnames(out) <- object$levels
+      out <- as.data.frame(out)
+    } else {
+      out <- factor(rep(object$value, n), levels = object$levels)
+    }
+  } else {
+    if(type %in% c("prob", "class")) stop("ony raw predicitons are applicable to regression models")
+    out <- rep(object$value, n)
+  }
+  out
+}
+
+#' General Interface for null models
+#'
+#' `nullmodel` is a way to generate a _specification_ of a model before
+#'  fitting and allows the model to be created using R. It doens't have any
+#'  main arguments.
+#'
+#' @param mode A single character string for the type of model.
+#'  Possible values for this model are "unknown", "regression", or
+#'  "classification".
+#' @details The model can be created using the `fit()` function using the
+#'  following _engines_:
+#' \itemize{
+#' \item \pkg{R}:  `"parsnip"`
+#' }
+#'
+#' @section Engine Details:
+#'
+#' Engines may have pre-set default arguments when executing the
+#'  model fit call.  For this type of
+#'  model, the template of the fit calls are:
+#'
+#' \pkg{parsnip} classification
+#'
+#' \Sexpr[results=rd]{parsnip:::show_fit(parsnip:::nullmodel(mode = "classification"), "parsnip")}
+#'
+#' \pkg{parsnip} regression
+#'
+#' \Sexpr[results=rd]{parsnip:::show_fit(parsnip:::nullmodel(mode = "regression"), "parsnip")}
+#'
+#' @importFrom purrr map_lgl
+#' @seealso [varying()], [fit()]
+#' @examples
+#' nullmodel(mode = "regression")
+#' @export
+nullmodel <-
+  function(mode = "classification") {
+    # Check for correct mode
+    if (!(mode %in% nullmodel_modes))
+      stop("`mode` should be one of: ",
+           paste0("'", nullmodel_modes, "'", collapse = ", "),
+           call. = FALSE)
+
+    # Capture the arguments in quosures
+    args <- list()
+
+    # Save some empty slots for future parts of the specification
+    out <- list(args = args, eng_args = NULL,
+                mode = mode, method = NULL, engine = NULL)
+
+    # set classes in the correct order
+    class(out) <- make_classes("nullmodel")
+    out
+  }
diff --git a/R/nullmodel_data.R b/R/nullmodel_data.R
new file mode 100644
index 000000000..158bad2bb
--- /dev/null
+++ b/R/nullmodel_data.R
@@ -0,0 +1,58 @@
+nullmodel_arg_key <- data.frame(
+  parsnip   =  NULL,
+  row.names =  NULL,
+  stringsAsFactors = FALSE
+)
+
+nullmodel_modes <- c("classification", "regression", "unknown")
+
+nullmodel_engines <- data.frame(
+  parsnip   = c(TRUE, TRUE, FALSE),
+  row.names =  c("classification", "regression", "unknown")
+)
+
+# ------------------------------------------------------------------------------
+
+nullmodel_parsnip_data <-
+  list(
+    libs = "parsnip",
+    fit = list(
+      interface = "matrix",
+      protect = c("x", "y"),
+      func = c(fun = "null_model"),
+      defaults = list()
+      ),
+    class = list(
+        pre = NULL,
+        post = NULL,
+        func = c(fun = "predict"),
+        args =
+          list(
+            object = quote(object$fit),
+            new_data = quote(new_data),
+            type = "class"
+          )
+      ),
+    numeric = list(
+      pre = NULL,
+      post = NULL,
+      func = c(fun = "predict"),
+      args =
+        list(
+          object = quote(object$fit),
+          new_data = quote(new_data),
+          type = "raw"
+        )
+      ),
+    raw = list(
+      pre = NULL,
+      post = NULL,
+      func = c(fun = "predict"),
+      args =
+        list(
+          object = quote(object$fit),
+          new_data = quote(new_data),
+          type = "raw"
+        )
+      )
+    )
diff --git a/man/null_model.Rd b/man/null_model.Rd
new file mode 100644
index 000000000..bde665ef6
--- /dev/null
+++ b/man/null_model.Rd
@@ -0,0 +1,69 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/nullmodel.R
+\name{null_model}
+\alias{null_model}
+\alias{null_model.default}
+\alias{predict.null_model}
+\alias{print.null_model}
+\title{Fit a simple, non-informative model}
+\usage{
+null_model(x, ...)
+
+\method{null_model}{default}(x = NULL, y, ...)
+
+\method{print}{null_model}(x, ...)
+
+\method{predict}{null_model}(object, newdata = NULL, type = NULL, ...)
+}
+\arguments{
+\item{x}{An optional matrix or data frame of predictors. These values are
+not used in the model fit}
+
+\item{\dots}{Optional arguments (not yet used)}
+
+\item{y}{A numeric vector (for regression) or factor (for classification) of
+outcomes}
+
+\item{object}{An object of class \code{null_model}}
+
+\item{newdata}{A matrix or data frame of predictors (only used to determine
+the number of predictions to return)}
+
+\item{type}{Either "raw" (for regression), "class" or "prob" (for
+classification)}
+}
+\value{
+The output of \code{null_model} is a list of class \code{null_model}
+with elements \item{call }{the function call} \item{value }{the mean of
+\code{y} or the most prevalent class} \item{levels }{when \code{y} is a
+factor, a vector of levels. \code{NULL} otherwise} \item{pct }{when \code{y}
+is a factor, a data frame with a column for each class (\code{NULL}
+otherwise). The column for the most prevalent class has the proportion of
+the training samples with that class (the other columns are zero). } \item{n
+}{the number of elements in \code{y}}
+
+\code{predict.null_model} returns a either a factor or numeric vector
+depending on the class of \code{y}. All predictions are always the same.
+}
+\description{
+Fit a single mean or largest class model
+}
+\details{
+\code{null_model} emulates other model building functions, but returns the
+simplest model possible given a training set: a single mean for numeric
+outcomes and the most prevalent class for factor outcomes. When class
+probabilities are requested, the percentage of the training set samples with
+the most prevalent class is returned.
+}
+\examples{
+
+outcome <- factor(sample(letters[1:2],
+                         size = 100,
+                         prob = c(.1, .9),
+                         replace = TRUE))
+useless <- null_model(y = outcome)
+useless
+predict(useless, matrix(NA, nrow = 10))
+
+}
+\keyword{models}
diff --git a/man/nullmodel.Rd b/man/nullmodel.Rd
new file mode 100644
index 000000000..65445b0ca
--- /dev/null
+++ b/man/nullmodel.Rd
@@ -0,0 +1,47 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/nullmodel.R
+\name{nullmodel}
+\alias{nullmodel}
+\title{General Interface for null models}
+\usage{
+nullmodel(mode = "classification")
+}
+\arguments{
+\item{mode}{A single character string for the type of model.
+Possible values for this model are "unknown", "regression", or
+"classification".}
+}
+\description{
+\code{nullmodel} is a way to generate a \emph{specification} of a model before
+fitting and allows the model to be created using R. It doens't have any
+main arguments.
+}
+\details{
+The model can be created using the \code{fit()} function using the
+following \emph{engines}:
+\itemize{
+\item \pkg{R}:  \code{"parsnip"}
+}
+}
+\section{Engine Details}{
+
+
+Engines may have pre-set default arguments when executing the
+model fit call.  For this type of
+model, the template of the fit calls are:
+
+\pkg{parsnip} classification
+
+\Sexpr[results=rd]{parsnip:::show_fit(parsnip:::nullmodel(mode = "classification"), "parsnip")}
+
+\pkg{parsnip} regression
+
+\Sexpr[results=rd]{parsnip:::show_fit(parsnip:::nullmodel(mode = "regression"), "parsnip")}
+}
+
+\examples{
+nullmodel(mode = "regression")
+}
+\seealso{
+\code{\link[=varying]{varying()}}, \code{\link[=fit]{fit()}}
+}
diff --git a/tests/testthat/test-nullmodel.R b/tests/testthat/test-nullmodel.R
new file mode 100644
index 000000000..436095baa
--- /dev/null
+++ b/tests/testthat/test-nullmodel.R
@@ -0,0 +1,121 @@
+library(testthat)
+library(parsnip)
+library(rlang)
+library(tibble)
+
+context("test-nullmodel")
+source("helpers.R")
+
+
+test_that('primary arguments', {
+  basic <- nullmodel(mode = "regression")
+  basic_nullmodel <- translate(basic %>% set_engine("parsnip"))
+  expect_equal(basic_nullmodel$method$fit$args,
+               list(
+                 x = expr(missing_arg()),
+                 y = expr(missing_arg())
+               )
+  )
+})
+
+
+test_that('engine arguments', {
+  nullmodel_keep <- nullmodel(mode = "regression")
+  expect_equal(translate(nullmodel_keep %>% set_engine("parsnip", keepxy = FALSE))$method$fit$args,
+               list(
+                 x = expr(missing_arg()),
+                 y = expr(missing_arg()),
+                 keepxy = new_empty_quosure(FALSE)
+               )
+  )
+})
+
+test_that('bad input', {
+  expect_error(translate(nullmodel() %>% set_engine("wat?")))
+  expect_error(translate(nullmodel(mode = "regression") %>% set_engine()))
+  expect_error(translate(nullmodel(formula = y ~ x)))
+  expect_warning(
+    translate(
+      nullmodel(mode = "regression") %>% set_engine("parsnip", x = iris[,1:3], y = iris$Species)
+    )
+  )
+})
+
+# ------------------------------------------------------------------------------
+
+num_pred <- c("Sepal.Width", "Petal.Width", "Petal.Length")
+iris_bad_form <- as.formula(Species ~ term)
+iris_basic <- nullmodel(mode = "regression") %>% set_engine("parsnip")
+
+# ------------------------------------------------------------------------------
+
+test_that('nullmodel execution', {
+
+  expect_error(
+    res <- fit(
+      iris_basic,
+      Sepal.Length ~ log(Sepal.Width) + Species,
+      data = iris
+    ),
+    regexp = NA
+  )
+  expect_error(
+    res <- fit_xy(
+      iris_basic,
+      x = iris[, num_pred],
+      y = iris$Sepal.Length
+    ),
+    regexp = NA
+  )
+
+  expect_error(
+    res <- fit(
+      iris_basic,
+      iris_bad_form,
+      data = iris
+    )
+  )
+
+})
+
+test_that('nullmodel prediction', {
+
+  uni_pred <- tibble(.pred = rep(5.843333, 5))
+  inl_pred <- rep(5.843333, 5)
+
+  mv_pred <-
+    structure(
+      list(Sepal.Width = seq_len(10),
+           Petal.Width = seq_len(10)),
+      class = "data.frame",
+      row.names = c(NA, -10L))
+
+  res_xy <- fit_xy(
+    iris_basic,
+    x = iris[, num_pred],
+    y = iris$Sepal.Length
+  )
+
+  expect_equal(uni_pred, predict(res_xy, new_data = iris[1, num_pred]))
+
+  res_form <- fit(
+    iris_basic,
+    Sepal.Length ~ log(Sepal.Width) + Species,
+    data = iris
+  )
+  expect_equal(inl_pred, predict_numeric(res_form, iris[1:5, ]))
+})
+
+# ------------------------------------------------------------------------------
+
+test_that('classification', {
+
+  expect_error(
+    nullmodel <- nullmodel(mode = "classification") %>%
+      set_engine("parsnip") %>%
+      fit(Species ~ ., data = iris),
+    regexp = NA
+  )
+  expect_true(!is.null(nullmodel$fit))
+})
+

From 47423935dc78b9c6255e83dd6ba7b442ac38feae Mon Sep 17 00:00:00 2001
From: EmilHvitfeldt <emilhhvitfeldt@gmail.com>
Date: Thu, 24 Jan 2019 20:21:51 -0800
Subject: [PATCH 2/6] name change nullmodel -> null_model

---
 NAMESPACE                       |  6 +--
 R/nullmodel.R                   | 46 +++++++++---------
 R/nullmodel_data.R              | 10 ++--
 man/null_model.Rd               | 84 ++++++++++++---------------------
 man/nullmodel.Rd                | 84 +++++++++++++++++++++------------
 tests/testthat/test-nullmodel.R | 18 +++----
 6 files changed, 124 insertions(+), 124 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index d201d151e..6cbc69a7c 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -9,12 +9,12 @@ S3method(multi_predict,"_lognet")
 S3method(multi_predict,"_multnet")
 S3method(multi_predict,"_xgb.Booster")
 S3method(multi_predict,default)
-S3method(null_model,default)
+S3method(nullmodel,default)
 S3method(predict,"_elnet")
 S3method(predict,"_lognet")
 S3method(predict,"_multnet")
 S3method(predict,model_fit)
-S3method(predict,null_model)
+S3method(predict,nullmodel)
 S3method(predict_class,"_lognet")
 S3method(predict_class,model_fit)
 S3method(predict_classprob,"_lognet")
@@ -39,7 +39,7 @@ S3method(print,model_fit)
 S3method(print,model_spec)
 S3method(print,multinom_reg)
 S3method(print,nearest_neighbor)
-S3method(print,null_model)
+S3method(print,nullmodel)
 S3method(print,rand_forest)
 S3method(print,surv_reg)
 S3method(print,svm_poly)
diff --git a/R/nullmodel.R b/R/nullmodel.R
index 5c564210c..3c9cb5f00 100644
--- a/R/nullmodel.R
+++ b/R/nullmodel.R
@@ -2,24 +2,24 @@
 #'
 #' Fit a single mean or largest class model
 #'
-#' \code{null_model} emulates other model building functions, but returns the
+#' \code{nullmodel} emulates other model building functions, but returns the
 #' simplest model possible given a training set: a single mean for numeric
 #' outcomes and the most prevalent class for factor outcomes. When class
 #' probabilities are requested, the percentage of the training set samples with
 #' the most prevalent class is returned.
 #'
-#' @aliases null_model null_model.default predict.null_model
+#' @aliases nullmodel nullmodel.default predict.nullmodel
 #' @param x An optional matrix or data frame of predictors. These values are
 #' not used in the model fit
 #' @param y A numeric vector (for regression) or factor (for classification) of
 #' outcomes
 #' @param \dots Optional arguments (not yet used)
-#' @param object An object of class \code{null_model}
+#' @param object An object of class \code{nullmodel}
 #' @param newdata A matrix or data frame of predictors (only used to determine
 #' the number of predictions to return)
 #' @param type Either "raw" (for regression), "class" or "prob" (for
 #' classification)
-#' @return The output of \code{null_model} is a list of class \code{null_model}
+#' @return The output of \code{nullmodel} is a list of class \code{nullmodel}
 #' with elements \item{call }{the function call} \item{value }{the mean of
 #' \code{y} or the most prevalent class} \item{levels }{when \code{y} is a
 #' factor, a vector of levels. \code{NULL} otherwise} \item{pct }{when \code{y}
@@ -28,7 +28,7 @@
 #' the training samples with that class (the other columns are zero). } \item{n
 #' }{the number of elements in \code{y}}
 #'
-#' \code{predict.null_model} returns a either a factor or numeric vector
+#' \code{predict.nullmodel} returns a either a factor or numeric vector
 #' depending on the class of \code{y}. All predictions are always the same.
 #' @keywords models
 #' @examples
@@ -37,16 +37,16 @@
 #'                          size = 100,
 #'                          prob = c(.1, .9),
 #'                          replace = TRUE))
-#' useless <- null_model(y = outcome)
+#' useless <- nullmodel(y = outcome)
 #' useless
-#' predict(useless, matrix(NA, nrow = 10))
+#' predict(useless, matrix(NA, nrow = 5))
 #'
 #' @export
-null_model <- function (x, ...) UseMethod("null_model")
+nullmodel <- function (x, ...) UseMethod("nullmodel")
 
 #' @export
-#' @rdname null_model
-null_model.default <- function(x = NULL, y, ...)
+#' @rdname nullmodel
+nullmodel.default <- function(x = NULL, y, ...)
 {
 
   if(is.factor(y))
@@ -66,12 +66,12 @@ null_model.default <- function(x = NULL, y, ...)
          levels = lvls,
          pct = pct,
          n = length(y)),
-    class = "null_model")
+    class = "nullmodel")
 }
 
 #' @export
-#' @rdname null_model
-print.null_model <- function(x, ...)
+#' @rdname nullmodel
+print.nullmodel <- function(x, ...)
 {
   cat("Null",
       ifelse(is.null(x$levels), "Classification", "Regression"),
@@ -84,8 +84,8 @@ print.null_model <- function(x, ...)
 }
 
 #' @export
-#' @rdname null_model
-predict.null_model <- function (object, newdata = NULL, type  = NULL, ...)
+#' @rdname nullmodel
+predict.nullmodel <- function (object, newdata = NULL, type  = NULL, ...)
 {
   if(is.null(type))
   {
@@ -112,7 +112,7 @@ predict.null_model <- function (object, newdata = NULL, type  = NULL, ...)
 
 #' General Interface for null models
 #'
-#' `nullmodel` is a way to generate a _specification_ of a model before
+#' `null_model` is a way to generate a _specification_ of a model before
 #'  fitting and allows the model to be created using R. It doens't have any
 #'  main arguments.
 #'
@@ -133,23 +133,23 @@ predict.null_model <- function (object, newdata = NULL, type  = NULL, ...)
 #'
 #' \pkg{parsnip} classification
 #'
-#' \Sexpr[results=rd]{parsnip:::show_fit(parsnip:::nullmodel(mode = "classification"), "parsnip")}
+#' \Sexpr[results=rd]{parsnip:::show_fit(parsnip:::null_model(mode = "classification"), "parsnip")}
 #'
 #' \pkg{parsnip} regression
 #'
-#' \Sexpr[results=rd]{parsnip:::show_fit(parsnip:::nullmodel(mode = "regression"), "parsnip")}
+#' \Sexpr[results=rd]{parsnip:::show_fit(parsnip:::null_model(mode = "regression"), "parsnip")}
 #'
 #' @importFrom purrr map_lgl
 #' @seealso [varying()], [fit()]
 #' @examples
-#' nullmodel(mode = "regression")
+#' null_model(mode = "regression")
 #' @export
-nullmodel <-
+null_model <-
   function(mode = "classification") {
     # Check for correct mode
-    if (!(mode %in% nullmodel_modes))
+    if (!(mode %in% null_model_modes))
       stop("`mode` should be one of: ",
-           paste0("'", nullmodel_modes, "'", collapse = ", "),
+           paste0("'", null_model_modes, "'", collapse = ", "),
            call. = FALSE)
 
     # Capture the arguments in quosures
@@ -160,6 +160,6 @@ nullmodel <-
                 mode = mode, method = NULL, engine = NULL)
 
     # set classes in the correct order
-    class(out) <- make_classes("nullmodel")
+    class(out) <- make_classes("null_model")
     out
   }
diff --git a/R/nullmodel_data.R b/R/nullmodel_data.R
index 158bad2bb..632ad609a 100644
--- a/R/nullmodel_data.R
+++ b/R/nullmodel_data.R
@@ -1,25 +1,25 @@
-nullmodel_arg_key <- data.frame(
+null_model_arg_key <- data.frame(
   parsnip   =  NULL,
   row.names =  NULL,
   stringsAsFactors = FALSE
 )
 
-nullmodel_modes <- c("classification", "regression", "unknown")
+null_model_modes <- c("classification", "regression", "unknown")
 
-nullmodel_engines <- data.frame(
+null_model_engines <- data.frame(
   parsnip   = c(TRUE, TRUE, FALSE),
   row.names =  c("classification", "regression", "unknown")
 )
 
 # ------------------------------------------------------------------------------
 
-nullmodel_parsnip_data <-
+null_model_parsnip_data <-
   list(
     libs = "parsnip",
     fit = list(
       interface = "matrix",
       protect = c("x", "y"),
-      func = c(fun = "null_model"),
+      func = c(fun = "nullmodel"),
       defaults = list()
       ),
     class = list(
diff --git a/man/null_model.Rd b/man/null_model.Rd
index bde665ef6..d1b709214 100644
--- a/man/null_model.Rd
+++ b/man/null_model.Rd
@@ -2,68 +2,46 @@
 % Please edit documentation in R/nullmodel.R
 \name{null_model}
 \alias{null_model}
-\alias{null_model.default}
-\alias{predict.null_model}
-\alias{print.null_model}
-\title{Fit a simple, non-informative model}
+\title{General Interface for null models}
 \usage{
-null_model(x, ...)
-
-\method{null_model}{default}(x = NULL, y, ...)
-
-\method{print}{null_model}(x, ...)
-
-\method{predict}{null_model}(object, newdata = NULL, type = NULL, ...)
+null_model(mode = "classification")
 }
 \arguments{
-\item{x}{An optional matrix or data frame of predictors. These values are
-not used in the model fit}
+\item{mode}{A single character string for the type of model.
+Possible values for this model are "unknown", "regression", or
+"classification".}
+}
+\description{
+\code{null_model} is a way to generate a \emph{specification} of a model before
+fitting and allows the model to be created using R. It doens't have any
+main arguments.
+}
+\details{
+The model can be created using the \code{fit()} function using the
+following \emph{engines}:
+\itemize{
+\item \pkg{R}:  \code{"parsnip"}
+}
+}
+\section{Engine Details}{
 
-\item{\dots}{Optional arguments (not yet used)}
 
-\item{y}{A numeric vector (for regression) or factor (for classification) of
-outcomes}
+Engines may have pre-set default arguments when executing the
+model fit call.  For this type of
+model, the template of the fit calls are:
 
-\item{object}{An object of class \code{null_model}}
+\pkg{parsnip} classification
 
-\item{newdata}{A matrix or data frame of predictors (only used to determine
-the number of predictions to return)}
+\Sexpr[results=rd]{parsnip:::show_fit(parsnip:::null_model(mode = "classification"), "parsnip")}
 
-\item{type}{Either "raw" (for regression), "class" or "prob" (for
-classification)}
-}
-\value{
-The output of \code{null_model} is a list of class \code{null_model}
-with elements \item{call }{the function call} \item{value }{the mean of
-\code{y} or the most prevalent class} \item{levels }{when \code{y} is a
-factor, a vector of levels. \code{NULL} otherwise} \item{pct }{when \code{y}
-is a factor, a data frame with a column for each class (\code{NULL}
-otherwise). The column for the most prevalent class has the proportion of
-the training samples with that class (the other columns are zero). } \item{n
-}{the number of elements in \code{y}}
+\pkg{parsnip} regression
 
-\code{predict.null_model} returns a either a factor or numeric vector
-depending on the class of \code{y}. All predictions are always the same.
-}
-\description{
-Fit a single mean or largest class model
-}
-\details{
-\code{null_model} emulates other model building functions, but returns the
-simplest model possible given a training set: a single mean for numeric
-outcomes and the most prevalent class for factor outcomes. When class
-probabilities are requested, the percentage of the training set samples with
-the most prevalent class is returned.
+\Sexpr[results=rd]{parsnip:::show_fit(parsnip:::null_model(mode = "regression"), "parsnip")}
 }
-\examples{
-
-outcome <- factor(sample(letters[1:2],
-                         size = 100,
-                         prob = c(.1, .9),
-                         replace = TRUE))
-useless <- null_model(y = outcome)
-useless
-predict(useless, matrix(NA, nrow = 10))
 
+\examples{
+null_model(mode = "regression")
+}
+\seealso{
+\code{\link[=varying]{varying()}}, \code{\link[=fit]{fit()}}
 }
-\keyword{models}
diff --git a/man/nullmodel.Rd b/man/nullmodel.Rd
index 65445b0ca..cffbd87cb 100644
--- a/man/nullmodel.Rd
+++ b/man/nullmodel.Rd
@@ -2,46 +2,68 @@
 % Please edit documentation in R/nullmodel.R
 \name{nullmodel}
 \alias{nullmodel}
-\title{General Interface for null models}
+\alias{nullmodel.default}
+\alias{predict.nullmodel}
+\alias{print.nullmodel}
+\title{Fit a simple, non-informative model}
 \usage{
-nullmodel(mode = "classification")
+nullmodel(x, ...)
+
+\method{nullmodel}{default}(x = NULL, y, ...)
+
+\method{print}{nullmodel}(x, ...)
+
+\method{predict}{nullmodel}(object, newdata = NULL, type = NULL, ...)
 }
 \arguments{
-\item{mode}{A single character string for the type of model.
-Possible values for this model are "unknown", "regression", or
-"classification".}
-}
-\description{
-\code{nullmodel} is a way to generate a \emph{specification} of a model before
-fitting and allows the model to be created using R. It doens't have any
-main arguments.
-}
-\details{
-The model can be created using the \code{fit()} function using the
-following \emph{engines}:
-\itemize{
-\item \pkg{R}:  \code{"parsnip"}
-}
-}
-\section{Engine Details}{
+\item{x}{An optional matrix or data frame of predictors. These values are
+not used in the model fit}
 
+\item{\dots}{Optional arguments (not yet used)}
 
-Engines may have pre-set default arguments when executing the
-model fit call.  For this type of
-model, the template of the fit calls are:
+\item{y}{A numeric vector (for regression) or factor (for classification) of
+outcomes}
 
-\pkg{parsnip} classification
+\item{object}{An object of class \code{nullmodel}}
 
-\Sexpr[results=rd]{parsnip:::show_fit(parsnip:::nullmodel(mode = "classification"), "parsnip")}
+\item{newdata}{A matrix or data frame of predictors (only used to determine
+the number of predictions to return)}
 
-\pkg{parsnip} regression
-
-\Sexpr[results=rd]{parsnip:::show_fit(parsnip:::nullmodel(mode = "regression"), "parsnip")}
+\item{type}{Either "raw" (for regression), "class" or "prob" (for
+classification)}
 }
+\value{
+The output of \code{nullmodel} is a list of class \code{nullmodel}
+with elements \item{call }{the function call} \item{value }{the mean of
+\code{y} or the most prevalent class} \item{levels }{when \code{y} is a
+factor, a vector of levels. \code{NULL} otherwise} \item{pct }{when \code{y}
+is a factor, a data frame with a column for each class (\code{NULL}
+otherwise). The column for the most prevalent class has the proportion of
+the training samples with that class (the other columns are zero). } \item{n
+}{the number of elements in \code{y}}
 
-\examples{
-nullmodel(mode = "regression")
+\code{predict.nullmodel} returns a either a factor or numeric vector
+depending on the class of \code{y}. All predictions are always the same.
 }
-\seealso{
-\code{\link[=varying]{varying()}}, \code{\link[=fit]{fit()}}
+\description{
+Fit a single mean or largest class model
+}
+\details{
+\code{nullmodel} emulates other model building functions, but returns the
+simplest model possible given a training set: a single mean for numeric
+outcomes and the most prevalent class for factor outcomes. When class
+probabilities are requested, the percentage of the training set samples with
+the most prevalent class is returned.
+}
+\examples{
+
+outcome <- factor(sample(letters[1:2],
+                         size = 100,
+                         prob = c(.1, .9),
+                         replace = TRUE))
+useless <- nullmodel(y = outcome)
+useless
+predict(useless, matrix(NA, nrow = 5))
+
 }
+\keyword{models}
diff --git a/tests/testthat/test-nullmodel.R b/tests/testthat/test-nullmodel.R
index 436095baa..14f2af5d7 100644
--- a/tests/testthat/test-nullmodel.R
+++ b/tests/testthat/test-nullmodel.R
@@ -8,7 +8,7 @@ source("helpers.R")
 
 
 test_that('primary arguments', {
-  basic <- nullmodel(mode = "regression")
+  basic <- null_model(mode = "regression")
   basic_nullmodel <- translate(basic %>% set_engine("parsnip"))
   expect_equal(basic_nullmodel$method$fit$args,
                list(
@@ -20,7 +20,7 @@ test_that('primary arguments', {
 
 
 test_that('engine arguments', {
-  nullmodel_keep <- nullmodel(mode = "regression")
+  nullmodel_keep <- null_model(mode = "regression")
   expect_equal(translate(nullmodel_keep %>% set_engine("parsnip", keepxy = FALSE))$method$fit$args,
                list(
                  x = expr(missing_arg()),
@@ -31,12 +31,12 @@ test_that('engine arguments', {
 })
 
 test_that('bad input', {
-  expect_error(translate(nullmodel() %>% set_engine("wat?")))
-  expect_error(translate(nullmodel(mode = "regression") %>% set_engine()))
-  expect_error(translate(nullmodel(formula = y ~ x)))
+  expect_error(translate(null_model() %>% set_engine("wat?")))
+  expect_error(translate(null_model(mode = "regression") %>% set_engine()))
+  expect_error(translate(null_model(formula = y ~ x)))
   expect_warning(
     translate(
-      nullmodel(mode = "regression") %>% set_engine("parsnip", x = iris[,1:3], y = iris$Species)
+      null_model(mode = "regression") %>% set_engine("parsnip", x = iris[,1:3], y = iris$Species)
     )
   )
 })
@@ -45,7 +45,7 @@ test_that('bad input', {
 
 num_pred <- c("Sepal.Width", "Petal.Width", "Petal.Length")
 iris_bad_form <- as.formula(Species ~ term)
-iris_basic <- nullmodel(mode = "regression") %>% set_engine("parsnip")
+iris_basic <- null_model(mode = "regression") %>% set_engine("parsnip")
 
 # ------------------------------------------------------------------------------
 
@@ -111,11 +111,11 @@ test_that('nullmodel prediction', {
 test_that('classification', {
 
   expect_error(
-    nullmodel <- nullmodel(mode = "classification") %>%
+    null_model <- null_model(mode = "classification") %>%
       set_engine("parsnip") %>%
       fit(Species ~ ., data = iris),
     regexp = NA
   )
-  expect_true(!is.null(nullmodel$fit))
+  expect_true(!is.null(null_model$fit))
 })
 

From 3723c877a2d147cb2cc98a07f2b805be36be4bf2 Mon Sep 17 00:00:00 2001
From: EmilHvitfeldt <emilhhvitfeldt@gmail.com>
Date: Fri, 25 Jan 2019 17:35:32 -0800
Subject: [PATCH 3/6] linting + length of predict fix

---
 R/nullmodel.R                   | 16 ++++++----------
 R/nullmodel_data.R              |  2 +-
 man/nullmodel.Rd                |  4 ++--
 tests/testthat/test-nullmodel.R | 21 +++++++--------------
 4 files changed, 16 insertions(+), 27 deletions(-)

diff --git a/R/nullmodel.R b/R/nullmodel.R
index 3c9cb5f00..7cb8babd5 100644
--- a/R/nullmodel.R
+++ b/R/nullmodel.R
@@ -15,7 +15,7 @@
 #' outcomes
 #' @param \dots Optional arguments (not yet used)
 #' @param object An object of class \code{nullmodel}
-#' @param newdata A matrix or data frame of predictors (only used to determine
+#' @param new_data A matrix or data frame of predictors (only used to determine
 #' the number of predictions to return)
 #' @param type Either "raw" (for regression), "class" or "prob" (for
 #' classification)
@@ -85,18 +85,14 @@ print.nullmodel <- function(x, ...)
 
 #' @export
 #' @rdname nullmodel
-predict.nullmodel <- function (object, newdata = NULL, type  = NULL, ...)
-{
-  if(is.null(type))
-  {
+predict.nullmodel <- function (object, new_data = NULL, type  = NULL, ...) {
+  if(is.null(type)) {
     type <- if(is.null(object$levels)) "raw" else "class"
   }
 
-  n <- if(is.null(newdata)) object$n else nrow(newdata)
-  if(!is.null(object$levels))
-  {
-    if(type == "prob")
-    {
+  n <- if(is.null(new_data)) object$n else nrow(new_data)
+  if(!is.null(object$levels)) {
+    if(type == "prob") {
       out <- matrix(rep(object$pct, n), nrow = n, byrow = TRUE)
       colnames(out) <- object$levels
       out <- as.data.frame(out)
diff --git a/R/nullmodel_data.R b/R/nullmodel_data.R
index 632ad609a..3de382220 100644
--- a/R/nullmodel_data.R
+++ b/R/nullmodel_data.R
@@ -41,7 +41,7 @@ null_model_parsnip_data <-
         list(
           object = quote(object$fit),
           new_data = quote(new_data),
-          type = "raw"
+          type = "numeric"
         )
       ),
     raw = list(
diff --git a/man/nullmodel.Rd b/man/nullmodel.Rd
index cffbd87cb..d13596d93 100644
--- a/man/nullmodel.Rd
+++ b/man/nullmodel.Rd
@@ -13,7 +13,7 @@ nullmodel(x, ...)
 
 \method{print}{nullmodel}(x, ...)
 
-\method{predict}{nullmodel}(object, newdata = NULL, type = NULL, ...)
+\method{predict}{nullmodel}(object, new_data = NULL, type = NULL, ...)
 }
 \arguments{
 \item{x}{An optional matrix or data frame of predictors. These values are
@@ -26,7 +26,7 @@ outcomes}
 
 \item{object}{An object of class \code{nullmodel}}
 
-\item{newdata}{A matrix or data frame of predictors (only used to determine
+\item{new_data}{A matrix or data frame of predictors (only used to determine
 the number of predictions to return)}
 
 \item{type}{Either "raw" (for regression), "class" or "prob" (for
diff --git a/tests/testthat/test-nullmodel.R b/tests/testthat/test-nullmodel.R
index 14f2af5d7..8f775ccbc 100644
--- a/tests/testthat/test-nullmodel.R
+++ b/tests/testthat/test-nullmodel.R
@@ -43,7 +43,7 @@ test_that('bad input', {
 
 # ------------------------------------------------------------------------------
 
-num_pred <- c("Sepal.Width", "Petal.Width", "Petal.Length")
+num_pred <-c("Sepal.Length", "Sepal.Width", "Petal.Width")
 iris_bad_form <- as.formula(Species ~ term)
 iris_basic <- null_model(mode = "regression") %>% set_engine("parsnip")
 
@@ -63,7 +63,7 @@ test_that('nullmodel execution', {
     res <- fit_xy(
       iris_basic,
       x = iris[, num_pred],
-      y = iris$Sepal.Length
+      y = iris$Petal.Length
     ),
     regexp = NA
   )
@@ -80,27 +80,20 @@ test_that('nullmodel execution', {
 
 test_that('nullmodel prediction', {
 
-  uni_pred <- tibble(.pred = rep(5.843333, 5))
-  inl_pred <- rep(5.843333, 5)
-
-  mv_pred <-
-    structure(
-      list(Sepal.Width = seq_len(10),
-           Petal.Width = seq_len(10)),
-      class = "data.frame",
-      row.names = c(NA, -10L))
+  uni_pred <- tibble(.pred = rep(3.758, 5))
+  inl_pred <- rep(3.758, 5)
 
   res_xy <- fit_xy(
     iris_basic,
     x = iris[, num_pred],
-    y = iris$Sepal.Length
+    y = iris$Petal.Length
   )
 
-  expect_equal(uni_pred, predict(res_xy, new_data = iris[1, num_pred]))
+  expect_equal(uni_pred, predict(res_xy, new_data = iris[1:5, num_pred]))
 
   res_form <- fit(
     iris_basic,
-    Sepal.Length ~ log(Sepal.Width) + Species,
+    Petal.Length ~ log(Sepal.Width) + Species,
     data = iris
   )
   expect_equal(inl_pred, predict_numeric(res_form, iris[1:5, ]))

From d767d267b95ad702f5c05e7e1b8b408ce0bece43 Mon Sep 17 00:00:00 2001
From: EmilHvitfeldt <emilhhvitfeldt@gmail.com>
Date: Fri, 25 Jan 2019 18:10:47 -0800
Subject: [PATCH 4/6] more lint

---
 R/nullmodel.R | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/R/nullmodel.R b/R/nullmodel.R
index 7cb8babd5..4e205ce64 100644
--- a/R/nullmodel.R
+++ b/R/nullmodel.R
@@ -46,11 +46,9 @@ nullmodel <- function (x, ...) UseMethod("nullmodel")
 
 #' @export
 #' @rdname nullmodel
-nullmodel.default <- function(x = NULL, y, ...)
-{
+nullmodel.default <- function(x = NULL, y, ...) {
 
-  if(is.factor(y))
-  {
+  if(is.factor(y)) {
     lvls <- levels(y)
     tab <- table(y)
     value <- names(tab)[which.max(tab)]
@@ -71,8 +69,7 @@ nullmodel.default <- function(x = NULL, y, ...)
 
 #' @export
 #' @rdname nullmodel
-print.nullmodel <- function(x, ...)
-{
+print.nullmodel <- function(x, ...) {
   cat("Null",
       ifelse(is.null(x$levels), "Classification", "Regression"),
       "Model\n")

From 4a37edb772a57aae8858f8a5992dbe43e0f5eff0 Mon Sep 17 00:00:00 2001
From: EmilHvitfeldt <emilhhvitfeldt@gmail.com>
Date: Fri, 25 Jan 2019 18:49:33 -0800
Subject: [PATCH 5/6] library(parsnip)

iris_basic <- null_model(mode = "regression") %>%
  set_engine("parsnip") %>%
  fit(cbind(Sepal.Width) ~ ., data = iris)

predict_numeric(iris_basic, iris[1:5,])

iris_basic <- null_model(mode = "regression") %>%
  set_engine("parsnip") %>%
  fit(cbind(Sepal.Width, Petal.Width) ~ ., data = iris)

predict_numeric(iris_basic, iris[1:5,])
---
 R/nullmodel.R                   | 32 ++++++++++++++++++++++++++------
 tests/testthat/test-nullmodel.R | 22 ++++++++++++++++++++++
 2 files changed, 48 insertions(+), 6 deletions(-)

diff --git a/R/nullmodel.R b/R/nullmodel.R
index 4e205ce64..84ec41cc6 100644
--- a/R/nullmodel.R
+++ b/R/nullmodel.R
@@ -48,6 +48,7 @@ nullmodel <- function (x, ...) UseMethod("nullmodel")
 #' @rdname nullmodel
 nullmodel.default <- function(x = NULL, y, ...) {
 
+
   if(is.factor(y)) {
     lvls <- levels(y)
     tab <- table(y)
@@ -56,14 +57,19 @@ nullmodel.default <- function(x = NULL, y, ...) {
   } else {
     lvls <- NULL
     pct <- NULL
-    value <- mean(y, na.rm = TRUE)
+    if(is.null(dim(y))) {
+      value <- mean(y, na.rm = TRUE)
+    } else {
+      value <- colMeans(y, na.rm = TRUE)
+    }
   }
+
   structure(
     list(call = match.call(),
          value = value,
          levels = lvls,
          pct = pct,
-         n = length(y)),
+         n = length(y[[1]])),
     class = "nullmodel")
 }
 
@@ -75,9 +81,16 @@ print.nullmodel <- function(x, ...) {
       "Model\n")
   x$call
 
-  cat("Predicted Value:",
-      ifelse(is.null(x$levels), format(x$value), x$value),
-      "\n")
+  if (length(x$value) == 1) {
+    cat("Predicted Value:",
+        ifelse(is.null(x$levels), format(x$value), x$value),
+        "\n")
+  } else {
+    cat("Predicted Value:\n",
+        names(x$value), "\n",
+        x$value,
+        "\n")
+  }
 }
 
 #' @export
@@ -98,7 +111,14 @@ predict.nullmodel <- function (object, new_data = NULL, type  = NULL, ...) {
     }
   } else {
     if(type %in% c("prob", "class")) stop("ony raw predicitons are applicable to regression models")
-    out <- rep(object$value, n)
+    if(length(object$value) == 1) {
+      out <- rep(object$value, n)
+    } else {
+      out <- as_tibble(matrix(rep(object$value, n),
+                              ncol = length(object$value), byrow = TRUE))
+
+      names(out) <- names(object$value)
+    }
   }
   out
 }
diff --git a/tests/testthat/test-nullmodel.R b/tests/testthat/test-nullmodel.R
index 8f775ccbc..2d82f59c6 100644
--- a/tests/testthat/test-nullmodel.R
+++ b/tests/testthat/test-nullmodel.R
@@ -76,12 +76,25 @@ test_that('nullmodel execution', {
     )
   )
 
+  ## multivariate y
+
+  expect_error(
+    res <- fit(
+      iris_basic,
+      cbind(Sepal.Width, Petal.Width) ~ .,
+      data = iris
+    ),
+    regexp = NA
+  )
+
 })
 
 test_that('nullmodel prediction', {
 
   uni_pred <- tibble(.pred = rep(3.758, 5))
   inl_pred <- rep(3.758, 5)
+  mw_pred <- tibble(gear = rep(3.6875, 5),
+                    carb = rep(2.8125, 5))
 
   res_xy <- fit_xy(
     iris_basic,
@@ -97,6 +110,15 @@ test_that('nullmodel prediction', {
     data = iris
   )
   expect_equal(inl_pred, predict_numeric(res_form, iris[1:5, ]))
+
+  # Multivariate y
+  res <- fit(
+    iris_basic,
+    cbind(gear, carb) ~ .,
+    data = mtcars
+  )
+
+  expect_equal(mw_pred, predict_numeric(res, mtcars[1:5, ]))
 })
 
 # ------------------------------------------------------------------------------

From 0ca0445c134eafc659c33a7e4aea548719b53362 Mon Sep 17 00:00:00 2001
From: EmilHvitfeldt <emilhhvitfeldt@gmail.com>
Date: Tue, 29 Jan 2019 13:09:05 -0800
Subject: [PATCH 6/6] added classprob prediction type

---
 R/nullmodel_data.R                             | 18 ++++++++++++++++--
 .../{test-nullmodel.R => test_nullmodel.R}     |  1 +
 2 files changed, 17 insertions(+), 2 deletions(-)
 rename tests/testthat/{test-nullmodel.R => test_nullmodel.R} (99%)

diff --git a/R/nullmodel_data.R b/R/nullmodel_data.R
index 3de382220..80380a98a 100644
--- a/R/nullmodel_data.R
+++ b/R/nullmodel_data.R
@@ -23,14 +23,28 @@ null_model_parsnip_data <-
       defaults = list()
       ),
     class = list(
+      pre = NULL,
+      post = NULL,
+      func = c(fun = "predict"),
+      args =
+        list(
+          object = quote(object$fit),
+          new_data = quote(new_data),
+          type = "class"
+        )
+    ),
+    classprob = list(
         pre = NULL,
-        post = NULL,
+        post = function(x, object) {
+          str(as_tibble(x))
+          as_tibble(x)
+        },
         func = c(fun = "predict"),
         args =
           list(
             object = quote(object$fit),
             new_data = quote(new_data),
-            type = "class"
+            type = "prob"
           )
       ),
     numeric = list(
diff --git a/tests/testthat/test-nullmodel.R b/tests/testthat/test_nullmodel.R
similarity index 99%
rename from tests/testthat/test-nullmodel.R
rename to tests/testthat/test_nullmodel.R
index 2d82f59c6..45a6a9932 100644
--- a/tests/testthat/test-nullmodel.R
+++ b/tests/testthat/test_nullmodel.R
@@ -134,3 +134,4 @@ test_that('classification', {
   expect_true(!is.null(null_model$fit))
 })
 
+