From f585b33becebf5d4251cfbed9fd8ec38385321e3 Mon Sep 17 00:00:00 2001 From: RobertZK Date: Sun, 2 Aug 2015 10:37:52 -0500 Subject: [PATCH 01/25] Change README --- DESCRIPTION | 2 +- README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 883b9ba..4b8024c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -4,7 +4,7 @@ Description: Tundra provides a standardized container format for classifiers developed in R. This allows easier deployment by keeping both the data preparation procedure and the statistics in one place with an easy interface. -Version: 0.2.3 +Version: 0.3.0 Author: Robert Krzyzanowski Maintainer: Robert Krzyzanowski Authors@R: c(person("Robert", "Krzyzanowski", diff --git a/README.md b/README.md index 705e251..ccde103 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -Tundra [![Build Status](https://img.shields.io/travis/robertzk/tundra.svg)](https://travis-ci.org/robertzk/tundra.svg?branch=master) [![Coverage Status](https://img.shields.io/coveralls/robertzk/tundra.svg)](https://coveralls.io/r/robertzk/tundra) ![Release Tag](https://img.shields.io/github/tag/robertzk/tundra.svg) +Deployable Models in R [![Build Status](https://img.shields.io/travis/robertzk/tundra.svg)](https://travis-ci.org/robertzk/tundra.svg?branch=master) [![Coverage Status](https://img.shields.io/coveralls/robertzk/tundra.svg)](https://coveralls.io/r/robertzk/tundra) ![Release Tag](https://img.shields.io/github/tag/robertzk/tundra.svg) ====== ![tundra](http://i.imgur.com/tBFgOBW.jpg) From eaabcff13b5d28b54276984b9992e71174bc30e9 Mon Sep 17 00:00:00 2001 From: RobertZK Date: Sun, 2 Aug 2015 10:45:10 -0500 Subject: [PATCH 02/25] Initial documentation of the package. 
--- NAMESPACE | 2 +- R/package.tundra.R | 33 ++++++++++++++++++++++++++ man/tundra.Rd | 34 ++++++++++++++++++++++++--- man/tundraContainer.Rd | 3 ++- man/tundra_container.Rd | 3 ++- man/tundra_gbm_train_fn.Rd | 3 ++- man/tundra_regularization_train_fn.Rd | 3 ++- man/tundra_rf_train_fn.Rd | 3 ++- 8 files changed, 75 insertions(+), 9 deletions(-) create mode 100644 R/package.tundra.R diff --git a/NAMESPACE b/NAMESPACE index dfe7b94..2011ae1 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,4 +1,4 @@ -# Generated by roxygen2 (4.0.1): do not edit by hand +# Generated by roxygen2 (4.1.1): do not edit by hand S3method(print,tundraContainer) S3method(summary,tundraContainer) diff --git a/R/package.tundra.R b/R/package.tundra.R new file mode 100644 index 0000000..de65fa0 --- /dev/null +++ b/R/package.tundra.R @@ -0,0 +1,33 @@ +#' Tundra is a standardized classifier container format for R. +#' +#' Deploying models in production systems is generally a cumbersome process. +#' If analysis is performed in a language like R or SAS, the coefficients of the +#' model are usually extracted and translated to a "production-ready" language like +#' R or Java. +#' +#' However, this approach is flawed. The translation process is time consuming +#' and error-prone. R is demonstrably capable of serving models +#' in production environments as long as submillisecond latency is not a +#' requirement. This means it should be possible to push analysis performed in +#' R to directly score records in production systems without an intermediary. +#' This significantly decreases the cost of iterating on machine learning +#' models. +#' +#' A tundraContainer is a simple bundling of the two critical components of +#' any machine learning model. +#' +#' \itemize{ +#' \item{The data preparation required to convert raw production data to +#' a record that is acceptable to a trained classifier. 
For example, +#' a regression-based model may need discretization of non-categorical +#' variables or imputation of missing values.} +#' \item{The trained classifier, usually a native R S3 object with +#' a \code{train} method.} +#' } +#' +#' The former is provided by the \href{https://github.com/robertzk/mungebits}{mungebits} +#' package, while the latter is fully customizable to any R function. +#' +#' @name tundra +#' @docType package +NULL diff --git a/man/tundra.Rd b/man/tundra.Rd index 8edd33e..b733481 100644 --- a/man/tundra.Rd +++ b/man/tundra.Rd @@ -1,10 +1,38 @@ -% Generated by roxygen2 (4.0.1): do not edit by hand +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/package.tundra.R \docType{package} \name{tundra} \alias{tundra} \alias{tundra-package} -\title{tundra} +\title{Tundra is a standardized classifier container format for R.} \description{ -tundra +Deploying models in production systems is generally a cumbersome process. +If analysis is performed in a language like R or SAS, the coefficients of the +model are usually extracted and translated to a "production-ready" language like +R or Java. +} +\details{ +However, this approach is flawed. The translation process is time consuming +and error-prone. R is demonstrably capable of serving models +in production environments as long as submillisecond latency is not a +requirement. This means it should be possible to push analysis performed in +R to directly score records in production systems without an intermediary. +This significantly decreases the cost of iterating on machine learning +models. + +A tundraContainer is a simple bundling of the two critical components of +any machine learning model. + +\itemize{ + \item{The data preparation required to convert raw production data to + a record that is acceptable to a trained classifier. 
For example, + a regression-based model may need discretization of non-categorical + variables or imputation of missing values.} + \item{The trained classifier, usually a native R S3 object with + a \code{train} method.} +} + +The former is provided by the \href{https://github.com/robertzk/mungebits}{mungebits} +package, while the latter is fully customizable to any R function. } diff --git a/man/tundraContainer.Rd b/man/tundraContainer.Rd index 268a51c..4162964 100644 --- a/man/tundraContainer.Rd +++ b/man/tundraContainer.Rd @@ -1,4 +1,5 @@ -% Generated by roxygen2 (4.0.1): do not edit by hand +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/tundra_container.r \docType{class} \name{tundraContainer} \title{Tundra container class} diff --git a/man/tundra_container.Rd b/man/tundra_container.Rd index 4917e83..a80fe47 100644 --- a/man/tundra_container.Rd +++ b/man/tundra_container.Rd @@ -1,4 +1,5 @@ -% Generated by roxygen2 (4.0.1): do not edit by hand +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/tundra_container.r \name{tundra_container} \alias{tundra_container} \title{tundra_container} diff --git a/man/tundra_gbm_train_fn.Rd b/man/tundra_gbm_train_fn.Rd index a38a0ce..db80c75 100644 --- a/man/tundra_gbm_train_fn.Rd +++ b/man/tundra_gbm_train_fn.Rd @@ -1,4 +1,5 @@ -% Generated by roxygen2 (4.0.1): do not edit by hand +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/tundra_gbm.R \name{tundra_gbm_train_fn} \alias{tundra_gbm_train_fn} \title{Tundra GBM wrapper} diff --git a/man/tundra_regularization_train_fn.Rd b/man/tundra_regularization_train_fn.Rd index 89fd863..d53b6e4 100644 --- a/man/tundra_regularization_train_fn.Rd +++ b/man/tundra_regularization_train_fn.Rd @@ -1,4 +1,5 @@ -% Generated by roxygen2 (4.0.1): do not edit by hand +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/tundra_regularization.r 
\name{tundra_regularization_train_fn} \alias{tundra_regularization_train_fn} \title{Tundra regularization wrapper} diff --git a/man/tundra_rf_train_fn.Rd b/man/tundra_rf_train_fn.Rd index 53b7920..b7353bd 100644 --- a/man/tundra_rf_train_fn.Rd +++ b/man/tundra_rf_train_fn.Rd @@ -1,4 +1,5 @@ -% Generated by roxygen2 (4.0.1): do not edit by hand +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/tundra_random_forest.r \name{tundra_rf_train_fn} \alias{tundra_rf_train_fn} \title{Tundra Random Forest wrapper} From fa66b6612f44263fa4782427f0faae96300071fc Mon Sep 17 00:00:00 2001 From: RobertZK Date: Sun, 2 Aug 2015 10:45:29 -0500 Subject: [PATCH 03/25] Remove zzz file --- R/tundra-package.r | 5 ----- R/zzz.r | 5 ----- 2 files changed, 10 deletions(-) delete mode 100644 R/tundra-package.r delete mode 100644 R/zzz.r diff --git a/R/tundra-package.r b/R/tundra-package.r deleted file mode 100644 index 26e71f0..0000000 --- a/R/tundra-package.r +++ /dev/null @@ -1,5 +0,0 @@ -#' tundra -#' -#' @name tundra -#' @docType package -NULL diff --git a/R/zzz.r b/R/zzz.r deleted file mode 100644 index 7d1af32..0000000 --- a/R/zzz.r +++ /dev/null @@ -1,5 +0,0 @@ -.onAttach <- function(...) { - require(mungebits) - require(syberiaMungebits) - require(stagerunner) -} From 8ca7beccf90a6aba7574eb83b5f1310513a2d8bb Mon Sep 17 00:00:00 2001 From: RobertZK Date: Sun, 2 Aug 2015 10:49:06 -0500 Subject: [PATCH 04/25] Initial R6 class for tundraContainer. 
--- DESCRIPTION | 3 ++- NAMESPACE | 1 + R/package.tundra.R | 6 +++++- R/tundraContainer.R | 12 ++++++++++++ man/tundra.Rd | 6 +++++- man/tundraContainer.Rd | 14 ++++++++++++-- 6 files changed, 37 insertions(+), 5 deletions(-) create mode 100644 R/tundraContainer.R diff --git a/DESCRIPTION b/DESCRIPTION index 4b8024c..47cc942 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -13,7 +13,8 @@ Depends: R (>= 3.0.1) Imports: mungebits, - stagerunner + stagerunner, + R6 License: MIT LazyData: true Suggests: diff --git a/NAMESPACE b/NAMESPACE index 2011ae1..3167f4e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,7 @@ S3method(print,tundraContainer) S3method(summary,tundraContainer) +export(tundraContainer) export(tundra_container) export(tundra_gbm) export(tundra_random_forest) diff --git a/R/package.tundra.R b/R/package.tundra.R index de65fa0..5469977 100644 --- a/R/package.tundra.R +++ b/R/package.tundra.R @@ -26,7 +26,11 @@ #' } #' #' The former is provided by the \href{https://github.com/robertzk/mungebits}{mungebits} -#' package, while the latter is fully customizable to any R function. +#' package, while the latter is fully customizable to any R function. This +#' approach allows arbitrary data preparation and statistical methods, unlike +#' attempts such as PMML (Predictive Modeling Markup Language) which constrain +#' the space of possible data preparation methodologies and statistical +#' methodologies to a very limited subset. #' #' @name tundra #' @docType package diff --git a/R/tundraContainer.R b/R/tundraContainer.R new file mode 100644 index 0000000..30e7ffe --- /dev/null +++ b/R/tundraContainer.R @@ -0,0 +1,12 @@ +#' A standard container format for classifiers developed in R. +#' +#' @docType class +#' @name tundraContainer +#' @export +tundraContainer <- R6::R6Class("tundraContainer", + public = list( + initialize = function(...) 
{ + } + ) +) + diff --git a/man/tundra.Rd b/man/tundra.Rd index b733481..2be884c 100644 --- a/man/tundra.Rd +++ b/man/tundra.Rd @@ -33,6 +33,10 @@ any machine learning model. } The former is provided by the \href{https://github.com/robertzk/mungebits}{mungebits} -package, while the latter is fully customizable to any R function. +package, while the latter is fully customizable to any R function. This +approach allows arbitrary data preparation and statistical methods, unlike +attempts such as PMML (Predictive Modeling Markup Language) which constrain +the space of possible data preparation methodologies and statistical +methodologies to a very limited subset. } diff --git a/man/tundraContainer.Rd b/man/tundraContainer.Rd index 4162964..76bfaaa 100644 --- a/man/tundraContainer.Rd +++ b/man/tundraContainer.Rd @@ -1,9 +1,19 @@ % Generated by roxygen2 (4.1.1): do not edit by hand -% Please edit documentation in R/tundra_container.r +% Please edit documentation in R/tundraContainer.R, R/tundra_container.r \docType{class} \name{tundraContainer} -\title{Tundra container class} +\alias{tundraContainer} +\title{A standard container format for classifiers developed in R.} +\format{\preformatted{Class 'R6ClassGenerator' + - attr(*, "name")= chr "tundraContainer_generator" +}} +\usage{ +tundraContainer +} \description{ +A standard container format for classifiers developed in R. + TODO: Formally define parameter spaces for models } +\keyword{datasets} From 2c47f2fa196fc164952a6d38d2e3b3b18cd31c04 Mon Sep 17 00:00:00 2001 From: RobertZK Date: Sun, 2 Aug 2015 10:52:32 -0500 Subject: [PATCH 05/25] Initial members of a tundraContainer object. 
--- R/tundraContainer.R | 11 +++++++++++ man/tundraContainer.Rd | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/R/tundraContainer.R b/R/tundraContainer.R index 30e7ffe..85247e6 100644 --- a/R/tundraContainer.R +++ b/R/tundraContainer.R @@ -5,6 +5,17 @@ #' @export tundraContainer <- R6::R6Class("tundraContainer", public = list( + .keyword = NULL, # character + .train_function = NULL, # function or NULL + .predict_function = NULL, # function or NULL + .munge_procedure = NULL, # list of mungepieces + .default_args = NULL, # list + .trained = FALSE, # logical + .input = NULL, # environment + .output = NULL, # environment + .internal = NULL, # environment + .hooks = NULL, # list + initialize = function(...) { } ) diff --git a/man/tundraContainer.Rd b/man/tundraContainer.Rd index 76bfaaa..db44124 100644 --- a/man/tundraContainer.Rd +++ b/man/tundraContainer.Rd @@ -4,7 +4,7 @@ \name{tundraContainer} \alias{tundraContainer} \title{A standard container format for classifiers developed in R.} -\format{\preformatted{Class 'R6ClassGenerator' +\format{\preformatted{Class 'R6ClassGenerator' - attr(*, "name")= chr "tundraContainer_generator" }} \usage{ From 105f160931707b6f1b5982f5d5a78e589b5732d9 Mon Sep 17 00:00:00 2001 From: RobertZK Date: Sun, 2 Aug 2015 11:13:27 -0500 Subject: [PATCH 06/25] Initial documentation for tundraContainer constructor. --- R/tundraContainer-initialize.R | 33 +++++++++++++++++++++++++ R/tundraContainer.R | 8 +++--- man/initialize.Rd | 45 ++++++++++++++++++++++++++++++++++ man/tundraContainer.Rd | 2 +- 4 files changed, 83 insertions(+), 5 deletions(-) create mode 100644 R/tundraContainer-initialize.R create mode 100644 man/initialize.Rd diff --git a/R/tundraContainer-initialize.R b/R/tundraContainer-initialize.R new file mode 100644 index 0000000..e6467da --- /dev/null +++ b/R/tundraContainer-initialize.R @@ -0,0 +1,33 @@ +#' Initialize a tundraContainer object. +#' +#' @param keyword character. 
The name of the classifier; for example, +#' "lm" or "knn". +#' @param train_function function. The function used to train the model. +#' Its first argument will be a data.frame, and the second argument +#' a list of additional parameters used for training the model. +#' @param predict_function function. The function used to predict +#' on new datasets. Its first argument will be a data.frame, +#' the dataset to predict on it, and its second (optional) +#' argument will be additional parameters used for prediction +#' output (such as whether to return a probabilistic or absolute +#' value). +#' @param munge_procedure list. A list of trained +#' \code{\link[mungebits]{mungepiece}}s to apply to data sets +#' during prediction. +#' @param default_args list. A list of default arguments to provide to +#' the second argument to the \code{train_function}. The additional +#' arguments provided to the \code{tundraContainer}'s \code{train} +#' method will be merged on top of these defaults. +#' @param internal list. Internal metadata that should accompany the +#' model. Usually this is domain/organization specific, and can +#' include things such as a list of primary keys used for training +#' the model, identifiers or names of data sources used for +#' training the model, etc. It is a playground entirely under +#' your control, and can be used by other packages or a production +#' server hosting the model to achieve additional behavior. 
+initialize <- function(keyword, train_function = identity, + predict_function = identity, munge_procedure = list(), + default_args = list(), internal = list()) { + +} + diff --git a/R/tundraContainer.R b/R/tundraContainer.R index 85247e6..3497950 100644 --- a/R/tundraContainer.R +++ b/R/tundraContainer.R @@ -6,8 +6,8 @@ tundraContainer <- R6::R6Class("tundraContainer", public = list( .keyword = NULL, # character - .train_function = NULL, # function or NULL - .predict_function = NULL, # function or NULL + .train_function = NULL, # function + .predict_function = NULL, # function .munge_procedure = NULL, # list of mungepieces .default_args = NULL, # list .trained = FALSE, # logical @@ -16,8 +16,8 @@ tundraContainer <- R6::R6Class("tundraContainer", .internal = NULL, # environment .hooks = NULL, # list - initialize = function(...) { - } + initialize = initialize + ) ) diff --git a/man/initialize.Rd b/man/initialize.Rd new file mode 100644 index 0000000..efcd6cc --- /dev/null +++ b/man/initialize.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/tundraContainer-initialize.R +\name{initialize} +\alias{initialize} +\title{Initialize a tundraContainer object.} +\usage{ +initialize(keyword, train_function = identity, predict_function = identity, + munge_procedure = list(), default_args = list(), internal = list()) +} +\arguments{ +\item{keyword}{character. The name of the classifier; for example, +"lm" or "knn".} + +\item{train_function}{function. The function used to train the model. +Its first argument will be a data.frame, and the second argument +a list of additional parameters used for training the model.} + +\item{predict_function}{function. The function used to predict +on new datasets. 
Its first argument will be a data.frame, +the dataset to predict on it, and its second (optional) +argument will be additional parameters used for prediction +output (such as whether to return a probabilistic or absolute +value).} + +\item{munge_procedure}{list. A list of trained +\code{\link[mungebits]{mungepiece}}s to apply to data sets +during prediction.} + +\item{default_args}{list. A list of default arguments to provide to +the second argument to the \code{train_function}. The additional +arguments provided to the \code{tundraContainer}'s \code{train} +method will be merged on top of these defaults.} + +\item{internal}{list. Internal metadata that should accompany the +model. Usually this is domain/organization specific, and can +include things such as a list of primary keys used for training +the model, identifiers or names of data sources used for +training the model, etc. It is a playground entirely under +your control, and can be used by other packages or a production +server hosting the model to achieve additional behavior.} +} +\description{ +Initialize a tundraContainer object. 
+} + diff --git a/man/tundraContainer.Rd b/man/tundraContainer.Rd index db44124..6a5279f 100644 --- a/man/tundraContainer.Rd +++ b/man/tundraContainer.Rd @@ -4,7 +4,7 @@ \name{tundraContainer} \alias{tundraContainer} \title{A standard container format for classifiers developed in R.} -\format{\preformatted{Class 'R6ClassGenerator' +\format{\preformatted{Class 'R6ClassGenerator' - attr(*, "name")= chr "tundraContainer_generator" }} \usage{ From 45c32ac19205e3c55e0b87515e1b4078a8f27c6d Mon Sep 17 00:00:00 2001 From: RobertZK Date: Sun, 2 Aug 2015 11:20:33 -0500 Subject: [PATCH 07/25] Initialize method for tundraContainer --- DESCRIPTION | 3 ++- R/tundraContainer-initialize.R | 10 ++++++++++ man/tundraContainer.Rd | 2 +- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 47cc942..d3f34d4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -14,7 +14,8 @@ Depends: Imports: mungebits, stagerunner, - R6 + R6, + crayon License: MIT LazyData: true Suggests: diff --git a/R/tundraContainer-initialize.R b/R/tundraContainer-initialize.R index e6467da..328d3ec 100644 --- a/R/tundraContainer-initialize.R +++ b/R/tundraContainer-initialize.R @@ -28,6 +28,16 @@ initialize <- function(keyword, train_function = identity, predict_function = identity, munge_procedure = list(), default_args = list(), internal = list()) { + if (!(is.list(munge_procedure) || is(munge_procedure, "stageRunner"))) { + stop("The ", sQuote("munge_procedure"), " parameter must be a list or ", + "stageRunner object.") + } + .keyword <<- keyword + .train_function <<- train_function + .predict_function <<- predict_function + .munge_procedure <<- munge_procedure + .default_args <<- default_args + .internal <<- internal } diff --git a/man/tundraContainer.Rd b/man/tundraContainer.Rd index 6a5279f..9fafa31 100644 --- a/man/tundraContainer.Rd +++ b/man/tundraContainer.Rd @@ -4,7 +4,7 @@ \name{tundraContainer} \alias{tundraContainer} \title{A standard container format for 
classifiers developed in R.} -\format{\preformatted{Class 'R6ClassGenerator' +\format{\preformatted{Class 'R6ClassGenerator' - attr(*, "name")= chr "tundraContainer_generator" }} \usage{ From 5877a51c7c5c9c280b8fa2250a5268eb0c6a20be Mon Sep 17 00:00:00 2001 From: RobertZK Date: Sun, 2 Aug 2015 11:24:00 -0500 Subject: [PATCH 08/25] Test tundraContainer construction. --- DESCRIPTION | 3 +-- tests/test-all.R | 4 +++- tests/testthat/test-initialize.R | 16 ++++++++++++++++ 3 files changed, 20 insertions(+), 3 deletions(-) create mode 100644 tests/testthat/test-initialize.R diff --git a/DESCRIPTION b/DESCRIPTION index d3f34d4..edd6116 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -19,6 +19,5 @@ Imports: License: MIT LazyData: true Suggests: - knitr, - microbenchmark + testthatsomemore Roxygen: list(wrap = FALSE) diff --git a/tests/test-all.R b/tests/test-all.R index 8584001..4c32bda 100644 --- a/tests/test-all.R +++ b/tests/test-all.R @@ -1,2 +1,4 @@ library(testthat) -test_package("tundra") +library(testthatsomemore) +library(tundra) +test_check("tundra") diff --git a/tests/testthat/test-initialize.R b/tests/testthat/test-initialize.R new file mode 100644 index 0000000..78a0de1 --- /dev/null +++ b/tests/testthat/test-initialize.R @@ -0,0 +1,16 @@ +context("tundraContainer$initialize") +library(testthatsomemore) + +test_that("it errors when an invalid munge_procedure is provided", { + expect_error(tundraContainer$new("foo", munge_procedure = NULL), "must be a list") + expect_error(tundraContainer$new("foo", munge_procedure = 5), "must be a list") + expect_error(tundraContainer$new("foo", munge_procedure = force), "must be a list") +}) + +test_that("it successfully creates a tundraContainer when a valid munge_procedure is provided", { + testthatsomemore::assert(tundraContainer$new("foo", munge_procedure = list())) + testthatsomemore::assert( + tundraContainer$new("foo", munge_procedure = stagerunner::stageRunner$new(list(force))) + ) +}) + From 
8f8a977d441ee6a97f643d8ad0edcdc835267d61 Mon Sep 17 00:00:00 2001 From: RobertZK Date: Sun, 2 Aug 2015 11:34:48 -0500 Subject: [PATCH 09/25] Initial documentation for tundra train method. --- R/tundraContainer-train.R | 24 ++++++++++++++++++++++++ R/tundraContainer.R | 3 ++- man/train.Rd | 38 ++++++++++++++++++++++++++++++++++++++ man/tundraContainer.Rd | 2 +- 4 files changed, 65 insertions(+), 2 deletions(-) create mode 100644 R/tundraContainer-train.R create mode 100644 man/train.Rd diff --git a/R/tundraContainer-train.R b/R/tundraContainer-train.R new file mode 100644 index 0000000..e3d09de --- /dev/null +++ b/R/tundraContainer-train.R @@ -0,0 +1,24 @@ +#' Train a model encapsulated within a tundraContainer. +#' +#' @param dataframe data.frame. The data set to train the model on. This +#' will be preprocessed with the \code{tundraContainer}'s +#' \code{munge_procedure} and then passed as the first argument to +#' the \code{tundraContainer}'s \code{train_function}. +#' @param train_args list. A list of arguments to pass to make available +#' to the \code{tundraContainer}'s \code{train_function} through +#' use of the \code{input} keyword. See the examples. +#' @param verbose logical. Either \code{TRUE} or \code{FALSE}, by +#' default the latter. If \code{TRUE}, then output produced by +#' running the \code{munge_procedure} or the \code{train_function} +#' will not be silenced. +#' @param munge logical. Either \code{TRUE} or \code{FALSE}, by +#' default the former. If \code{FALSE}, the \code{munge_procedure} +#' provided to the container during initialization will be assumed +#' to have been trained, and the \code{dataframe} provided will not +#' be run through it. +#' @return The value returned by the \code{tundraContainer}'s +#' \code{train_function}. Since the \code{train_function} has side effects +#' on the container, this can usually be \code{invisible(NULL)}. 
+train <- function(dataframe, train_args = list(), verbose = FALSE, munge = TRUE) { +} + diff --git a/R/tundraContainer.R b/R/tundraContainer.R index 3497950..574712b 100644 --- a/R/tundraContainer.R +++ b/R/tundraContainer.R @@ -16,7 +16,8 @@ tundraContainer <- R6::R6Class("tundraContainer", .internal = NULL, # environment .hooks = NULL, # list - initialize = initialize + initialize = initialize, + train = train ) ) diff --git a/man/train.Rd b/man/train.Rd new file mode 100644 index 0000000..330b91c --- /dev/null +++ b/man/train.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/tundraContainer-train.R +\name{train} +\alias{train} +\title{Train a model encapsulated within a tundraContainer.} +\usage{ +train(dataframe, train_args = list(), verbose = FALSE, munge = TRUE) +} +\arguments{ +\item{dataframe}{data.frame. The data set to train the model on. This +will be preprocessed with the \code{tundraContainer}'s +\code{munge_procedure} and then passed as the first argument to +the \code{tundraContainer}'s \code{train_function}.} + +\item{train_args}{list. A list of arguments to pass to make available +to the \code{tundraContainer}'s \code{train_function} through +use of the \code{input} keyword. See the examples.} + +\item{verbose}{logical. Either \code{TRUE} or \code{FALSE}, by +default the latter. If \code{TRUE}, then output produced by +running the \code{munge_procedure} or the \code{train_function} +will not be silenced.} + +\item{munge}{logical. Either \code{TRUE} or \code{FALSE}, by +default the former. If \code{FALSE}, the \code{munge_procedure} +provided to the container during initialization will be assumed +to have been trained, and the \code{dataframe} provided will not +be run through it.} +} +\value{ +The value returned by the \code{tundraContainer}'s + \code{train_function}. Since the \code{train_function} has side effects + on the container, this can usually be \code{invisible(NULL)}. 
+} +\description{ +Train a model encapsulated within a tundraContainer. +} + diff --git a/man/tundraContainer.Rd b/man/tundraContainer.Rd index 9fafa31..0661b5d 100644 --- a/man/tundraContainer.Rd +++ b/man/tundraContainer.Rd @@ -4,7 +4,7 @@ \name{tundraContainer} \alias{tundraContainer} \title{A standard container format for classifiers developed in R.} -\format{\preformatted{Class 'R6ClassGenerator' +\format{\preformatted{Class 'R6ClassGenerator' - attr(*, "name")= chr "tundraContainer_generator" }} \usage{ From 18e021cfe15dd767a9f494b11e29e587e998a874 Mon Sep 17 00:00:00 2001 From: RobertZK Date: Sun, 2 Aug 2015 11:37:20 -0500 Subject: [PATCH 10/25] Initial sketch at train method. --- R/tundraContainer-run_hooks.R | 3 +++ R/tundraContainer-train.R | 9 +++++++++ R/tundraContainer.R | 8 ++++++-- man/tundraContainer.Rd | 2 +- 4 files changed, 19 insertions(+), 3 deletions(-) create mode 100644 R/tundraContainer-run_hooks.R diff --git a/R/tundraContainer-run_hooks.R b/R/tundraContainer-run_hooks.R new file mode 100644 index 0000000..40d3f81 --- /dev/null +++ b/R/tundraContainer-run_hooks.R @@ -0,0 +1,3 @@ +run_hooks <- function(...) { + +} diff --git a/R/tundraContainer-train.R b/R/tundraContainer-train.R index e3d09de..6a5e9e3 100644 --- a/R/tundraContainer-train.R +++ b/R/tundraContainer-train.R @@ -20,5 +20,14 @@ #' \code{train_function}. Since the \code{train_function} has side effects #' on the container, this can usually be \code{invisible(NULL)}. 
train <- function(dataframe, train_args = list(), verbose = FALSE, munge = TRUE) { + if (isTRUE(self$.trained)) { + stop("The tundra ", sQuote(self$.keyword), " model has already been trained.") + } + + force(train_args) + force(verbose) + force(munge) + + private$run_hooks("train_pre_munge") } diff --git a/R/tundraContainer.R b/R/tundraContainer.R index 574712b..f4f2b0a 100644 --- a/R/tundraContainer.R +++ b/R/tundraContainer.R @@ -18,7 +18,11 @@ tundraContainer <- R6::R6Class("tundraContainer", initialize = initialize, train = train - - ) + ), + + private = list( + run_hooks = run_hooks + ) + ) diff --git a/man/tundraContainer.Rd b/man/tundraContainer.Rd index 0661b5d..59f6205 100644 --- a/man/tundraContainer.Rd +++ b/man/tundraContainer.Rd @@ -4,7 +4,7 @@ \name{tundraContainer} \alias{tundraContainer} \title{A standard container format for classifiers developed in R.} -\format{\preformatted{Class 'R6ClassGenerator' +\format{\preformatted{Class 'R6ClassGenerator' - attr(*, "name")= chr "tundraContainer_generator" }} \usage{ From fd6c2b301167dcdffeffebc175b7494dbc938bf3 Mon Sep 17 00:00:00 2001 From: RobertZK Date: Sun, 2 Aug 2015 11:50:15 -0500 Subject: [PATCH 11/25] Document call_with helper. 
--- R/tundraContainer-initialize.R | 4 ++++ R/tundraContainer-train.R | 19 ++++++++++++++++++ R/utils.r | 31 +++++++++++++++++++++++++++++ man/call_with.Rd | 36 ++++++++++++++++++++++++++++++++++ man/tundraContainer.Rd | 2 +- 5 files changed, 91 insertions(+), 1 deletion(-) create mode 100644 man/call_with.Rd diff --git a/R/tundraContainer-initialize.R b/R/tundraContainer-initialize.R index 328d3ec..3e187bb 100644 --- a/R/tundraContainer-initialize.R +++ b/R/tundraContainer-initialize.R @@ -39,5 +39,9 @@ initialize <- function(keyword, train_function = identity, .munge_procedure <<- munge_procedure .default_args <<- default_args .internal <<- internal + + .input <<- list_to_env(list()) + .output <<- list_to_env(list()) + .internal <<- list_to_env(list()) } diff --git a/R/tundraContainer-train.R b/R/tundraContainer-train.R index 6a5e9e3..189d990 100644 --- a/R/tundraContainer-train.R +++ b/R/tundraContainer-train.R @@ -29,5 +29,24 @@ train <- function(dataframe, train_args = list(), verbose = FALSE, munge = TRUE) force(munge) private$run_hooks("train_pre_munge") + if (isTRUE(munge) && length(munge_procedure) > 0) { + dataframe <- munge(dataframe, self$.munge_procedure, verbose) + attr(dataframe, "mungepieces") <- NULL + } + private$run_hooks("train_post_munge") + + call_with( + self$.train_function, + list(dataframe), + list(input = self$.input, output = self$.output) + ) +} + +munge <- function(dataframe, munge_procedure, verbose) { + if (isTRUE(verbose)) { + capture.output(Recall(dataframe, munge_procedure, FALSE)) + } else { + mungebits::munge(dataframe, munge_procedure) + } } diff --git a/R/utils.r b/R/utils.r index f7a58c9..935edcc 100644 --- a/R/utils.r +++ b/R/utils.r @@ -1,2 +1,33 @@ `%||%` <- function(x, y) if (is.null(x)) y else x +list_to_env <- function(obj, parent = emptyenv()) { + if (length(obj) == 0) { + new.env(parent = parent) + } else { + list2env(obj, parent = parent) + } +} + +#' Evaluate a function while injecting some locals. 
+#' +#' Instead of modifying a closure's parent environment directly, +#' sometimes it may be desirable to do a one-time injection that +#' overrides what would normally be accessible through the closure. +#' \code{call_with} allows this by extending the usual \code{do.call} +#' to a third argument that is a list or environment temporarily +#' injected during the course of the call. +#' +#' @param fn function. +#' @param args list. The arguments to call the \code{fn} with. +#' @param with list or environment. Additional locals to make available +#' during the call. +#' @return The result of calling \code{fn} with the injection provided +#' by the \code{with} parameter. +#' @examples \dontrun{ +#' fn <- local({ x <- 1; function(x) { x + y } }) +#' stopifnot(fn(1) == 2) +#' stopifnot(call_with(fn, list(1), list(x = 2)) == 3) +#' } +call_with <- function(fn, args, with) { + +} diff --git a/man/call_with.Rd b/man/call_with.Rd new file mode 100644 index 0000000..568fcb1 --- /dev/null +++ b/man/call_with.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/utils.r +\name{call_with} +\alias{call_with} +\title{Evaluate a function while injecting some locals.} +\usage{ +call_with(fn, args, with) +} +\arguments{ +\item{fn}{function.} + +\item{args}{list. The arguments to call the \code{fn} with.} + +\item{with}{list or environment. Additional locals to make available +during the call.} +} +\value{ +The result of calling \code{fn} with the injection provided + by the \code{with} parameter. +} +\description{ +Instead of modifying a closure's parent environment directly, +sometimes it may be desirable to do a one-time injection that +overrides what would normally be accessible through the closure. +\code{call_with} allows this by extending the usual \code{do.call} +to a third argument that is a list or environment temporarily +injected during the course of the call. 
+} +\examples{ +\dontrun{ +fn <- local({ x <- 1; function(x) { x + y } }) +stopifnot(fn(1) == 2) +stopifnot(call_with(fn, list(1), list(x = 2)) == 3) +} +} + diff --git a/man/tundraContainer.Rd b/man/tundraContainer.Rd index 59f6205..cae2f0e 100644 --- a/man/tundraContainer.Rd +++ b/man/tundraContainer.Rd @@ -4,7 +4,7 @@ \name{tundraContainer} \alias{tundraContainer} \title{A standard container format for classifiers developed in R.} -\format{\preformatted{Class 'R6ClassGenerator' +\format{\preformatted{Class 'R6ClassGenerator' - attr(*, "name")= chr "tundraContainer_generator" }} \usage{ From fe5a6bef8153f8e586c86e2efcc265f1a372008f Mon Sep 17 00:00:00 2001 From: RobertZK Date: Sun, 2 Aug 2015 11:53:13 -0500 Subject: [PATCH 12/25] Fully working call_with helper. --- R/utils.r | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/R/utils.r b/R/utils.r index 935edcc..e63f281 100644 --- a/R/utils.r +++ b/R/utils.r @@ -24,10 +24,20 @@ list_to_env <- function(obj, parent = emptyenv()) { #' @return The result of calling \code{fn} with the injection provided #' by the \code{with} parameter. #' @examples \dontrun{ -#' fn <- local({ x <- 1; function(x) { x + y } }) +#' fn <- local({ x <- 1; function(y) { x + y } }) #' stopifnot(fn(1) == 2) #' stopifnot(call_with(fn, list(1), list(x = 2)) == 3) #' } call_with <- function(fn, args, with) { - + stopifnot(is.list(with) || is.environment(with)) + debugged <- isdebugged(fn) + copy_fn <- fn + if (debugged) debug(copy_fn) + env <- with + if (!is.environment(with)) { + with <- list_to_env(with, parent = environment(copy_fn)) + } + environment(fn) <- with + do.call(fn, args) } + From b3b73883aba40ca5a5159eb16543ab506c1a6bc6 Mon Sep 17 00:00:00 2001 From: RobertZK Date: Sun, 2 Aug 2015 11:56:18 -0500 Subject: [PATCH 13/25] Test permutation of call_with possibilities. 
--- tests/testthat/test-call_with.R | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 tests/testthat/test-call_with.R diff --git a/tests/testthat/test-call_with.R b/tests/testthat/test-call_with.R new file mode 100644 index 0000000..14df154 --- /dev/null +++ b/tests/testthat/test-call_with.R @@ -0,0 +1,24 @@ +context("call_with") + +test_that("it correctly injects a simple example", { + fn <- local({ x <- 1; function(y) { x + y } }) + expect_identical(call_with(fn, list(1), list(x = 2)), 3) +}) + +test_that("it correctly injects a simple example with an environment", { + fn <- local({ x <- 1; function(y) { x + y } }) + expect_identical(call_with(fn, list(1), list2env(list(x = 2))), 3) +}) + +test_that("it correctly injects a simple example with an environment override", { + y <- 2 + fn <- local({ x <- 1; y <- 1; function(z) { c(x, y, z) } }) + expect_identical(call_with(fn, list(1), list2env(list(x = 2))), c(2, 2, 1)) +}) + +test_that("it correctly injects a simple example with a list", { + y <- 2 + fn <- local({ x <- 1; y <- 1; function(z) { c(x, y, z) } }) + expect_identical(call_with(fn, list(1), list(x = 2)), c(2, 1, 1)) +}) + From 5d2241351fbfd7629a535fa7aa50c706f0141404 Mon Sep 17 00:00:00 2001 From: RobertZK Date: Sun, 2 Aug 2015 11:57:30 -0500 Subject: [PATCH 14/25] Check that call_with has no side effects. 
--- tests/testthat/test-call_with.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/testthat/test-call_with.R b/tests/testthat/test-call_with.R index 14df154..22d9c0c 100644 --- a/tests/testthat/test-call_with.R +++ b/tests/testthat/test-call_with.R @@ -22,3 +22,9 @@ test_that("it correctly injects a simple example with a list", { expect_identical(call_with(fn, list(1), list(x = 2)), c(2, 1, 1)) }) +test_that("it does not modify the original function's environment", { + fn <- local({ x <- 1; function(y) { x + y } }) + call_with(fn, list(1), list(x = 2)) + expect_identical(fn(1), 2) +}) + From cce62b68bf08322b04ccc17258ae68181350f815 Mon Sep 17 00:00:00 2001 From: RobertZK Date: Sun, 2 Aug 2015 12:03:26 -0500 Subject: [PATCH 15/25] Initial working tundra train. --- R/tundraContainer-train.R | 7 ++++++- man/call_with.Rd | 2 +- man/tundraContainer.Rd | 2 +- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/R/tundraContainer-train.R b/R/tundraContainer-train.R index 189d990..e47c935 100644 --- a/R/tundraContainer-train.R +++ b/R/tundraContainer-train.R @@ -35,11 +35,16 @@ train <- function(dataframe, train_args = list(), verbose = FALSE, munge = TRUE) } private$run_hooks("train_post_munge") - call_with( + output <- call_with( self$.train_function, list(dataframe), list(input = self$.input, output = self$.output) ) + + private$run_hooks("train_finalize") + self$.trained <<- TRUE + + output } munge <- function(dataframe, munge_procedure, verbose) { diff --git a/man/call_with.Rd b/man/call_with.Rd index 568fcb1..30ca548 100644 --- a/man/call_with.Rd +++ b/man/call_with.Rd @@ -28,7 +28,7 @@ injected during the course of the call. 
} \examples{ \dontrun{ -fn <- local({ x <- 1; function(x) { x + y } }) +fn <- local({ x <- 1; function(y) { x + y } }) stopifnot(fn(1) == 2) stopifnot(call_with(fn, list(1), list(x = 2)) == 3) } diff --git a/man/tundraContainer.Rd b/man/tundraContainer.Rd index cae2f0e..095e51e 100644 --- a/man/tundraContainer.Rd +++ b/man/tundraContainer.Rd @@ -4,7 +4,7 @@ \name{tundraContainer} \alias{tundraContainer} \title{A standard container format for classifiers developed in R.} -\format{\preformatted{Class 'R6ClassGenerator' +\format{\preformatted{Class 'R6ClassGenerator' - attr(*, "name")= chr "tundraContainer_generator" }} \usage{ From 74f52d25dbfbe909e3ed252e9495e787bfeaabdb Mon Sep 17 00:00:00 2001 From: RobertZK Date: Sun, 2 Aug 2015 12:05:15 -0500 Subject: [PATCH 16/25] Add a tundra logo. --- README.md | 2 +- inst/images/tundra.gif | Bin 0 -> 12343 bytes 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 inst/images/tundra.gif diff --git a/README.md b/README.md index ccde103..308021b 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ Deployable Models in R [![Build Status](https://img.shields.io/travis/robertzk/tundra.svg)](https://travis-ci.org/robertzk/tundra.svg?branch=master) [![Coverage Status](https://img.shields.io/coveralls/robertzk/tundra.svg)](https://coveralls.io/r/robertzk/tundra) ![Release Tag](https://img.shields.io/github/tag/robertzk/tundra.svg) ====== -![tundra](http://i.imgur.com/tBFgOBW.jpg) +![tundra logo](inst/images/tundra.gif) Tundra provides a standardized container format for classifiers developed in R. It "freezes" your models so you can put them in production safely! 
diff --git a/inst/images/tundra.gif b/inst/images/tundra.gif new file mode 100644 index 0000000000000000000000000000000000000000..222fd39da665860c80373a1d7eccf66271ecaf6e GIT binary patch literal 12343 zcmWk!c{G&Y8^7;6n_(~*V=x%|uCXtv-mxcXEFq*JBuPU^`^?z)u_Ri@nouDX+QynK zQ7V;cNYPFk^{wXD?>YB8=RW7$`^P=!p67m+>%QL2&OTfdCPEJZ=>M?1dGPI)p|>3i zKN{!0efjTC`SYdici*Q!{v0;E@ao5ZAAbH08-1I2|6}fx#b;lC`QCXOI{a_En_T*N zVDjV2pT99gAWM!|xZC|GWEn<-fnb`Og;L zEdQQZT)Fr8=aGk>|NHYdWaw>`=xfN(yRgys+lJnUjl2sPc`FwG{VV<(HuCiGCO&1q5Z(!`0FnhI}UkYdiHtc#*urU{}Xe#ytMR; z%&hF3+`RmP!XkcgNom=!^5YeiCr+L^edes7s=B7OuKrxZ`NpQ^3l}f7w6Y;3{ZtU$XU=GY|bX-$T>^&8d}CkXa+3``U5di z)j>>_2SLR?+T*-WLN`x(ol#h5rKDRgUSB4NE}*^6ry89l`O|xPATe4tNOGg1b2k^r z`J(L*fi@w-N3wwh!uus9$Oz%Nr(|Z9j9KWS2aSfHfJ;oa4lhkG$WsNO!j>Kk3X8WR ziiVX)1RRvw;lb{W-y$&F`X-%`=H^;KL3gBT_71QJ>c3)NSfQ|x453Glt+_ob512Wc z*vHw@ZPop35p4U=u^vrU@}cYe~sM-X$k3o z&o9>I2w-7}l&pT(4;Kk7x|5XzZT|tDtckZ2TX6dNVUNOa5Ca&Xr02N!4(T_yQ>)0X~g@^2! z{A}c5i07sCL?ouSASD#6`hZMHJ?!K#VxP*D$rpIZmoOPW>->B0#Tn(*xvA1S(qvT7 zoC;5?NM?9xq!!$ZMUA@7ZGF|rIw=LSa0oW)(a-F?MpVE+w@IJtl_JtNb4pUkxf0UW zd>!;Am}oyOBiD>PD9H?n9VA%DH)@~?y;BegX>NOYT`jh{e7kfvlY=SA&_z&+B$Bg> z{IOVLb-U4VWrAISVIPchp-)RHC6#Gzr|DX~yD-|-Jswgf)5^j0bOkqWtX%uu+4*kD z$N)h@E*RY+PtX@$QI7g1MoD)bcyFdcALTPJC_9lbQGv^1bUsSaLErpg_W0>_bWv3? 
zii#G{VO@!;J?G|s?Se5xGzY?A1^$ZzEIJyXWh95rK-7=B;!y^Cy3=uuxSviydasG4 zlFoh(SEgJ}2?`ib{?&s)Q~UP>*|1t3k9usMP=#pQ-Zp|+>3zfQ15B(!}?u9P(H`TJi!3fi}r&?4-14}oSWZmKu#|Nw*OFCX1LQd_lFLNeX z4|=cjux6bM!3uj`k?zJmxY)MmG<2GznIJ%&E+I3xhBJ#@T~3qZ0ZYLO z!H}dv)#LGYXZIVsFIYJIKuNYn+`f`;WQ#x$UB2~w`@^0s&CPofrVv!)d0FD8JqCk& z9B7=jQ*}<5iHEZFnATn4@7&Bi{Eni9kmSlPS@7+pDTxSU#@G9>4F-3ZY(LS9t~(&r zj%@w)@TP_rfd1g6p4joN?P$4LvJwzp!pW{7W|6GOWT0UhS57z>$OeRDd7&ZUhTte% zTi$swLmOq_Y7%4;94!K7$4Q+1&^TApf_72kDG#8N7-%~IFS);prVgH*l;>PNZYajN zJ%~N_$HA8aExg4T8|4$UkYhF73{7)7!2Ni{o!05gzltzWX!;3>7}1fvsluG6I%nFL z0(4X3%O**|nZH4Au8A?s z)9%^vMZkDE+V(%Z6x-Jwh7Q+!r{NTSe4|0-;Snbp@d2bXqXEKAVW@Nv;C=9zlS{hQOhFu=EEB zNi0$A2U`dQ+;#e^2P4N#Sm+ziowG6XsPDhT31Nfpvm;!knxd2$6;j(Oj&!N5V$W%)pV^tPoYcP_OS)(0} zzhi~Z0Kx6!CxSO*^7fIPw%@ODS594j!?UrqvNJ1k(~eoan?u}P2fI}KJLeLk4tF=+ z={Hg$>6Oo2;d#_2>!`Y1ZeB0F;rZyAGq+|YCCxit7nCz`Bkb76-s~wYbWs`B&oFCK&r;OX7{} zqBko)e^;!D0}#dfQspPJGl}?dd)-4MUESJb59hl?|61&ERq5J+jp+p;JIeY3JQxdg z`ZMkAal>~E%CfXX(oQ=L~td%DlWkDB8 z553qYAucGFU)85x?qe#)R90bm66>^#Te#o!;y*8=lpY@ZI*(E536)}jRV}+}&G*k( zhlMX5J^A2}x;elsz=k4LO{Q!GjTBxD? 
z-K*M1^>QFj-{5GxZb%?PY;~4nlF7!vI9}6#!xTDfo;q@Fvt8;Y!qv2=F3RB(Tzvt& zynJAL9g`vi7E?}M3%DB=$!~Q`cnqCYER+TZ!qd_&WI?|pRDHBk9hjuE&T;y2{U(R$V$|-VD8c^sSgFyF3%JNld>o@1rWhp)lWJdWYi(S%u z4O(VWWSngBwLwY!&Jzm<<1-LUxV8pk zi{~O4^AAST!P}0)Ba?>4tQ{x>py)+Qb|APa_b$WX?n9!DL{?LRw>lAgwW4a!s4kQ1 zp6jAz+4f9lYX4G_8gt{i@x=aA+*A_jsgAQdr5NxLb6=ugL~@j~4TS@aqK+CY1js@# zngg)o^wV;=r6`mF8wY+`of9|D=;hbb!W{f-w1VeDQlg*1D z=-6c-EeMA>WW_f&3raDd8&<;tB7B$>5-QHx@vKU$8) z^5qmuGztbZ4_H_Q%L5drXzLTKw4A$cps5F!?~7-V%pn;8s)tulE2lLnr;nopSr%j{ z+;=-m!AJ<@UiM7m6}^R^J&(0NkTgzMQl|`n9uXtdN+FCx2jyIgo&ZS%m|ZzCYOkKb z^_^&l>vKj=29YgXta5Em9Hs*`W&nFuR~8`5V>#NaGmT}QeG{njr2v-<;TEASUk>g_ zgb!FLGZ@A02&c$BpjNJ;F99{OGhU2Gcs@i}MPy*+%D!Fgbm$F?W0jUHF%1)w!%19>?VK%j|sJ3Jv6uG2CUc<7oeO4El4qKiKB(XMc#i5Ki(95^J`ur`Z?4#}rz(}C2&OG~o z+}uERVsl+w7+Ch2iah)#U%*KL`k<}Q$IiA}bJD3kNPo|&uW&U+f8Ix~`=n)tTtdnl+_^N%{f}fV)`+4D&e1qN=YW#R@ubL8`vPTDG^8?g>nD6|THwRlx0ILUwztV;J?R#3P`AE0 zDZa9bROt6EyLHAk?r4H#R4+_qSKiJrQ03Ql-ht)hjbnW@{fv3n!aaU>sJGavI?@}T z`5s%jb}Fi|c@59kBh@MAOGtZde2ybNmt?jON16&@*|F!c5R7?=w4hBe|QOZSKv0y!Jd4yn4E z@(UR6!QrI+(j`5U3GtiyHel+sn_|^ACb?h95cK`VzpKiZa-gjHvTy0UJUt?(q#kCBjK>H=y~kX5*+&f%aJD@I$%Uv#%)Sy1;@-+1CoYw4qjQpqA>SSb#J( zk7AO-b-d%-gn~$mwu;~H2?WS^hKdGl(JbggN3;UR5t)2p+l&U|{U+j37sQ2H5#tNRc z!AF5Q`rrh64OSHWa=+&)kyh_9W3&(4K(21<^sjHyGKjLO%`e!;sM=Z!pK$o6SD>04 zRpn^V(op}p@y8}DyHr?^B+GI>Cpmq+P0lI#yi=>ym?%Jue z^do4ZU(ZYz3Q}FgWE9|{60P2Y}izwS>v^%k9eeKTzHt7Ll8o6&T9MB#c?77)` z_w+QAD1wr4HySk6cmC3EvsD($5lI?vH00Ji#Ga8?GM$ceC1cW=w^Dme;j;WY z_&uvUuMLo@hAA{v3)&s>HzanXPw+$ zxIy8pj^@RyHtK!yA>SfOU(cUmmn1j~4DPkA(-v;nfB4NYX1Hv3h{00Gi~j8ruHz0R zt-jVkrKO>+I7N&Jfc$;CDl{{nQJbLq%LHJ`u0pPzSY-WWF9h$14$OeICD z3jX-^vQrF{%vK4b@5=(qK#~|-=6qS zkP*Foshh1)byN1XWXo3+jaZbi8m+ap-OtLv*PUoLPS)br?%JY8p>6N%4Qjmus3Hgh z9T7*rhdUBB?Rt}WVbcf^?;On39u3yMj>vBa_-*SQMw!$(*krBfdUc!+5lrYfJD?Ws(uK`oO~&DsU|LZ6WXJZ7S&rSwF`qHMkln3c-~Sy3BsS-- za(`5JY1t&o7Hr)6drMc_@q6nC9VoQqGvkE4ok8x$?;gwc2!MBTjO_GM#O~d7Aa8=T zZYi~yb)$SZ-M{5PR~X@S^7-vLp=Yya18e^D45D#(DQQ_b1w|!gRW%JwZC!msW7AdU 
zme#gMJ(6@nwcdnMTwc$WGovcv0+i#?CgdZ3Wi)hjD3N6)@{5tFkxchJxw|QzTS+X~ z{`A?Zn!0o6n=V{xHMS)>B;5}v(poH>OAU=bl#*8#hgLBnDVbOWpJYztmoX0)RVJ;s zry+9nTWr#Ba(ez9{BY-3NfkthxTYB>i5430y2BpwGH6o9l8{?=D-7@bWHE){oo?t) zV~Q+MeCLz432!DlOZ98%7(k@6XM4DckX}Hs{7go^>;WwoMKKZdNTgmN+~xRiLV#iX z8JBR>-fDoynK9V{9ssHBx4xCFJ|4gI`aa9u21o?Vfz;hj#W266O`TUkA8KL4!c1OP z#P0^cazRPq=o&5T8K*P_8MoA!U9(UQthHDlaPvg+uFj%6!iE-3Um9N5DIE}b-*_s; z;R?BOKi%h^&`3JCR)G*pzgXpdeC=rmD_*z4{`1`JKGKMNOX*H5YFF~t`H6GK)x*5@ zo6j9p*i;6s{O5eP{c7Otvqz7LaaT$%)w|eCAvZeOq;&XC$U*VY8*JCT>y-8;&D&!7 zD6>~u3NMrKN^iUXz>ahwgeKZSt~f^ui~K{3hMXaoNHMv5S49B0;}R5FI<8`b!B zULt$Q{%v#VMVMeG@HBS~k0`VHmcRf6z>4I^APJgzR+&F7%g#4u3t`wu7>>37IO_jk zqwH`vT1MV@C)K@ZaEF?2iW~!8xww)8K&uomFiV$qIz~HDu4rKc`tV z2)b-WJV=n*h0LVSnjX;PI7!K)s6x)30HO0ePi85;;G406i3~=}$7ZtsuRAVJmeQ;yywonv=MWpEYVSdk(#5gqHkGwV()jJv#I~(_3gN zg|sw9wkA=M(mi-4am4rpi}*CH8|`TY0>V9yeW$8Or}rI`K>)0=D7Nh*qrd)$3xY_^f9cb_ZPoYK7Ian4*xKhFe zzM@w4!|&*ZN}-f*3u^ESfKhA$tyxMpjmbvRo zy43SCw?qQK(gtNT8^Dp`ms8z@LTzK!u)OahklTajavup$TJ1B8Uav$lpT~k*EA> z7{;P3$i$YRm-talHT}BO3`CkOu1+Ju{sGBn5 z@3Cf;05Waih5QiI8i=N)Z1>Ad_=sRn8+d9wxZi$IgdxWyBs_hV)4L``?ZpBudE?>? z@zQ=j>p@khFcGaaJ%W|1X!$c60O1TO{UwvRCD)(IKntEmkN=?X`MlY!{~Q%)_#O2T zKbq`)xapm_KTrR(Xgk_W&yFth*Pk`pNuO!4kxN+y@)& z3_R>q1sMa(cH$Nb?Xnx>~W4Pn{P>5?bQzf{!S3o zA{!z*+?C=tem(q*`X`B;hv6092tuKSFAjliS%!voTQMnoUQW-=i+`g%FR`B@D6(DT zM)@J&t&hMkYre6c69d(*2I?Xzg$$b7KbgGLA3tL{zgd3fM2@(0>*1+rL$B-5usfr8 zzxU|UixZSNWNqViEYaAF*Xz1pY0z$9xjCq)(lGGRr=Ck>G?91D;??G~a_8PFN@+Lx zg(%t;XG4wy672<)JO|w$uBvzrgx1wxuN+hJPfRbY>S$5t2WX%@QIlKS*SPp@eZk#x z{HAz#PJ$-P)3V(Aa-#_C>tVe5ey|1Bzc%~lEi=pXDFZUXN&G$`k`8WNcyD99XI(0? 
zGS&UZyxq3A^2HAv&c*T3HUH>>FAv5@xg5u?pSJcNl<@v6_jr#zvj!^!wq^_-+a{@J zFxqI+?-8Tv4TSKc;m#o1Ypg51!}Y>c4zL~Q`_SE>CDtKc?S5b}zhmXKY`YdJGFE09 z!X%hw4d-EghmXq`T4e92eaeN)1_k$HYwxp8#T&qPz}Y@dtBC;BKobpr2B#NQR%8fl z8MmMT0>M*?F6^2$wK-DV8is`Be#+^KMk8bIKJ+%*A*2H|sk>31dRSQ{bywlssKyK@ z^ia#ig|0!`2N;=?7ipupsRSvMqX0f41rNk8j8#{CaQ5f}7?%xA_eD8+18aD>Ql;P|pd4OEP5eQrsL)cp^0O978%*4 z(7z*jr~%df}Kx^ zdny=^`XELPu}$e+_3zzF?uFj&22c@QU#HpPf5zTb8%BFN#-ue8y*@iIDT#m?FL8tD8kp_q?JrmlRs^u2rwT+ zxoA-C;{LI^2;(3|Xg%75SQhM7+Z^|0m2|kBKMgnlpSVlJ9xuonkuM6rRL+uU0MZ|T z#JtJta{2RD&;2Ac&BkaVpY-DY3U8%);McO6gNBv9(6%hRR$3)T#c84?-%C=YsgFyn ziUm3!To+*7mu@@ADaAze9;OZttd*k|T-E~s)?z7!v3d+#i%iT>E_v-1cO@!bW8I2L zx!LoD;W+g-m|g^(K0&>ju`5MZE&|vnAlKgXZ>r{z4rzwk8Ir<;s}v!db8bmg8nG)T zx^Nbiu1N`!i}*d)1d!A)eF2-sC~9e?iKI4-@Cx@j?Q$w*V`Ljfx?VUR96-Jv;Y?l> z)1~?}@eY}aqi9HPT+7=ys)vQH-JvA(2p*J|D=GDF$uH%McZNnhrp!R6IJ%9|gkOGA zW&l`t4SizOdYk+@ak0l_CpNdQO*U%BC)qwQBb(&QB?TM9C3+uWODy(IIMJk|Hm;ussr@^&@F2t|L8na@^)21D)$yPSMN=s*5{V^tkI5K>g$S zEm|eSP$Bu2bu{PuG&q*$X0-9TSX<0*g6 z*yTj)CD|_nvgz*(Xd5U@1sP!xo}4i+19qLqC%?Lym3dX39YZ7`TDC1?Xi@zbpPaCK zs}O-BxxukcX^wi^Y6aQ1Jg8`Y8QKF(+$syRCQ$hpZ=Tzvg?$?m}!gQNmNhNjdg zpFotpHFrgxo}*_s@4WdR-$bmV#2m~&zlQS8bhZY+iedHKa_11jcNhasqI`NaYw-03 zT*dygrtPeZkW^a#q-XlaJBAuj1s@rZLIhFw}GB@+KR2a5b921!w*-!YP&t_Dz8!PcT!oYxQCRrxhbbo8{2R}6> z_{G)2uWrosGbrj?C#{;TlV-{{)3i0fEp*go(f*n{$eEYR6_~2h*BryH?>O45C+(PC zztVxsY>g!022~MwgZQ%QjOz+8EhGhlNH1+1%9|DO)YyWY%{w!(Z*Uxw<7DgvOoP75WA`xD2hPuQp7%qx^|ZqTkVyXrPgO{k;T)Ujy*U{!HP=BNtJw%%k~tQb|H1=84ZRa zlTAo=zeCK>dcvF!)j-rzHxo$yXB`RG-Vs%Q|D_UmFh4}KrC*zKs9Ytf$jL|q)dm#J zaYNDriZ1>KxgfbsnzXa}=MN=ye!S9Yyxh4hq^O5x)EfTKz=g5$NqRTux+$x4r{?@G zVNnei-0N?^Fgk}^YhG@-XKe%iNbx?=Mw z&ADZ%9z^~9#&@y08uhd2;n*gHSx80q{NCw1(#-BXJ#T0!x*O`hXBJXV6(0w&;d>%j z9S7sjhx%P`)K4)B5AfuQ=ccpU8WVYX#0#S}KSy8LN2g-DcJuNVKANnb2d48f25yk4%^UNu3IP zc($eDNhlw8ZR#2iwgaRRPF*3R`0BvnFDzxmnr16Ocvkj?mI4MSj_y z1K9jVJ6WFcna0wo?}Z)Mwju7w1E#{@5uYme0eL z4gScln7{3cVkQ-Y|EUi}M)DVY#>NYX8B|A-!i+oeME};zlQ?5ubx-?tP0qQmNgzT; 
z5V}NA;Y#^v{BXV-OiJOVbz5j!6|E)AxSO_IHtJ6nh?byQGmo*9T+`{_Uf<3av zkIHzu*Q46v7jH3(&Ez$M-u4>INmVDk6@Q6;HIS|l(bxDPeOR39Y#rUK_G67Cj_87y zRH5Ca%qF{JJ9^_hO)($EzMOyy!5v35pN>?K1lzv6dF_(;YRk8`Bim7tcS3#t(1u^N zS*`kt7k@q6aXJGMImprPwzcPhoBFZKTTWPBdwl%+?w9M@@0=@0_KCVueB1iz1rtL=bxT6@U>(Yvrn_d{&kJH}(n&(ze2J`Nk3Y2b z=Ior`-(N#7Rc?tk!zZXN`!-oS&1rE^#Q5JMNB>rFSIb2C91`gKjvF}yBw(|XV^Ll@ zG>*pQw}+f+?i*f-y%}-&sp+-%e|WvK=AChCYaUELy_c4t8!EO?+R)-e%^x_9(Ig6` z%?DY;1~n}`BQpydhqX>_o;+zboj&M_wn{e%a#_Gzkw9pYOW%;;#k1^a)Zv zkrp{d)pm`f|Kn&NCwb!c(8lIe^ZppCDH4t4oGsmUc);AC~j(nGc+jEt%QEo(JgX=)ojNx z>_tw_(90LM(>}x6*pnJG#~5M{@2WZ;G6O+$A)KysSb1tk_mhk)j8}c8(m((7KEGRb z;})V&DzZBqwSzzuZL^X}6~Wk5hv?5_K4MtiK$SqL&VJd=})F!5}p8g@C zq?23I5CD;?vIxiUfD0++Q^d9WD(P`924|w=2}tRDrnD@6egX@uXaP|pOQIgBCc=}t z<3aZ0Rnn_Ncch_-sSPqR{w^sTo*A9(jZ;6g<=pAj-T(e3;E3VrJawY=nS8&&8V+<~ z)JIi{(!f4fQ`GhC7>nRV2CzbNX|IDN#jc-_b&L%s;eWTP)t;;NEM}uy*xH9^dS$&Q z?BR3SJItD3Bx-~dG8-IV0hf6g309DAHpP1kgM8o;<&ryk?CKT9;u ztM@weAI9yh`lAb-^KCtuAuI{ow4Li%s%BkZ?>A*SPIDkp{_vw`mu`z;YKsu15Zcm> z(>~SGBV{@5-_k4VOl|E`@C$A2SK5B6bwG9Rbn6|BG-}(Rc4=takbcdnwqfJT(`_TG xda0L3EgytlzH9sH)a9|&U#BnMTZd9>A9q&R+CJf~eY*Yr2FvGGHyHq|{vY3UGB5xD literal 0 HcmV?d00001 From 24048739b03c5c658fc496fd3d6c08757103d237 Mon Sep 17 00:00:00 2001 From: RobertZK Date: Sun, 2 Aug 2015 12:07:41 -0500 Subject: [PATCH 17/25] Use an immutable input environment. 
--- R/tundraContainer-initialize.R | 19 ++++++++++--------- R/tundraContainer-train.R | 2 +- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/R/tundraContainer-initialize.R b/R/tundraContainer-initialize.R index 3e187bb..2913d75 100644 --- a/R/tundraContainer-initialize.R +++ b/R/tundraContainer-initialize.R @@ -33,15 +33,16 @@ initialize <- function(keyword, train_function = identity, "stageRunner object.") } - .keyword <<- keyword - .train_function <<- train_function - .predict_function <<- predict_function - .munge_procedure <<- munge_procedure - .default_args <<- default_args - .internal <<- internal + self$.keyword <<- keyword + self$.train_function <<- train_function + self$.predict_function <<- predict_function + self$.munge_procedure <<- munge_procedure + self$.default_args <<- default_args + self$.internal <<- internal - .input <<- list_to_env(list()) - .output <<- list_to_env(list()) - .internal <<- list_to_env(list()) + self$.input <<- list_to_env(list()) + lockEnvironment(self$.input) + self$.output <<- list_to_env(list()) + self$.internal <<- list_to_env(list()) } diff --git a/R/tundraContainer-train.R b/R/tundraContainer-train.R index e47c935..e88d460 100644 --- a/R/tundraContainer-train.R +++ b/R/tundraContainer-train.R @@ -38,7 +38,7 @@ train <- function(dataframe, train_args = list(), verbose = FALSE, munge = TRUE) output <- call_with( self$.train_function, list(dataframe), - list(input = self$.input, output = self$.output) + list(input = list_to_env(train_args, self$.input), output = self$.output) ) private$run_hooks("train_finalize") From 4cf3a258f843d92c6c480940265c0c0b159c6ad9 Mon Sep 17 00:00:00 2001 From: RobertZK Date: Sun, 2 Aug 2015 12:09:51 -0500 Subject: [PATCH 18/25] Test simple example for tundraContainer train. 
--- R/tundraContainer-train.R | 2 +- tests/testthat/test-train.R | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) create mode 100644 tests/testthat/test-train.R diff --git a/R/tundraContainer-train.R b/R/tundraContainer-train.R index e88d460..1b76dba 100644 --- a/R/tundraContainer-train.R +++ b/R/tundraContainer-train.R @@ -29,7 +29,7 @@ train <- function(dataframe, train_args = list(), verbose = FALSE, munge = TRUE) force(munge) private$run_hooks("train_pre_munge") - if (isTRUE(munge) && length(munge_procedure) > 0) { + if (isTRUE(munge) && length(self$.munge_procedure) > 0) { dataframe <- munge(dataframe, self$.munge_procedure, verbose) attr(dataframe, "mungepieces") <- NULL } diff --git a/tests/testthat/test-train.R b/tests/testthat/test-train.R new file mode 100644 index 0000000..cb7c1f4 --- /dev/null +++ b/tests/testthat/test-train.R @@ -0,0 +1,11 @@ +context("tundraContainer$train") + +test_that("it can train a simple example", { + container <- tundraContainer$new("foo", function(data) { + output$data <- data + }) + + container$train(iris) + expect_identical(container$.output$data, iris) +}) + From 9541ff73cfbb36373fc02bf5c245224627cf41f6 Mon Sep 17 00:00:00 2001 From: RobertZK Date: Sun, 2 Aug 2015 12:30:29 -0500 Subject: [PATCH 19/25] Initial tundra predict method. 
--- R/tundraContainer-predict.R | 24 ++++++++++++++++++++++++ R/tundraContainer-train.R | 7 +++++-- R/utils.r | 24 ++++++++++++++++++++++++ man/list_merge.Rd | 29 +++++++++++++++++++++++++++++ man/predict.Rd | 37 +++++++++++++++++++++++++++++++++++++ man/train.Rd | 2 +- man/tundraContainer.Rd | 2 +- 7 files changed, 121 insertions(+), 4 deletions(-) create mode 100644 R/tundraContainer-predict.R create mode 100644 man/list_merge.Rd create mode 100644 man/predict.Rd diff --git a/R/tundraContainer-predict.R b/R/tundraContainer-predict.R new file mode 100644 index 0000000..2bfb9eb --- /dev/null +++ b/R/tundraContainer-predict.R @@ -0,0 +1,24 @@ +#' Predict on a dataset using a trained tundraContainer. +#' +#' @param dataframe data.frame. The data set to generate predictions on +#' with the trained model. The data will be preprocessed with the +#' \code{tundraContainer}'s trained \code{munge_procedure} and +#' then passed as the first argument to the \code{tundraContainer}'s +#' \code{predict_function}. +#' @param predict_args list. A list of arguments to pass to pass to the +#' \code{tundraContainer}'s \code{predict_function} as its second argument. +#' @param verbose logical. Either \code{TRUE} or \code{FALSE}, by +#' default the latter. If \code{TRUE}, then output produced by +#' running the \code{munge_procedure} or the \code{predict_function} +#' will not be silenced. +#' @param munge logical. Either \code{TRUE} or \code{FALSE}, by +#' default the former. If \code{FALSE}, the \code{munge_procedure} +#' provided to the container during initialization will be used to +#' preprocess the given \code{dataframe}. +#' @return The value returned by the \code{tundraContainer}'s +#' \code{predict_function}, usually a numeric vector or +#' \code{data.frame} of predictions. 
+predict <- function(dataframe, predict_args = list(), verbose = FALSE, munge = TRUE) { + +} + diff --git a/R/tundraContainer-train.R b/R/tundraContainer-train.R index 1b76dba..bed00e6 100644 --- a/R/tundraContainer-train.R +++ b/R/tundraContainer-train.R @@ -4,7 +4,7 @@ #' will be preprocessed with the \code{tundraContainer}'s #' \code{munge_procedure} and then passed as the first argument to #' the \code{tundraContainer}'s \code{train_function}. -#' @param train_args list. A list of arguments to pass to make available +#' @param train_args list. A list of arguments to make available #' to the \code{tundraContainer}'s \code{train_function} through #' use of the \code{input} keyword. See the examples. #' @param verbose logical. Either \code{TRUE} or \code{FALSE}, by @@ -38,7 +38,10 @@ train <- function(dataframe, train_args = list(), verbose = FALSE, munge = TRUE) output <- call_with( self$.train_function, list(dataframe), - list(input = list_to_env(train_args, self$.input), output = self$.output) + list( + input = list_to_env(list_merge(default_args, train_args), self$.input), + output = self$.output + ) ) private$run_hooks("train_finalize") diff --git a/R/utils.r b/R/utils.r index e63f281..8139c8a 100644 --- a/R/utils.r +++ b/R/utils.r @@ -41,3 +41,27 @@ call_with <- function(fn, args, with) { do.call(fn, args) } +#' Merge two lists and overwrite latter entries with former entries +#' if names are the same. +#' +#' For example, \code{list_merge(list(a = 1, b = 2), list(b = 3, c = 4))} +#' will be \code{list(a = 1, b = 3, c = 4)}. +#' @param list1 list +#' @param list2 list +#' @return the merged list. +#' @examples \dontrun{ +#' stopifnot(identical(list_merge(list(a = 1, b = 2), list(b = 3, c = 4)), +#' list(a = 1, b = 3, c = 4))) +#' stopifnot(identical(list_merge(NULL, list(a = 1)), list(a = 1))) +#' } +list_merge <- function(list1, list2) { + list1 <- list1 %||% list() + # Pre-allocate memory to make this slightly faster. 
+ list1[Filter(function(x) nchar(x) > 0, names(list2) %||% c())] <- NULL + for (i in seq_along(list2)) { + name <- names(list2)[i] + if (!identical(name, NULL) && !identical(name, "")) list1[[name]] <- list2[[i]] + else list1 <- append(list1, list(list2[[i]])) + } + list1 +} diff --git a/man/list_merge.Rd b/man/list_merge.Rd new file mode 100644 index 0000000..b52bb85 --- /dev/null +++ b/man/list_merge.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/utils.r +\name{list_merge} +\alias{list_merge} +\title{Merge two lists and overwrite latter entries with former entries +if names are the same.} +\usage{ +list_merge(list1, list2) +} +\arguments{ +\item{list1}{list} + +\item{list2}{list} +} +\value{ +the merged list. +} +\description{ +For example, \code{list_merge(list(a = 1, b = 2), list(b = 3, c = 4))} +will be \code{list(a = 1, b = 3, c = 4)}. +} +\examples{ +\dontrun{ +stopifnot(identical(list_merge(list(a = 1, b = 2), list(b = 3, c = 4)), + list(a = 1, b = 3, c = 4))) +stopifnot(identical(list_merge(NULL, list(a = 1)), list(a = 1))) +} +} + diff --git a/man/predict.Rd b/man/predict.Rd new file mode 100644 index 0000000..c2cbbcb --- /dev/null +++ b/man/predict.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/tundraContainer-predict.R +\name{predict} +\alias{predict} +\title{Predict on a dataset using a trained tundraContainer.} +\usage{ +predict(dataframe, predict_args = list(), verbose = FALSE, munge = TRUE) +} +\arguments{ +\item{dataframe}{data.frame. The data set to generate predictions on +with the trained model. The data will be preprocessed with the +\code{tundraContainer}'s trained \code{munge_procedure} and +then passed as the first argument to the \code{tundraContainer}'s +\code{predict_function}.} + +\item{predict_args}{list. 
A list of arguments to pass to pass to the +\code{tundraContainer}'s \code{predict_function} as its second argument.} + +\item{verbose}{logical. Either \code{TRUE} or \code{FALSE}, by +default the latter. If \code{TRUE}, then output produced by +running the \code{munge_procedure} or the \code{predict_function} +will not be silenced.} + +\item{munge}{logical. Either \code{TRUE} or \code{FALSE}, by +default the former. If \code{FALSE}, the \code{munge_procedure} +provided to the container during initialization will be used to +preprocess the given \code{dataframe}.} +} +\value{ +The value returned by the \code{tundraContainer}'s + \code{predict_function}, usually a numeric vector or + \code{data.frame} of predictions. +} +\description{ +Predict on a dataset using a trained tundraContainer. +} + diff --git a/man/train.Rd b/man/train.Rd index 330b91c..359ea44 100644 --- a/man/train.Rd +++ b/man/train.Rd @@ -12,7 +12,7 @@ will be preprocessed with the \code{tundraContainer}'s \code{munge_procedure} and then passed as the first argument to the \code{tundraContainer}'s \code{train_function}.} -\item{train_args}{list. A list of arguments to pass to make available +\item{train_args}{list. A list of arguments to make available to the \code{tundraContainer}'s \code{train_function} through use of the \code{input} keyword. See the examples.} diff --git a/man/tundraContainer.Rd b/man/tundraContainer.Rd index 095e51e..d3c8f20 100644 --- a/man/tundraContainer.Rd +++ b/man/tundraContainer.Rd @@ -4,7 +4,7 @@ \name{tundraContainer} \alias{tundraContainer} \title{A standard container format for classifiers developed in R.} -\format{\preformatted{Class 'R6ClassGenerator' +\format{\preformatted{Class 'R6ClassGenerator' - attr(*, "name")= chr "tundraContainer_generator" }} \usage{ From bd143270380e890a82d5a34d87e2e13269ee4c0a Mon Sep 17 00:00:00 2001 From: RobertZK Date: Sun, 2 Aug 2015 12:36:33 -0500 Subject: [PATCH 20/25] Working tundraContainer predict method. 
--- R/tundraContainer-predict.R | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/R/tundraContainer-predict.R b/R/tundraContainer-predict.R index 2bfb9eb..b471686 100644 --- a/R/tundraContainer-predict.R +++ b/R/tundraContainer-predict.R @@ -19,6 +19,35 @@ #' \code{predict_function}, usually a numeric vector or #' \code{data.frame} of predictions. predict <- function(dataframe, predict_args = list(), verbose = FALSE, munge = TRUE) { + if (!isTRUE(self$.trained)) { + stop("Tundra model ", sQuote(self$.keyword), " has not been trained yet.") + } + force(verbose) + force(munge) + force(predict_args) + + private$run_hooks("predict_pre_munge") + if (isTRUE(munge) && length(self$.munge_procedure) > 0) { + initial_nrow <- NROW(datafram) + dataframe <- munge(dataframe, self$.munge_procedure, verbose) + if (NROW(dataframe) != initial_nrow) { + warning("Some rows were removed during data preparation. ", + "Predictions will not match input dataframe.") + } + } + private$run_hooks("predict_post_munge") + + if (length(formals(self$.predict_function) < 2 || missing(predict_args))) { + args <- list(dataframe) + } else { + args <- list(dataframe, predict_args) + } + + call_with( + self$.predict_function, + args + list(input = self$.input, output = self$.output) + ) } From 77702dbc96e718cd8a09fc65b51a9382ea3353f4 Mon Sep 17 00:00:00 2001 From: RobertZK Date: Sun, 2 Aug 2015 12:39:44 -0500 Subject: [PATCH 21/25] Flesh out tundraContainer definition. 
--- R/tundraContainer.R | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/R/tundraContainer.R b/R/tundraContainer.R index f4f2b0a..6741725 100644 --- a/R/tundraContainer.R +++ b/R/tundraContainer.R @@ -17,7 +17,16 @@ tundraContainer <- R6::R6Class("tundraContainer", .hooks = NULL, # list initialize = initialize, - train = train + train = train, + predict = predict, + #add_hook = add_hook, + + munge = function(dataframe, steps = TRUE) { + mungebits::munge(dataframe, munge_procedure[steps]) + }, + show = function() { + cat("A tundraContainer of type ", sQuote(self$.keyword), "\n") + } ), private = list( From 0eab12e1e17efa31a162e24a891f85e3950b9b6c Mon Sep 17 00:00:00 2001 From: RobertZK Date: Sun, 2 Aug 2015 12:43:07 -0500 Subject: [PATCH 22/25] Fix train documentation. --- R/tundraContainer-initialize.R | 2 +- R/tundraContainer-predict.R | 2 +- R/utils.r | 1 + man/initialize.Rd | 2 +- man/tundraContainer.Rd | 2 +- 5 files changed, 5 insertions(+), 4 deletions(-) diff --git a/R/tundraContainer-initialize.R b/R/tundraContainer-initialize.R index 2913d75..13eeb10 100644 --- a/R/tundraContainer-initialize.R +++ b/R/tundraContainer-initialize.R @@ -15,7 +15,7 @@ #' \code{\link[mungebits]{mungepiece}}s to apply to data sets #' during prediction. #' @param default_args list. A list of default arguments to provide to -#' the second argument to the \code{train_function}. The additional +#' the second argument of the \code{train_function}. The additional #' arguments provided to the \code{tundraContainer}'s \code{train} #' method will be merged on top of these defaults. #' @param internal list. 
Internal metadata that should accompany the diff --git a/R/tundraContainer-predict.R b/R/tundraContainer-predict.R index b471686..2d1cd9a 100644 --- a/R/tundraContainer-predict.R +++ b/R/tundraContainer-predict.R @@ -46,7 +46,7 @@ predict <- function(dataframe, predict_args = list(), verbose = FALSE, munge = T call_with( self$.predict_function, - args + args, list(input = self$.input, output = self$.output) ) } diff --git a/R/utils.r b/R/utils.r index 8139c8a..90dd785 100644 --- a/R/utils.r +++ b/R/utils.r @@ -65,3 +65,4 @@ list_merge <- function(list1, list2) { } list1 } + diff --git a/man/initialize.Rd b/man/initialize.Rd index efcd6cc..eaab140 100644 --- a/man/initialize.Rd +++ b/man/initialize.Rd @@ -27,7 +27,7 @@ value).} during prediction.} \item{default_args}{list. A list of default arguments to provide to -the second argument to the \code{train_function}. The additional +the second argument of the \code{train_function}. The additional arguments provided to the \code{tundraContainer}'s \code{train} method will be merged on top of these defaults.} diff --git a/man/tundraContainer.Rd b/man/tundraContainer.Rd index d3c8f20..28a0696 100644 --- a/man/tundraContainer.Rd +++ b/man/tundraContainer.Rd @@ -4,7 +4,7 @@ \name{tundraContainer} \alias{tundraContainer} \title{A standard container format for classifiers developed in R.} -\format{\preformatted{Class 'R6ClassGenerator' +\format{\preformatted{Class 'R6ClassGenerator' - attr(*, "name")= chr "tundraContainer_generator" }} \usage{ From 8ea88b3295a21293508247b542e8f7a3e6a18641 Mon Sep 17 00:00:00 2001 From: RobertZK Date: Sun, 2 Aug 2015 12:45:13 -0500 Subject: [PATCH 23/25] Fix some typos. 
--- R/tundraContainer-initialize.R | 2 +- R/tundraContainer-predict.R | 2 +- man/initialize.Rd | 2 +- man/predict.Rd | 2 +- man/tundraContainer.Rd | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/R/tundraContainer-initialize.R b/R/tundraContainer-initialize.R index 13eeb10..d092959 100644 --- a/R/tundraContainer-initialize.R +++ b/R/tundraContainer-initialize.R @@ -7,7 +7,7 @@ #' a list of additional parameters used for training the model. #' @param predict_function function. The function used to predict #' on new datasets. Its first argument will be a data.frame, -#' the dataset to predict on it, and its second (optional) +#' the dataset to predict on, and its second (optional) #' argument will be additional parameters used for prediction #' output (such as whether to return a probabilistic or absolute #' value). diff --git a/R/tundraContainer-predict.R b/R/tundraContainer-predict.R index 2d1cd9a..dbe191a 100644 --- a/R/tundraContainer-predict.R +++ b/R/tundraContainer-predict.R @@ -5,7 +5,7 @@ #' \code{tundraContainer}'s trained \code{munge_procedure} and #' then passed as the first argument to the \code{tundraContainer}'s #' \code{predict_function}. -#' @param predict_args list. A list of arguments to pass to pass to the +#' @param predict_args list. A list of arguments to pass to the #' \code{tundraContainer}'s \code{predict_function} as its second argument. #' @param verbose logical. Either \code{TRUE} or \code{FALSE}, by #' default the latter. If \code{TRUE}, then output produced by diff --git a/man/initialize.Rd b/man/initialize.Rd index eaab140..1711817 100644 --- a/man/initialize.Rd +++ b/man/initialize.Rd @@ -17,7 +17,7 @@ a list of additional parameters used for training the model.} \item{predict_function}{function. The function used to predict on new datasets. 
Its first argument will be a data.frame, -the dataset to predict on it, and its second (optional) +the dataset to predict on, and its second (optional) argument will be additional parameters used for prediction output (such as whether to return a probabilistic or absolute value).} diff --git a/man/predict.Rd b/man/predict.Rd index c2cbbcb..af13f85 100644 --- a/man/predict.Rd +++ b/man/predict.Rd @@ -13,7 +13,7 @@ with the trained model. The data will be preprocessed with the then passed as the first argument to the \code{tundraContainer}'s \code{predict_function}.} -\item{predict_args}{list. A list of arguments to pass to pass to the +\item{predict_args}{list. A list of arguments to pass to the \code{tundraContainer}'s \code{predict_function} as its second argument.} \item{verbose}{logical. Either \code{TRUE} or \code{FALSE}, by diff --git a/man/tundraContainer.Rd b/man/tundraContainer.Rd index 28a0696..d734ff1 100644 --- a/man/tundraContainer.Rd +++ b/man/tundraContainer.Rd @@ -4,7 +4,7 @@ \name{tundraContainer} \alias{tundraContainer} \title{A standard container format for classifiers developed in R.} -\format{\preformatted{Class 'R6ClassGenerator' +\format{\preformatted{Class 'R6ClassGenerator' - attr(*, "name")= chr "tundraContainer_generator" }} \usage{ From 5fb0fa631d6f5f82354a72dc1c431284e4f83f02 Mon Sep 17 00:00:00 2001 From: RobertZK Date: Sun, 2 Aug 2015 12:48:15 -0500 Subject: [PATCH 24/25] A couple of documentation fixes. --- R/tundraContainer-predict.R | 4 ++-- R/tundraContainer-train.R | 5 +++-- man/predict.Rd | 4 ++-- man/train.Rd | 5 +++-- man/tundraContainer.Rd | 2 +- 5 files changed, 11 insertions(+), 9 deletions(-) diff --git a/R/tundraContainer-predict.R b/R/tundraContainer-predict.R index dbe191a..055505b 100644 --- a/R/tundraContainer-predict.R +++ b/R/tundraContainer-predict.R @@ -1,6 +1,6 @@ #' Predict on a dataset using a trained tundraContainer. #' -#' @param dataframe data.frame. 
The data set to generate predictions on +#' @param dataframe data.frame. The dataset to generate predictions on #' with the trained model. The data will be preprocessed with the #' \code{tundraContainer}'s trained \code{munge_procedure} and #' then passed as the first argument to the \code{tundraContainer}'s @@ -12,7 +12,7 @@ #' running the \code{munge_procedure} or the \code{predict_function} #' will not be silenced. #' @param munge logical. Either \code{TRUE} or \code{FALSE}, by -#' default the former. If \code{FALSE}, the \code{munge_procedure} +#' default the former. If \code{TRUE}, the \code{munge_procedure} #' provided to the container during initialization will be used to #' preprocess the given \code{dataframe}. #' @return The value returned by the \code{tundraContainer}'s diff --git a/R/tundraContainer-train.R b/R/tundraContainer-train.R index bed00e6..0d2d656 100644 --- a/R/tundraContainer-train.R +++ b/R/tundraContainer-train.R @@ -1,6 +1,6 @@ #' Train a model encapsulated within a tundraContainer. #' -#' @param dataframe data.frame. The data set to train the model on. This +#' @param dataframe data.frame. The dataset to train the model on. This #' will be preprocessed with the \code{tundraContainer}'s #' \code{munge_procedure} and then passed as the first argument to #' the \code{tundraContainer}'s \code{train_function}. @@ -18,7 +18,8 @@ #' be run through it. #' @return The value returned by the \code{tundraContainer}'s #' \code{train_function}. Since the \code{train_function} has side effects -#' on the container, this can usually be \code{invisible(NULL)}. +#' on the container as its primary purpose, this can usually be +#' \code{invisible(NULL)}. 
train <- function(dataframe, train_args = list(), verbose = FALSE, munge = TRUE) { if (isTRUE(self$.trained)) { stop("The tundra ", sQuote(self$.keyword), " model has already been trained.") diff --git a/man/predict.Rd b/man/predict.Rd index af13f85..081c81e 100644 --- a/man/predict.Rd +++ b/man/predict.Rd @@ -7,7 +7,7 @@ predict(dataframe, predict_args = list(), verbose = FALSE, munge = TRUE) } \arguments{ -\item{dataframe}{data.frame. The data set to generate predictions on +\item{dataframe}{data.frame. The dataset to generate predictions on with the trained model. The data will be preprocessed with the \code{tundraContainer}'s trained \code{munge_procedure} and then passed as the first argument to the \code{tundraContainer}'s @@ -22,7 +22,7 @@ running the \code{munge_procedure} or the \code{predict_function} will not be silenced.} \item{munge}{logical. Either \code{TRUE} or \code{FALSE}, by -default the former. If \code{FALSE}, the \code{munge_procedure} +default the former. If \code{TRUE}, the \code{munge_procedure} provided to the container during initialization will be used to preprocess the given \code{dataframe}.} } diff --git a/man/train.Rd b/man/train.Rd index 359ea44..5585876 100644 --- a/man/train.Rd +++ b/man/train.Rd @@ -7,7 +7,7 @@ train(dataframe, train_args = list(), verbose = FALSE, munge = TRUE) } \arguments{ -\item{dataframe}{data.frame. The data set to train the model on. This +\item{dataframe}{data.frame. The dataset to train the model on. This will be preprocessed with the \code{tundraContainer}'s \code{munge_procedure} and then passed as the first argument to the \code{tundraContainer}'s \code{train_function}.} @@ -30,7 +30,8 @@ be run through it.} \value{ The value returned by the \code{tundraContainer}'s \code{train_function}. Since the \code{train_function} has side effects - on the container, this can usually be \code{invisible(NULL)}. + on the container as its primary purpose, this can usually be + \code{invisible(NULL)}. 
} \description{ Train a model encapsulated within a tundraContainer. diff --git a/man/tundraContainer.Rd b/man/tundraContainer.Rd index d734ff1..71a1dea 100644 --- a/man/tundraContainer.Rd +++ b/man/tundraContainer.Rd @@ -4,7 +4,7 @@ \name{tundraContainer} \alias{tundraContainer} \title{A standard container format for classifiers developed in R.} -\format{\preformatted{Class 'R6ClassGenerator' +\format{\preformatted{Class 'R6ClassGenerator' - attr(*, "name")= chr "tundraContainer_generator" }} \usage{ From 934337110acd264f2153926ef4b7e4ccbb49a69e Mon Sep 17 00:00:00 2001 From: RobertZK Date: Sun, 2 Aug 2015 12:49:53 -0500 Subject: [PATCH 25/25] Use self correctly here. --- R/tundraContainer-train.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/tundraContainer-train.R b/R/tundraContainer-train.R index 0d2d656..6ab6f15 100644 --- a/R/tundraContainer-train.R +++ b/R/tundraContainer-train.R @@ -40,7 +40,7 @@ train <- function(dataframe, train_args = list(), verbose = FALSE, munge = TRUE) self$.train_function, list(dataframe), list( - input = list_to_env(list_merge(default_args, train_args), self$.input), + input = list_to_env(list_merge(self$.default_args, train_args), self$.input), output = self$.output ) )