Merge pull request #309 from tlverse/devel

tlverse · Feb 2, 2021 · a119d47 · a119d47
2 parents 7b31005 + 601206e
commit a119d47
Show file tree

Hide file tree

Showing 328 changed files with 18,735 additions and 10,346 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -11,6 +11,7 @@ env:
 language: R
 sudo: required
 cache: packages
+
 cran: http://cran.rstudio.com
 warnings_are_errors: true
 r_build_args: "--no-manual"
@@ -20,30 +21,32 @@ r:
   - release
   - devel
 
+addons:
+  apt:
+    - sources:
+      - deadsnakes
+
 before_install:
   - sudo apt-get -y install libx11-dev mesa-common-dev libglu1-mesa-dev
-  - sudo apt-get -y install python3 python3-pip python3-setuptools
-  - sudo -H pip3 install --quiet --upgrade pip setuptools
-  - sudo -H pip3 install --quiet --ignore-installed numpy tensorflow keras
-  #- Rscript -e 'update.packages(ask = FALSE)'
+  - sudo apt-get -y install python3.6 python3.6-dev
+  - curl https://bootstrap.pypa.io/get-pip.py | sudo -H python3.6
+  - python3.6 -m pip install --upgrade pip setuptools
+  - python3.6 -m pip install --quiet --ignore-installed numpy tensorflow keras
+  - sudo $(which R) CMD javareconf
 
 r_packages:
   - devtools
   - covr
   - sessioninfo
   - data.table
+  - delayed
+  - hal9001
+  - haldensify
 
 r_github_packages:
   - r-lib/covr
   - r-lib/sessioninfo
-  - osofr/simcausal
-  - osofr/condensier
   - tlverse/origami
-  - tlverse/delayed
-  - tlverse/hal9001
-  - nhejazi/haldensify
-  - tpospisi/cdetools/r
-  - tpospisi/RFCDE/r
 
 after_success:
   - travis_wait 80 Rscript -e 'covr::codecov()'

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: sl3
 Title: Pipelines for Machine Learning and Super Learning
-Version: 1.3.7
+Version: 1.4.2
 Authors@R: c(
     person("Jeremy", "Coyle", email = "jeremyrcoyle@gmail.com",
            role = c("aut", "cre", "cph"),
@@ -15,22 +15,25 @@ Authors@R: c(
            role = "aut",
            comment = c(ORCID = "0000-0002-7404-8088")),
     person("Rachael", "Phillips", email = "rachaelvphillips@berkeley.edu",
-           role = "ctb",
+           role = "aut",
            comment = c(ORCID = "0000-0002-8474-591X")),
     person("Weixin", "Cai", email = "wcai@berkeley.edu",
            role = "ctb",
            comment = c(ORCID = "0000-0003-2680-3066")),
     person("Yulun", "Wu", email = "yulun_wu@berkeley.edu",
+           role = "ctb"),
+    person("Hugh", "Jiang", email = "hugh_jiang@berkeley.edu",
            role = "ctb")
   )
 Maintainer: Jeremy Coyle <jeremyrcoyle@gmail.com>
-Description: Modern implementation of the Super Learner prediction algorithm,
-    coupled with a general-purpose framework for machine learning via pipelines.
+Description: A modern implementation of the Super Learner prediction algorithm,
+    coupled with a general-purpose framework for composing arbitrary pipelines
+    for machine learning tasks.
 Depends: R (>= 2.14.0)
 Imports:
     data.table,
     assertthat,
-    origami (>= 1.0.0),
+    origami (>= 1.0.3),
     R6,
     uuid,
     BBmisc,
@@ -39,23 +42,22 @@ Imports:
     utils,
     methods,
     ggplot2,
-    stringr,
     digest,
-    imputeMissings
+    imputeMissings,
+    dplyr,
+    caret
 Suggests:
     testthat,
     rmarkdown,
     devtools,
-    dplyr,
     R.rsp,
     future,
     knitr,
+    stringr,
     reticulate,
     rgl,
     rJava,
     bartMachine,
-    simcausal,
-    condensier,
     cvAUC,
     e1071,
     earth,
@@ -82,14 +84,8 @@ Suggests:
     dbarts,
     gam (>= 1.15.0),
     haldensify,
-    RFCDE,
-    caret,
-    mgcv
-Remotes:
-    github::osofr/condensier,
-    github::tlverse/hal9001,
-    github::nhejazi/haldensify,
-    github::tpospisi/RFCDE/r
+    mgcv,
+    hts
 License: GPL-3
 URL: https://tlverse.org/sl3
 BugReports: https://github.com/tlverse/sl3/issues
@@ -99,5 +95,5 @@ LazyLoad: yes
 VignetteBuilder:
     knitr,
     R.rsp
-RoxygenNote: 7.0.2
+RoxygenNote: 7.1.1.9000
 Roxygen: list(markdown = TRUE, old_usage = TRUE, r6 = FALSE)
diff --git a/Makefile b/Makefile
@@ -2,7 +2,6 @@ md:
 	Rscript -e "rmarkdown::render('README.Rmd', output_file = 'README.md')"
 
 site:
-	Rscript -e "rmarkdown::render('README.Rmd', output_file = 'README.md')"
 	Rscript -e "pkgdown::build_site()"
 
 check:
@@ -32,5 +31,5 @@ coverage:
 style:
 	Rscript -e "styler::style_pkg()"
 
-pr: style doc check site
+pr: style check md site
 	echo "If all checks have passed, you are ready to submit PR"
diff --git a/NAMESPACE b/NAMESPACE
@@ -10,7 +10,6 @@ export(Lrnr_base)
 export(Lrnr_bilstm)
 export(Lrnr_bound)
 export(Lrnr_caret)
-export(Lrnr_condensier)
 export(Lrnr_cv)
 export(Lrnr_cv_selector)
 export(Lrnr_dbarts)
@@ -26,41 +25,47 @@ export(Lrnr_glm)
 export(Lrnr_glm_fast)
 export(Lrnr_glmnet)
 export(Lrnr_grf)
+export(Lrnr_gru_keras)
+export(Lrnr_gts)
 export(Lrnr_h2o_classifier)
 export(Lrnr_h2o_glm)
 export(Lrnr_h2o_grid)
 export(Lrnr_h2o_mutator)
 export(Lrnr_hal9001)
 export(Lrnr_haldensify)
+export(Lrnr_hts)
 export(Lrnr_independent_binomial)
 export(Lrnr_lstm)
+export(Lrnr_lstm_keras)
 export(Lrnr_mean)
+export(Lrnr_multiple_ts)
 export(Lrnr_multivariate)
+export(Lrnr_nnet)
 export(Lrnr_nnls)
 export(Lrnr_optim)
 export(Lrnr_pca)
 export(Lrnr_pkg_SuperLearner)
 export(Lrnr_pkg_SuperLearner_method)
 export(Lrnr_pkg_SuperLearner_screener)
-export(Lrnr_pkg_condensier_logisfitR6)
 export(Lrnr_polspline)
 export(Lrnr_pooled_hazards)
 export(Lrnr_randomForest)
 export(Lrnr_ranger)
 export(Lrnr_revere_task)
-export(Lrnr_rfcde)
 export(Lrnr_rpart)
 export(Lrnr_rugarch)
-export(Lrnr_screener_corP)
-export(Lrnr_screener_corRank)
-export(Lrnr_screener_randomForest)
+export(Lrnr_screener_augment)
+export(Lrnr_screener_coefs)
+export(Lrnr_screener_correlation)
+export(Lrnr_screener_importance)
 export(Lrnr_sl)
 export(Lrnr_solnp)
 export(Lrnr_solnp_density)
 export(Lrnr_stratified)
 export(Lrnr_subset_covariates)
 export(Lrnr_svm)
 export(Lrnr_tsDyn)
+export(Lrnr_ts_weights)
 export(Lrnr_xgboost)
 export(Pipeline)
 export(Shared_Data)
@@ -80,6 +85,8 @@ export(delayed_learner_train)
 export(delayed_make_learner)
 export(dt_expand_factors)
 export(factor_to_indicators)
+export(importance)
+export(importance_plot)
 export(inverse_sample)
 export(learner_fit_chain)
 export(learner_fit_predict)
@@ -115,7 +122,6 @@ export(undebug_learner)
 export(unpack_predictions)
 export(validation_task)
 export(variable_type)
-export(varimp)
 export(write_learner_template)
 import(R6)
 import(data.table)
@@ -126,14 +132,24 @@ importFrom(R6,R6Class)
 importFrom(assertthat,assert_that)
 importFrom(assertthat,is.count)
 importFrom(assertthat,is.flag)
+importFrom(caret,findLinearCombos)
+importFrom(data.table,":=")
+importFrom(data.table,data.table)
 importFrom(data.table,set)
 importFrom(data.table,setcolorder)
 importFrom(data.table,setnames)
+importFrom(data.table,setorderv)
 importFrom(digest,digest)
+importFrom(dplyr,"%>%")
+importFrom(dplyr,group_by)
+importFrom(dplyr,select)
+importFrom(dplyr,summarise_all)
 importFrom(ggplot2,cut_interval)
 importFrom(ggplot2,cut_number)
+importFrom(graphics,dotchart)
 importFrom(imputeMissings,impute)
 importFrom(methods,is)
+importFrom(origami,combiner_c)
 importFrom(origami,cross_validate)
 importFrom(origami,fold_index)
 importFrom(origami,id_folds_to_folds)
@@ -153,8 +169,7 @@ importFrom(stats,qlogis)
 importFrom(stats,runif)
 importFrom(stats,sd)
 importFrom(stats,weighted.mean)
-importFrom(stringr,str_extract)
-importFrom(stringr,str_remove)
 importFrom(utils,apropos)
+importFrom(utils,getS3method)
 importFrom(utils,packageVersion)
 importFrom(uuid,UUIDgenerate)
diff --git a/NEWS.md b/NEWS.md
@@ -1,8 +1,55 @@
-# sl3 1.3.1
+# sl3 1.4.2
+* Updates to variable importance functionality, including calculation of risk
+  ratio and risk differences under covariate deletion or permutation.
+* Addition of `importance_plot` to summarize variable importance findings.
+* Additions of new methods `reparameterize` and `retrain` to `Lrnr_base`, which
+  allow modification of the covariate set while training on a conserved task
+  and prediction on a new task using previously trained learners, respectively.
+
+# sl3 1.4.1
+* [TODO]
+
+# sl3 1.4.0
+* [TODO]
+
+# sl3 1.3.9
+* [TODO]
+
+# sl3 1.3.8
+* Updates to variable importance functionality, including use of risk ratios.
+* Change `Lrnr_hal9001` and `Lrnr_glmnet` to respect observation-level IDs.
+* Removal of `Remotes` and deprecation of `Lrnr_rfcde` and `Lrnr_condensier`:
+  * Both of these learner classes provided support for conditional density
+      estimation (CDE) and were useful when support for CDE was more limited.
+      Unfortunately, both packages are un-maintained or updated only very
+      sporadically, resulting in frequent bugs and presenting an obstacle
+      for an eventual CRAN release (both packages are GitHub-only).
+  * `Lrnr_rfcde` wrapped https://github.com/tpospisi/RFCDE, a sporadically
+      maintained tool for conditional density estimation (CDE). Support for
+      this has been removed in favor of built-in CDE tools, including, among
+      others, `Lrnr_density_semiparametric`.
+  * `Lrnr_condensier` wrapped https://github.com/osofr/condensier, which
+      provided a pooled hazards approach to CDE. This package contained an
+      implementation error (https://github.com/osofr/condensier/issues/15) and
+      was removed from CRAN. Support for this has been removed in favor of
+      `Lrnr_density_semiparametric` and `Lrnr_haldensify`, both of which more
+      reliably provide CDE support.
+
+# sl3 1.3.7
+* Sampling methods for Monte Carlo integration and related procedures.
+* A metalearner for the cross-validation selector (discrete super learner).
+* A learner for bounding, including support for bounded losses.
+* Resolution of a number of older issues (see #264).
+* Relaxation of checks inside `Stack` objects for time series learners.
+* Addition of a learner property table to `README.Rmd`.
+* Maintenance and documentation updates.
+
+# sl3 1.3.5
 * Overhaul of data preprocessing.
 * New screening methods and convex combination in `Lrnr_nnls`.
-* Bug fixes including covariate subsetting and better handling of NAs.
-* Package and documentation cleanup.
+* Bug fixes, including covariate subsetting and better handling of `NA`s.
+* Package and documentation cleanup; continuous integration and testing fixes.
+* Reproducibility updates (including new versioning and DOI minting).
 
 # sl3 1.3.0
 * Fixes incorrect handling of missingness in the automatic imputation procedure.

diff --git a/R/CV_Lrnr_sl.R b/R/CV_Lrnr_sl.R
@@ -7,12 +7,7 @@ CV_lrnr_sl <- function(lrnr_sl, task, loss_fun) {
   if (!inherits(lrnr_sl, "Lrnr_sl")) {
     stop("lrnr_sl must be a Lrnr_sl object")
   }
-
-
-
   cv_sl <- make_learner(Lrnr_cv, lrnr_sl, full_fit = TRUE)
-
-
   cv_sl_fit <- cv_sl$train(task)
   #
   # # to avoid refitting the stack to the full data,
@@ -29,7 +24,6 @@ CV_lrnr_sl <- function(lrnr_sl, task, loss_fun) {
   # cv_combined_fit <- copy(cv_sl)
   # cv_combined_fit$set_train(combined_fit_object, task)
   #
-
   full_fit <- cv_sl_fit$fit_object$full_fit
 
   # TODO: extract loss function from lrnr_sl where possible
@@ -40,6 +34,5 @@ CV_lrnr_sl <- function(lrnr_sl, task, loss_fun) {
   stack_risks <- full_risk[full_risk$learner != "SuperLearner"]
   set(sl_risk, , "learner", "SuperLearner")
   risks <- rbind(stack_risks, sl_risk)
-
   return(risks)
 }