[R-package] CRAN fixes (#1499)
* Fixed typos in docs

* Fixed inconsistencies in documentation

* Updated strategy for registering routines

* Fixed issues caused by smashing multiple functions into one Rd

* Fixed issues with documentation

* Removed VignetteBuilder and updated Rbuildignore

* Added R build artefacts to gitignore

* Added namespacing on data.table set function (see the sketch after this list). Updated handling of CMakeLists file to get around CRAN check.

* Updated build instructions

* Added R build script

* Removed build_r.sh script and updated R-package install instructions
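The namespacing change above amounts to calling data.table's set() through its namespace, so the call works even when data.table is imported but not attached. A minimal sketch of the pattern, with hypothetical objects rather than code from this commit:

    # data.table::set() updates a data.table by reference; the explicit
    # namespace avoids relying on data.table being on the search path.
    dt <- data.table::data.table(x = 1:3)
    data.table::set(dt, j = "y", value = dt$x * 2L)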
jameslamb authored and chivee committed Aug 29, 2018
1 parent 80a9a94 commit eded794
Showing 35 changed files with 870 additions and 662 deletions.
8 changes: 8 additions & 0 deletions .gitignore
@@ -382,3 +382,11 @@ lightgbm.model
# duplicate version file
python-package/lightgbm/VERSION.txt
.Rproj.user

# R build artefacts
R-package/src/CMakeLists.txt
R-package/src/lib_lightgbm.so.dSYM/
R-package/src/src/
lightgbm_r/*
lightgbm*.tar.gz
lightgbm.Rcheck/
11 changes: 11 additions & 0 deletions R-package/.Rbuildignore
@@ -1 +1,12 @@
^build_package.R$
\.gitkeep$

# Objects created by compilation
\.o$
\.so$
\.dll$
\.out$
\.bin$

# Code copied in at build time
^src/CMakeLists.txt$
5 changes: 2 additions & 3 deletions R-package/DESCRIPTION
@@ -7,7 +7,7 @@ Authors@R: c(
person("Guolin", "Ke", email = "guolin.ke@microsoft.com", role = c("aut", "cre")),
person("Damien", "Soukhavong", email = "damien.soukhavong@skema.edu", role = c("ctb")),
person("Yachen", "Yan", role = c("ctb")),
person("James", "Lamb", role = c("ctb"))
person("James", "Lamb", email="james.lamb@uptake.com", role = c("ctb"))
)
Description: Tree based algorithms can be improved by introducing boosting frameworks. LightGBM is one such framework, and this package offers an R interface to work with it.
It is designed to be distributed and efficient with the following advantages:
@@ -21,7 +21,6 @@ Description: Tree based algorithms can be improved by introducing boosting frame
License: MIT + file LICENSE
URL: https://github.com/Microsoft/LightGBM
BugReports: https://github.com/Microsoft/LightGBM/issues
VignetteBuilder: knitr
Suggests:
Ckmeans.1d.dp (>= 3.3.1),
DiagrammeR (>= 0.8.1),
@@ -33,7 +32,7 @@ Suggests:
testthat,
vcd (>= 1.3)
Depends:
R (>= 3.0),
R (>= 3.4),
R6 (>= 2.0)
Imports:
data.table (>= 1.9.6),
2 changes: 1 addition & 1 deletion R-package/NAMESPACE
@@ -49,4 +49,4 @@ importFrom(magrittr,"%T>%")
importFrom(magrittr,extract)
importFrom(magrittr,inset)
importFrom(methods,is)
useDynLib(lib_lightgbm)
useDynLib(lib_lightgbm , .registration = TRUE)
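With .registration = TRUE, R resolves the package's native symbols through its registered routine table rather than by name lookup. A hedged way to inspect this from a session once the package is built and loaded, using only base R:

    # List the registered .Call entry points for the shared library.
    library(lightgbm)
    routines <- getDLLRegisteredRoutines("lib_lightgbm")
    names(routines$.Call)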
3 changes: 2 additions & 1 deletion R-package/R/callback.R
@@ -1,4 +1,5 @@
CB_ENV <- R6Class(
#' @importFrom R6 R6Class
CB_ENV <- R6::R6Class(
"lgb.cb_env",
cloneable = FALSE,
public = list(
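The same R6 pattern recurs throughout these files: declare the import with roxygen and call R6Class() through its namespace. A standalone sketch using a hypothetical class, not one from this package:

    #' @importFrom R6 R6Class
    Counter <- R6::R6Class(
      classname = "Counter",
      cloneable = FALSE,
      public = list(
        n = 0L,
        add = function(by = 1L) {
          self$n <- self$n + by
          invisible(self)
        }
      )
    )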
19 changes: 11 additions & 8 deletions R-package/R/lgb.Booster.R
@@ -1,4 +1,5 @@
Booster <- R6Class(
#' @importFrom R6 R6Class
Booster <- R6::R6Class(
classname = "lgb.Booster",
cloneable = FALSE,
public = list(
@@ -654,13 +655,15 @@ Booster <- R6Class(
#'
#' @rdname predict.lgb.Booster
#' @export
predict.lgb.Booster <- function(object, data,
num_iteration = NULL,
rawscore = FALSE,
predleaf = FALSE,
predcontrib = FALSE,
header = FALSE,
reshape = FALSE, ...) {
predict.lgb.Booster <- function(object,
data,
num_iteration = NULL,
rawscore = FALSE,
predleaf = FALSE,
predcontrib = FALSE,
header = FALSE,
reshape = FALSE,
...) {

# Check booster existence
if (!lgb.is.Booster(object)) {
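The reformatted signature above is reached through the predict() generic. A hedged usage sketch, assuming the package is installed and using its bundled agaricus data:

    library(lightgbm)
    data(agaricus.train, package = "lightgbm")
    dtrain <- lgb.Dataset(agaricus.train$data, label = agaricus.train$label)
    bst <- lgb.train(params = list(objective = "binary"),
                     data = dtrain,
                     nrounds = 5)
    # rawscore = TRUE returns untransformed scores; reshape matters only
    # for multi-output (e.g. multiclass) predictions.
    preds <- predict(bst, agaricus.train$data, rawscore = TRUE)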
8 changes: 5 additions & 3 deletions R-package/R/lgb.Dataset.R
@@ -1,6 +1,8 @@

#' @importFrom methods is
Dataset <- R6Class(
#' @importFrom R6 R6Class
Dataset <- R6::R6Class(

classname = "lgb.Dataset",
cloneable = FALSE,
public = list(
@@ -854,8 +856,8 @@ dimnames.lgb.Dataset <- function(x) {
#' Slice a dataset
#'
#' Get a new \code{lgb.Dataset} containing the specified rows of
#' orginal lgb.Dataset object
#'
#' original lgb.Dataset object
#'
#' @param dataset Object of class "lgb.Dataset"
#' @param idxset a integer vector of indices of rows needed
#' @param ... other parameters (currently not used)
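slice(), documented above, builds a new lgb.Dataset from selected rows of an existing one. A short hedged sketch along the same lines as the predict example:

    library(lightgbm)
    data(agaricus.train, package = "lightgbm")
    dtrain <- lgb.Dataset(agaricus.train$data, label = agaricus.train$label)
    # A new lgb.Dataset containing only the first 100 rows.
    dsub <- slice(dtrain, seq_len(100L))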
4 changes: 3 additions & 1 deletion R-package/R/lgb.Predictor.R
@@ -1,6 +1,8 @@

#' @importFrom methods is
Predictor <- R6Class(
#' @importFrom R6 R6Class
Predictor <- R6::R6Class(

classname = "lgb.Predictor",
cloneable = FALSE,
public = list(
35 changes: 14 additions & 21 deletions R-package/R/lgb.cv.R
@@ -1,4 +1,5 @@
CVBooster <- R6Class(
#' @importFrom R6 R6Class
CVBooster <- R6::R6Class(
classname = "lgb.CVBooster",
cloneable = FALSE,
public = list(
@@ -17,46 +18,39 @@ CVBooster <- R6Class(
)

#' @title Main CV logic for LightGBM
#' @description Cross validation logic used by LightGBM
#' @name lgb.cv
#' @param params List of parameters
#' @param data a \code{lgb.Dataset} object, used for CV
#' @param nrounds number of CV rounds
#' @inheritParams lgb_shared_params
#' @param nfold the original dataset is randomly partitioned into \code{nfold} equal size subsamples.
#' @param label vector of response values. Should be provided only when data is an R-matrix.
#' @param weight vector of response values. If not NULL, will set to dataset
#' @param obj objective function, can be character or custom objective function. Examples include
#' \code{regression}, \code{regression_l1}, \code{huber},
#' \code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass}
#' @param boosting boosting type. \code{gbdt}, \code{dart}
#' @param num_leaves number of leaves in one tree. defaults to 127
#' @param max_depth Limit the max depth for tree model. This is used to deal with overfit when #data is small.
#' Tree still grow by leaf-wise.
#' @param num_threads Number of threads for LightGBM. For the best speed, set this to the number of real CPU cores, not the number of threads (most CPU using hyper-threading to generate 2 threads per CPU core).
#' @param eval evaluation function, can be (list of) character or custom eval function
#' @param verbose verbosity for output, if <= 0, also will disable the print of evalutaion during training
#' @param record Boolean, TRUE will record iteration message to \code{booster$record_evals}
#' @param eval_freq evalutaion output frequence, only effect when verbose > 0
#' @param showsd \code{boolean}, whether to show standard deviation of cross validation
#' @param stratified a \code{boolean} indicating whether sampling of folds should be stratified
#' by the values of outcome labels.
#' @param folds \code{list} provides a possibility to use a list of pre-defined CV folds
#' (each element must be a vector of test fold's indices). When folds are supplied,
#' the \code{nfold} and \code{stratified} parameters are ignored.
#' @param init_model path of model file of \code{lgb.Booster} object, will continue train from this model
#' @param colnames feature names, if not null, will use this to overwrite the names in dataset
#' @param categorical_feature list of str or int
#' type int represents index,
#' type str represents feature names
#' @param early_stopping_rounds int
#' Activates early stopping.
#' CV score needs to improve at least every early_stopping_rounds round(s) to continue.
#' Requires at least one metric.
#' If there's more than one, will check all of them.
#' Returns the model with (best_iter + early_stopping_rounds).
#' If early stopping occurs, the model will have 'best_iter' field
#' @param callbacks list of callback functions
#' List of callback functions that are applied at each iteration.
#' @param ... other parameters, see Parameters.rst for more informations
#' @param ... other parameters, see Parameters.rst for more information. A few key parameters:
#' \itemize{
#' \item{boosting}{Boosting type. \code{"gbdt"} or \code{"dart"}}
#' \item{num_leaves}{number of leaves in one tree. defaults to 127}
#' \item{max_depth}{Limit the max depth for tree model. This is used to deal with
#' overfit when #data is small. Tree still grow by leaf-wise.}
#' \item{num_threads}{Number of threads for LightGBM. For the best speed, set this to
#' the number of real CPU cores, not the number of threads (most
#' CPU using hyper-threading to generate 2 threads per CPU core).}
#' }
#'
#' @return a trained model \code{lgb.CVBooster}.
#'
@@ -75,7 +69,6 @@ CVBooster <- R6Class(
#' learning_rate = 1,
#' early_stopping_rounds = 10)
#' }
#' @rdname lgb.train
#' @export
lgb.cv <- function(params = list(),
data,
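A hedged lgb.cv sketch matching the documented interface, with tuning parameters such as num_leaves supplied through params:

    library(lightgbm)
    data(agaricus.train, package = "lightgbm")
    dtrain <- lgb.Dataset(agaricus.train$data, label = agaricus.train$label)
    cv <- lgb.cv(params = list(objective = "binary",
                               metric = "auc",
                               num_leaves = 31L),
                 data = dtrain,
                 nrounds = 10,
                 nfold = 3,
                 stratified = TRUE,
                 early_stopping_rounds = 5)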
5 changes: 3 additions & 2 deletions R-package/R/lgb.model.dt.tree.R
@@ -20,7 +20,7 @@
#' \item \code{leaf_index}: ID of a leaf in a tree (integer)
#' \item \code{leaf_parent}: ID of the parent node for current leaf (integer)
#' \item \code{split_gain}: Split gain of a node
#' \item \code{threshold}: Spliting threshold value of a node
#' \item \code{threshold}: Splitting threshold value of a node
#' \item \code{decision_type}: Decision type of a node
#' \item \code{default_left}: Determine how to handle NA value, TRUE -> Left, FALSE -> Right
#' \item \code{internal_value}: Node value
@@ -47,7 +47,7 @@
#' }
#'
#' @importFrom magrittr %>%
#' @importFrom data.table := data.table
#' @importFrom data.table := data.table rbindlist
#' @importFrom jsonlite fromJSON
#' @export
lgb.model.dt.tree <- function(model, num_iteration = NULL) {
@@ -78,6 +78,7 @@ lgb.model.dt.tree <- function(model, num_iteration = NULL) {

}


#' @importFrom data.table data.table rbindlist
single.tree.parse <- function(lgb_tree) {

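Finally, a hedged sketch of lgb.model.dt.tree(), which flattens a trained booster into the per-node data.table described above:

    library(lightgbm)
    data(agaricus.train, package = "lightgbm")
    dtrain <- lgb.Dataset(agaricus.train$data, label = agaricus.train$label)
    bst <- lgb.train(params = list(objective = "binary"),
                     data = dtrain,
                     nrounds = 3)
    tree_dt <- lgb.model.dt.tree(bst)
    # Columns include split_gain, threshold, decision_type, and so on.
    head(tree_dt)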
