Skip to content

Commit

Permalink
[CLI][python-package][docs] Add n_estimators as num_iteration alias (#…
Browse files Browse the repository at this point in the history
…1079)

* Add n_estimators as num_iteration alias

Scikit-Learn's ensemble methods use the term `n_estimators` for the number of
boosting iterations when training models. To make LightGBM more accessible to
newcomers who are familiar with Scikit-Learn, it would help if the Parameters
page mentioned `n_estimators` and indicated which LightGBM parameter it maps to.

Addresses discussion brought up in #954

* Add n_estimators as num_iterations alias

Adds `n_estimators` as an alias for `num_iterations` in the CLI as well as the
Python libraries. Additionally bumps the default value of `n_estimators` in the
Sklearn API to 100 estimators.
  • Loading branch information
mathcass authored and StrikerRUS committed Dec 6, 2017
1 parent 1572267 commit 8fd71c0
Show file tree
Hide file tree
Showing 4 changed files with 8 additions and 7 deletions.
2 changes: 1 addition & 1 deletion docs/Parameters.rst
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ Core Parameters
- support multi validation data, separate by ``,``

- ``num_iterations``, default=\ ``100``, type=int,
alias=\ ``num_iteration``, ``num_tree``, ``num_trees``, ``num_round``, ``num_rounds``, ``num_boost_round``
alias=\ ``num_iteration``, ``num_tree``, ``num_trees``, ``num_round``, ``num_rounds``, ``num_boost_round``, ``n_estimators``

- number of boosting iterations

Expand Down
1 change: 1 addition & 0 deletions include/LightGBM/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,7 @@ struct ParameterAlias {
{ "num_trees", "num_iterations" },
{ "num_rounds", "num_iterations" },
{ "num_boost_round", "num_iterations" },
{ "n_estimators", "num_iterations"},
{ "sub_row", "bagging_fraction" },
{ "subsample", "bagging_fraction" },
{ "subsample_freq", "bagging_freq" },
Expand Down
8 changes: 4 additions & 4 deletions python-package/lightgbm/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def train(params, train_set, num_boost_round=100,
The trained Booster model.
"""
# create predictor first
for alias in ["num_boost_round", "num_iterations", "num_iteration", "num_tree", "num_trees", "num_round", "num_rounds"]:
for alias in ["num_boost_round", "num_iterations", "num_iteration", "num_tree", "num_trees", "num_round", "num_rounds", "n_estimators"]:
if alias in params:
num_boost_round = int(params.pop(alias))
warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
Expand Down Expand Up @@ -307,7 +307,7 @@ def _agg_cv_result(raw_results):
return [('cv_agg', k, np.mean(v), metric_type[k], np.std(v)) for k, v in cvmap.items()]


def cv(params, train_set, num_boost_round=10,
def cv(params, train_set, num_boost_round=100,
folds=None, nfold=5, stratified=True, shuffle=True,
metrics=None, fobj=None, feval=None, init_model=None,
feature_name='auto', categorical_feature='auto',
Expand All @@ -322,7 +322,7 @@ def cv(params, train_set, num_boost_round=10,
Parameters for Booster.
train_set : Dataset
Data to be trained on.
num_boost_round : int, optional (default=10)
num_boost_round : int, optional (default=100)
Number of boosting iterations.
folds : a generator or iterator of (train_idx, test_idx) tuples or None, optional (default=None)
The train and test indices for the each fold.
Expand Down Expand Up @@ -383,7 +383,7 @@ def cv(params, train_set, num_boost_round=10,
if not isinstance(train_set, Dataset):
raise TypeError("Traninig only accepts Dataset object")

for alias in ["num_boost_round", "num_iterations", "num_iteration", "num_tree", "num_trees", "num_round", "num_rounds"]:
for alias in ["num_boost_round", "num_iterations", "num_iteration", "num_tree", "num_trees", "num_round", "num_rounds", "n_estimators"]:
if alias in params:
warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
num_boost_round = params.pop(alias)
Expand Down
4 changes: 2 additions & 2 deletions python-package/lightgbm/sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ class LGBMModel(_LGBMModelBase):
"""Implementation of the scikit-learn API for LightGBM."""

def __init__(self, boosting_type="gbdt", num_leaves=31, max_depth=-1,
learning_rate=0.1, n_estimators=10,
learning_rate=0.1, n_estimators=100,
subsample_for_bin=200000, objective=None,
min_split_gain=0., min_child_weight=1e-3, min_child_samples=20,
subsample=1., subsample_freq=1, colsample_bytree=1.,
Expand All @@ -154,7 +154,7 @@ def __init__(self, boosting_type="gbdt", num_leaves=31, max_depth=-1,
Maximum tree depth for base learners, -1 means no limit.
learning_rate : float, optional (default=0.1)
Boosting learning rate.
n_estimators : int, optional (default=10)
n_estimators : int, optional (default=100)
Number of boosted trees to fit.
subsample_for_bin : int, optional (default=50000)
Number of samples for constructing bins.
Expand Down

0 comments on commit 8fd71c0

Please sign in to comment.