diff --git a/R-package/R/lgb.cv.R b/R-package/R/lgb.cv.R
index 972e4206afc..973fc5e3954 100644
--- a/R-package/R/lgb.cv.R
+++ b/R-package/R/lgb.cv.R
@@ -140,7 +140,7 @@ lgb.cv <- function(params = list(),
     begin_iteration <- predictor$current_iter() + 1
   }
   # Check for number of rounds passed as parameter - in case there are multiple ones, take only the first one
-  n_trees <- c("num_iterations", "num_iteration", "num_tree", "num_trees", "num_round", "num_rounds")
+  n_trees <- c("num_iterations", "num_iteration", "n_iter", "num_tree", "num_trees", "num_round", "num_rounds", "num_boost_round", "n_estimators")
   if (any(names(params) %in% n_trees)) {
     end_iteration <- begin_iteration + params[[which(names(params) %in% n_trees)[1]]] - 1
   } else {
diff --git a/R-package/R/lgb.train.R b/R-package/R/lgb.train.R
index 25cd31ab191..ec1074ba360 100644
--- a/R-package/R/lgb.train.R
+++ b/R-package/R/lgb.train.R
@@ -117,7 +117,7 @@ lgb.train <- function(params = list(),
     begin_iteration <- predictor$current_iter() + 1
   }
   # Check for number of rounds passed as parameter - in case there are multiple ones, take only the first one
-  n_rounds <- c("num_iterations", "num_iteration", "num_tree", "num_trees", "num_round", "num_rounds")
+  n_rounds <- c("num_iterations", "num_iteration", "n_iter", "num_tree", "num_trees", "num_round", "num_rounds", "num_boost_round", "n_estimators")
   if (any(names(params) %in% n_rounds)) {
     end_iteration <- begin_iteration + params[[which(names(params) %in% n_rounds)[1]]] - 1
   } else {
diff --git a/docs/Parameters.rst b/docs/Parameters.rst
index 18b3b645e31..c2703aa6770 100644
--- a/docs/Parameters.rst
+++ b/docs/Parameters.rst
@@ -73,7 +73,7 @@ Core Parameters
 
     - ``tweedie``, Tweedie regression with log-link. It might be useful, e.g., for modeling total loss in insurance, or for any target that might be `tweedie-distributed <https://en.wikipedia.org/wiki/Tweedie_distribution>`__
 
-    - ``binary``, binary `log loss <https://en.wikipedia.org/wiki/Cross_entropy>`__ classification (or logistic regression). Requires labels in {0, 1}; see ``xentropy`` for general probability labels in [0, 1]
+    - ``binary``, binary `log loss <https://en.wikipedia.org/wiki/Cross_entropy>`__ classification (or logistic regression). Requires labels in {0, 1}; see ``cross-entropy`` application for general probability labels in [0, 1]
 
   - multi-class classification application
 
@@ -109,13 +109,13 @@ Core Parameters
 
     - ``goss``, Gradient-based One-Side Sampling
 
-- ``data`` :raw-html:`🔗︎`, default = ``""``, type = string, aliases: ``train``, ``train_data``, ``data_filename``
+- ``data`` :raw-html:`🔗︎`, default = ``""``, type = string, aliases: ``train``, ``train_data``, ``train_data_file``, ``data_filename``
 
   - path of training data, LightGBM will train from this data
 
   - **Note**: can be used only in CLI version
 
-- ``valid`` :raw-html:`🔗︎`, default = ``""``, type = string, aliases: ``test``, ``valid_data``, ``valid_data_file``, ``test_data``, ``valid_filenames``
+- ``valid`` :raw-html:`🔗︎`, default = ``""``, type = string, aliases: ``test``, ``valid_data``, ``valid_data_file``, ``test_data``, ``test_data_file``, ``valid_filenames``
 
   - path(s) of validation/test data, LightGBM will output metrics for these data
 
@@ -123,7 +123,7 @@ Core Parameters
 
   - **Note**: can be used only in CLI version
 
-- ``num_iterations`` :raw-html:`🔗︎`, default = ``100``, type = int, aliases: ``num_iteration``, ``num_tree``, ``num_trees``, ``num_round``, ``num_rounds``, ``num_boost_round``, ``n_estimators``, constraints: ``num_iterations >= 0``
+- ``num_iterations`` :raw-html:`🔗︎`, default = ``100``, type = int, aliases: ``num_iteration``, ``n_iter``, ``num_tree``, ``num_trees``, ``num_round``, ``num_rounds``, ``num_boost_round``, ``n_estimators``, constraints: ``num_iterations >= 0``
 
   - number of boosting iterations
 
@@ -131,17 +131,17 @@ Core Parameters
 
   - **Note**: internally, LightGBM constructs ``num_class * num_iterations`` trees for multi-class classification problems
 
-- ``learning_rate`` :raw-html:`🔗︎`, default = ``0.1``, type = double, aliases: ``shrinkage_rate``, constraints: ``learning_rate > 0.0``
+- ``learning_rate`` :raw-html:`🔗︎`, default = ``0.1``, type = double, aliases: ``shrinkage_rate``, ``eta``, constraints: ``learning_rate > 0.0``
 
   - shrinkage rate
 
   - in ``dart``, it also affects on normalization weights of dropped trees
 
-- ``num_leaves`` :raw-html:`🔗︎`, default = ``31``, type = int, aliases: ``num_leaf``, constraints: ``num_leaves > 1``
+- ``num_leaves`` :raw-html:`🔗︎`, default = ``31``, type = int, aliases: ``num_leaf``, ``max_leaves``, ``max_leaf``, constraints: ``num_leaves > 1``
 
   - max number of leaves in one tree
 
-- ``tree_learner`` :raw-html:`🔗︎`, default = ``serial``, type = enum, options: ``serial``, ``feature``, ``data``, ``voting``, aliases: ``tree``, ``tree_learner_type``
+- ``tree_learner`` :raw-html:`🔗︎`, default = ``serial``, type = enum, options: ``serial``, ``feature``, ``data``, ``voting``, aliases: ``tree``, ``tree_type``, ``tree_learner_type``
 
   - ``serial``, single machine tree learner
 
@@ -153,7 +153,7 @@ Core Parameters
 
   - refer to `Parallel Learning Guide <./Parallel-Learning-Guide.rst>`__ to get more details
 
-- ``num_threads`` :raw-html:`🔗︎`, default = ``0``, type = int, aliases: ``num_thread``, ``nthread``, ``nthreads``
+- ``num_threads`` :raw-html:`🔗︎`, default = ``0``, type = int, aliases: ``num_thread``, ``nthread``, ``nthreads``, ``n_jobs``
 
   - number of threads for LightGBM
 
@@ -177,7 +177,7 @@ Core Parameters
 
   - **Note**: refer to `Installation Guide <./Installation-Guide.rst#build-gpu-version>`__ to build LightGBM with GPU support
 
-- ``seed`` :raw-html:`🔗︎`, default = ``0``, type = int, aliases: ``random_seed``
+- ``seed`` :raw-html:`🔗︎`, default = ``0``, type = int, aliases: ``random_seed``, ``random_state``
 
   - this seed is used to generate other seeds, e.g. ``data_random_seed``, ``feature_fraction_seed``
 
@@ -252,7 +252,7 @@ Learning Control Parameters
 
   - L1 regularization
 
-- ``lambda_l2`` :raw-html:`🔗︎`, default = ``0.0``, type = double, aliases: ``reg_lambda``, constraints: ``lambda_l2 >= 0.0``
+- ``lambda_l2`` :raw-html:`🔗︎`, default = ``0.0``, type = double, aliases: ``reg_lambda``, ``lambda``, constraints: ``lambda_l2 >= 0.0``
 
   - L2 regularization
 
@@ -260,17 +260,17 @@ Learning Control Parameters
 
   - the minimal gain to perform split
 
-- ``drop_rate`` :raw-html:`🔗︎`, default = ``0.1``, type = double, constraints: ``0.0 <= drop_rate <= 1.0``
+- ``drop_rate`` :raw-html:`🔗︎`, default = ``0.1``, type = double, aliases: ``rate_drop``, constraints: ``0.0 <= drop_rate <= 1.0``
 
   - used only in ``dart``
 
-  - dropout rate
+  - dropout rate: a fraction of previous trees to drop during the dropout
 
- ``max_drop`` :raw-html:`🔗︎`, default = ``50``, type = int
 
   - used only in ``dart``
 
-  - max number of dropped trees on one iteration
+  - max number of dropped trees during one boosting iteration
 
   - ``<=0`` means no limit
 
@@ -278,7 +278,7 @@ Learning Control Parameters
 
   - used only in ``dart``
 
-  - probability of skipping drop
+  - probability of skipping the dropout procedure during a boosting iteration
 
 - ``xgboost_dart_mode`` :raw-html:`🔗︎`, default = ``false``, type = bool
 
@@ -350,7 +350,7 @@ Learning Control Parameters
 
   - you need to specify all features in order. For example, ``mc=-1,0,1`` means decreasing for 1st feature, non-constraint for 2nd feature and increasing for the 3rd feature
 
-- ``feature_contri`` :raw-html:`🔗︎`, default = ``None``, type = multi-double, aliases: ``fc``, ``fp``, ``feature_penalty``
+- ``feature_contri`` :raw-html:`🔗︎`, default = ``None``, type = multi-double, aliases: ``feature_contrib``, ``fc``, ``fp``, ``feature_penalty``
 
   - used to control feature's split gain, will use ``gain[i] = max(0, feature_contri[i]) * gain[i]`` to replace the split gain of i-th feature
 
@@ -397,13 +397,13 @@ IO Parameters
 
   - set this to larger value if data is very sparse
 
-- ``histogram_pool_size`` :raw-html:`🔗︎`, default = ``-1.0``, type = double
+- ``histogram_pool_size`` :raw-html:`🔗︎`, default = ``-1.0``, type = double, aliases: ``hist_pool_size``
 
   - max cache size in MB for historical histogram
 
   - ``< 0`` means no limit
 
-- ``data_random_seed`` :raw-html:`🔗︎`, default = ``1``, type = int
+- ``data_random_seed`` :raw-html:`🔗︎`, default = ``1``, type = int, aliases: ``data_seed``
 
   - random seed for data partition in parallel learning (excluding the ``feature_parallel`` mode)
 
@@ -413,7 +413,7 @@ IO Parameters
 
   - **Note**: can be used only in CLI version
 
-- ``snapshot_freq`` :raw-html:`🔗︎`, default = ``-1``, type = int
+- ``snapshot_freq`` :raw-html:`🔗︎`, default = ``-1``, type = int, aliases: ``save_period``
 
   - frequency of saving model file snapshot
 
@@ -431,7 +431,7 @@ IO Parameters
 
   - **Note**: can be used only in CLI version
 
-- ``output_result`` :raw-html:`🔗︎`, default = ``LightGBM_predict_result.txt``, type = string, aliases: ``predict_result``, ``prediction_result``
+- ``output_result`` :raw-html:`🔗︎`, default = ``LightGBM_predict_result.txt``, type = string, aliases: ``predict_result``, ``prediction_result``, ``predict_name``, ``prediction_name``, ``pred_name``, ``name_pred``
 
   - filename of prediction result in ``prediction`` task
 
@@ -644,7 +644,7 @@ Objective Parameters
 
   - used only in ``multi-class`` classification application
 
-- ``is_unbalance`` :raw-html:`🔗︎`, default = ``false``, type = bool, aliases: ``unbalanced_sets``
+- ``is_unbalance`` :raw-html:`🔗︎`, default = ``false``, type = bool, aliases: ``unbalance``, ``unbalanced_sets``
 
   - used only in ``binary`` application
 
@@ -729,7 +729,7 @@ Metric Parameters
 
   - metric(s) to be evaluated on the evaluation sets **in addition** to what is provided in the training arguments
 
-    - ``""`` (empty string or not specific) means that metric corresponding to specified ``objective`` will be used (this is possible only for pre-defined objective functions, otherwise no evaluation metric will be added)
+    - ``""`` (empty string or not specified) means that metric corresponding to specified ``objective`` will be used (this is possible only for pre-defined objective functions, otherwise no evaluation metric will be added)
 
     - ``"None"`` (string, **not** a ``None`` value) means that no metric will be registered, aliases: ``na``
 
diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h
index 5c49d2063a7..20e48218a37 100644
--- a/include/LightGBM/config.h
+++ b/include/LightGBM/config.h
@@ -110,7 +110,7 @@ struct Config {
   // descl2 = ``mape``, `MAPE loss <https://en.wikipedia.org/wiki/Mean_absolute_percentage_error>`__, aliases: ``mean_absolute_percentage_error``
   // descl2 = ``gamma``, Gamma regression with log-link. It might be useful, e.g., for modeling insurance claims severity, or for any target that might be `gamma-distributed <https://en.wikipedia.org/wiki/Gamma_distribution>`__
   // descl2 = ``tweedie``, Tweedie regression with log-link. It might be useful, e.g., for modeling total loss in insurance, or for any target that might be `tweedie-distributed <https://en.wikipedia.org/wiki/Tweedie_distribution>`__
-  // desc = ``binary``, binary `log loss <https://en.wikipedia.org/wiki/Cross_entropy>`__ classification (or logistic regression). Requires labels in {0, 1}; see ``xentropy`` for general probability labels in [0, 1]
+  // desc = ``binary``, binary `log loss <https://en.wikipedia.org/wiki/Cross_entropy>`__ classification (or logistic regression). Requires labels in {0, 1}; see ``cross-entropy`` application for general probability labels in [0, 1]
   // desc = multi-class classification application
   // descl2 = ``multiclass``, `softmax <https://en.wikipedia.org/wiki/Softmax_function>`__ objective function, aliases: ``softmax``
   // descl2 = ``multiclassova``, `One-vs-All <https://en.wikipedia.org/wiki/Multiclass_classification#One-vs.-rest>`__ binary objective function, aliases: ``multiclass_ova``, ``ova``, ``ovr``
@@ -135,33 +135,33 @@ struct Config {
   // desc = ``goss``, Gradient-based One-Side Sampling
   std::string boosting = "gbdt";
 
-  // alias = train, train_data, data_filename
+  // alias = train, train_data, train_data_file, data_filename
   // desc = path of training data, LightGBM will train from this data
   // desc = **Note**: can be used only in CLI version
   std::string data = "";
 
-  // alias = test, valid_data, valid_data_file, test_data, valid_filenames
+  // alias = test, valid_data, valid_data_file, test_data, test_data_file, valid_filenames
   // default = ""
   // desc = path(s) of validation/test data, LightGBM will output metrics for these data
   // desc = support multiple validation data, separated by ``,``
   // desc = **Note**: can be used only in CLI version
   std::vector<std::string> valid;
 
-  // alias = num_iteration, num_tree, num_trees, num_round, num_rounds, num_boost_round, n_estimators
+  // alias = num_iteration, n_iter, num_tree, num_trees, num_round, num_rounds, num_boost_round, n_estimators
   // check = >=0
   // desc = number of boosting iterations
   // desc = **Note**: for Python/R-package, **this parameter is ignored**, use ``num_boost_round`` (Python) or ``nrounds`` (R) input arguments of ``train`` and ``cv`` methods instead
   // desc = **Note**: internally, LightGBM constructs ``num_class * num_iterations`` trees for multi-class classification problems
   int num_iterations = 100;
 
-  // alias = shrinkage_rate
+  // alias = shrinkage_rate, eta
   // check = >0.0
   // desc = shrinkage rate
   // desc = in ``dart``, it also affects on normalization weights of dropped trees
   double learning_rate = 0.1;
 
   // default = 31
-  // alias = num_leaf
+  // alias = num_leaf, max_leaves, max_leaf
   // check = >1
   // desc = max number of leaves in one tree
   int num_leaves = kDefaultNumLeaves;
 
@@ -169,7 +169,7 @@ struct Config {
   // [doc-only]
   // type = enum
   // options = serial, feature, data, voting
-  // alias = tree, tree_learner_type
+  // alias = tree, tree_type, tree_learner_type
   // desc = ``serial``, single machine tree learner
   // desc = ``feature``, feature parallel tree learner, aliases: ``feature_parallel``
   // desc = ``data``, data parallel tree learner, aliases: ``data_parallel``
@@ -177,7 +177,7 @@ struct Config {
   // desc = refer to `Parallel Learning Guide <./Parallel-Learning-Guide.rst>`__ to get more details
   std::string tree_learner = "serial";
 
-  // alias = num_thread, nthread, nthreads
+  // alias = num_thread, nthread, nthreads, n_jobs
   // desc = number of threads for LightGBM
   // desc = ``0`` means default number of threads in OpenMP
   // desc = for the best speed, set this to the number of **real CPU cores**, not the number of threads (most CPUs use `hyper-threading <https://en.wikipedia.org/wiki/Hyper-threading>`__ to generate 2 threads per CPU core)
@@ -197,7 +197,7 @@ struct Config {
   std::string device_type = "cpu";
 
   // [doc-only]
-  // alias = random_seed
+  // alias = random_seed, random_state
   // desc = this seed is used to generate other seeds, e.g. ``data_random_seed``, ``feature_fraction_seed``
   // desc = will be overridden, if you set other seeds
   int seed = 0;
@@ -266,7 +266,7 @@ struct Config {
   // desc = L1 regularization
   double lambda_l1 = 0.0;
 
-  // alias = reg_lambda
+  // alias = reg_lambda, lambda
   // check = >=0.0
   // desc = L2 regularization
   double lambda_l2 = 0.0;
@@ -276,21 +276,22 @@ struct Config {
   // desc = the minimal gain to perform split
   double min_gain_to_split = 0.0;
 
+  // alias = rate_drop
   // check = >=0.0
   // check = <=1.0
   // desc = used only in ``dart``
-  // desc = dropout rate
+  // desc = dropout rate: a fraction of previous trees to drop during the dropout
   double drop_rate = 0.1;
 
   // desc = used only in ``dart``
-  // desc = max number of dropped trees on one iteration
+  // desc = max number of dropped trees during one boosting iteration
   // desc = ``<=0`` means no limit
   int max_drop = 50;
 
   // check = >=0.0
   // check = <=1.0
   // desc = used only in ``dart``
-  // desc = probability of skipping drop
+  // desc = probability of skipping the dropout procedure during a boosting iteration
   double skip_drop = 0.5;
 
   // desc = used only in ``dart``
@@ -355,7 +356,7 @@ struct Config {
   std::vector<int8_t> monotone_constraints;
 
   // type = multi-double
-  // alias = fc, fp, feature_penalty
+  // alias = feature_contrib, fc, fp, feature_penalty
   // default = None
   // desc = used to control feature's split gain, will use ``gain[i] = max(0, feature_contri[i]) * gain[i]`` to replace the split gain of i-th feature
   // desc = you need to specify all features in order
@@ -395,10 +396,12 @@ struct Config {
   // desc = set this to larger value if data is very sparse
   int bin_construct_sample_cnt = 200000;
 
+  // alias = hist_pool_size
   // desc = max cache size in MB for historical histogram
   // desc = ``< 0`` means no limit
   double histogram_pool_size = -1.0;
 
+  // alias = data_seed
   // desc = random seed for data partition in parallel learning (excluding the ``feature_parallel`` mode)
   int data_random_seed = 1;
 
@@ -407,6 +410,7 @@ struct Config {
   // desc = **Note**: can be used only in CLI version
   std::string output_model = "LightGBM_model.txt";
 
+  // alias = save_period
   // desc = frequency of saving model file snapshot
   // desc = set this to positive value to enable this function. For example, the model file will be snapshotted at each iteration if ``snapshot_freq=1``
   // desc = **Note**: can be used only in CLI version
@@ -419,7 +423,7 @@ struct Config {
   // desc = **Note**: can be used only in CLI version
   std::string input_model = "";
 
-  // alias = predict_result, prediction_result
+  // alias = predict_result, prediction_result, predict_name, prediction_name, pred_name, name_pred
   // desc = filename of prediction result in ``prediction`` task
   // desc = **Note**: can be used only in CLI version
   std::string output_result = "LightGBM_predict_result.txt";
@@ -588,7 +592,7 @@ struct Config {
   // desc = used only in ``multi-class`` classification application
   int num_class = 1;
 
-  // alias = unbalanced_sets
+  // alias = unbalance, unbalanced_sets
   // desc = used only in ``binary`` application
   // desc = set this to ``true`` if training data are unbalance
   // desc = **Note**: this parameter cannot be used at the same time with ``scale_pos_weight``, choose only **one** of them
@@ -658,7 +662,7 @@ struct Config {
   // default = ""
   // type = multi-enum
   // desc = metric(s) to be evaluated on the evaluation sets **in addition** to what is provided in the training arguments
-  // descl2 = ``""`` (empty string or not specific) means that metric corresponding to specified ``objective`` will be used (this is possible only for pre-defined objective functions, otherwise no evaluation metric will be added)
+  // descl2 = ``""`` (empty string or not specified) means that metric corresponding to specified ``objective`` will be used (this is possible only for pre-defined objective functions, otherwise no evaluation metric will be added)
   // descl2 = ``"None"`` (string, **not** a ``None`` value) means that no metric will be registered, aliases: ``na``
   // descl2 = ``l1``, absolute loss, aliases: ``mean_absolute_error``, ``mae``, ``regression_l1``
   // descl2 = ``l2``, square loss, aliases: ``mean_squared_error``, ``mse``, ``regression_l2``, ``regression``
diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py
index 4da4b7c4a05..33dda5ed5e6 100644
--- a/python-package/lightgbm/basic.py
+++ b/python-package/lightgbm/basic.py
@@ -660,12 +660,8 @@ def _lazy_init(self, data, label=None, reference=None,
                     warnings.warn('{0} keyword has been found in `params` and will be ignored. '
                                   'Please use {0} argument of the Dataset constructor to pass this parameter.'.format(key))
         self.predictor = predictor
-        if "verbosity" in params:
-            params.setdefault("verbose", params.pop("verbosity"))
         if silent:
             params["verbose"] = 0
-        elif "verbose" not in params:
-            params["verbose"] = 1
         # get categorical features
         if categorical_feature is not None:
             categorical_indices = set()
@@ -1340,12 +1336,8 @@ def __init__(self, params=None, train_set=None, model_file=None, silent=False):
         self.best_iteration = -1
         self.best_score = {}
         params = {} if params is None else params
-        if "verbosity" in params:
-            params.setdefault("verbose", params.pop("verbosity"))
         if silent:
             params["verbose"] = 0
-        elif "verbose" not in params:
-            params["verbose"] = 1
         if train_set is not None:
             # Training task
             if not isinstance(train_set, Dataset):
diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py
index 0c3131ad90a..21d2678c799 100644
--- a/python-package/lightgbm/engine.py
+++ b/python-package/lightgbm/engine.py
@@ -98,7 +98,8 @@ def train(params, train_set, num_boost_round=100,
         The trained Booster model.
     """
     # create predictor first
-    for alias in ["num_boost_round", "num_iterations", "num_iteration", "num_tree", "num_trees", "num_round", "num_rounds", "n_estimators"]:
+    for alias in ["num_iterations", "num_iteration", "n_iter", "num_tree", "num_trees",
+                  "num_round", "num_rounds", "num_boost_round", "n_estimators"]:
         if alias in params:
             num_boost_round = int(params.pop(alias))
             warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
@@ -396,7 +397,8 @@ def cv(params, train_set, num_boost_round=100,
     if not isinstance(train_set, Dataset):
         raise TypeError("Traninig only accepts Dataset object")
 
-    for alias in ["num_boost_round", "num_iterations", "num_iteration", "num_tree", "num_trees", "num_round", "num_rounds", "n_estimators"]:
+    for alias in ["num_iterations", "num_iteration", "n_iter", "num_tree", "num_trees",
+                  "num_round", "num_rounds", "num_boost_round", "n_estimators"]:
         if alias in params:
             warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
             num_boost_round = params.pop(alias)
diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py
index 6f60c2d3628..ecdf25e7ab3 100644
--- a/python-package/lightgbm/sklearn.py
+++ b/python-package/lightgbm/sklearn.py
@@ -397,9 +397,6 @@ def fit(self, X, y,
         self._fobj = None
         evals_result = {}
         params = self.get_params()
-        # sklearn interface has another naming convention
-        params.setdefault('seed', params.pop('random_state'))
-        params.setdefault('nthread', params.pop('n_jobs'))
         # user can set verbose with kwargs, it has higher priority
         if 'verbose' not in params and self.silent:
             params['verbose'] = 0
diff --git a/src/io/config_auto.cpp b/src/io/config_auto.cpp
index 788ff6566d7..3a6e872b7e3 100644
--- a/src/io/config_auto.cpp
+++ b/src/io/config_auto.cpp
@@ -11,13 +11,16 @@ std::unordered_map<std::string, std::string> Config::alias_table({
   {"boost", "boosting"},
   {"train", "data"},
   {"train_data", "data"},
+  {"train_data_file", "data"},
   {"data_filename", "data"},
   {"test", "valid"},
   {"valid_data", "valid"},
   {"valid_data_file", "valid"},
   {"test_data", "valid"},
+  {"test_data_file", "valid"},
   {"valid_filenames", "valid"},
   {"num_iteration", "num_iterations"},
+  {"n_iter", "num_iterations"},
   {"num_tree", "num_iterations"},
   {"num_trees", "num_iterations"},
   {"num_round", "num_iterations"},
@@ -25,14 +28,20 @@ std::unordered_map<std::string, std::string> Config::alias_table({
   {"num_boost_round", "num_iterations"},
   {"n_estimators", "num_iterations"},
   {"shrinkage_rate", "learning_rate"},
+  {"eta", "learning_rate"},
   {"num_leaf", "num_leaves"},
+  {"max_leaves", "num_leaves"},
+  {"max_leaf", "num_leaves"},
   {"tree", "tree_learner"},
+  {"tree_type", "tree_learner"},
   {"tree_learner_type", "tree_learner"},
   {"num_thread", "num_threads"},
   {"nthread", "num_threads"},
   {"nthreads", "num_threads"},
+  {"n_jobs", "num_threads"},
   {"device", "device_type"},
   {"random_seed", "seed"},
+  {"random_state", "seed"},
   {"min_data_per_leaf", "min_data_in_leaf"},
   {"min_data", "min_data_in_leaf"},
   {"min_child_samples", "min_data_in_leaf"},
@@ -53,10 +62,13 @@ std::unordered_map<std::string, std::string> Config::alias_table({
   {"max_leaf_output", "max_delta_step"},
   {"reg_alpha", "lambda_l1"},
   {"reg_lambda", "lambda_l2"},
+  {"lambda", "lambda_l2"},
   {"min_split_gain", "min_gain_to_split"},
+  {"rate_drop", "drop_rate"},
   {"topk", "top_k"},
   {"mc", "monotone_constraints"},
   {"monotone_constraint", "monotone_constraints"},
+  {"feature_contrib", "feature_contri"},
   {"fc", "feature_contri"},
   {"fp", "feature_contri"},
   {"feature_penalty", "feature_contri"},
@@ -66,12 +78,19 @@ std::unordered_map<std::string, std::string> Config::alias_table({
   {"forced_splits", "forcedsplits_filename"},
   {"verbose", "verbosity"},
   {"subsample_for_bin", "bin_construct_sample_cnt"},
+  {"hist_pool_size", "histogram_pool_size"},
+  {"data_seed", "data_random_seed"},
   {"model_output", "output_model"},
   {"model_out", "output_model"},
+  {"save_period", "snapshot_freq"},
   {"model_input", "input_model"},
   {"model_in", "input_model"},
   {"predict_result", "output_result"},
   {"prediction_result", "output_result"},
+  {"predict_name", "output_result"},
+  {"prediction_name", "output_result"},
+  {"pred_name", "output_result"},
+  {"name_pred", "output_result"},
   {"init_score_filename", "initscore_filename"},
   {"init_score_file", "initscore_filename"},
   {"init_score", "initscore_filename"},
@@ -114,6 +133,7 @@ std::unordered_map<std::string, std::string> Config::alias_table({
   {"contrib", "predict_contrib"},
   {"convert_model_file", "convert_model"},
   {"num_classes", "num_class"},
+  {"unbalance", "is_unbalance"},
   {"unbalanced_sets", "is_unbalance"},
{"metrics", "metric"}, {"metric_types", "metric"}, diff --git a/tests/python_package_test/test_sklearn.py b/tests/python_package_test/test_sklearn.py index b2334291d12..c3640d2306f 100644 --- a/tests/python_package_test/test_sklearn.py +++ b/tests/python_package_test/test_sklearn.py @@ -179,17 +179,6 @@ def test_feature_importances_type(self): importance_gain_top1 = sorted(importances_gain, reverse=True)[0] self.assertNotEqual(importance_split_top1, importance_gain_top1) - def test_sklearn_backward_compatibility(self): - iris = load_iris() - X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.2, random_state=42) - - # Tests that `seed` is the same as `random_state` - clf_1 = lgb.sklearn.LGBMClassifier(seed=42, subsample=0.6, colsample_bytree=0.8) - clf_2 = lgb.sklearn.LGBMClassifier(random_state=42, subsample=0.6, colsample_bytree=0.8) - y_pred_1 = clf_1.fit(X_train, y_train).predict_proba(X_test) - y_pred_2 = clf_2.fit(X_train, y_train).predict_proba(X_test) - np.testing.assert_allclose(y_pred_1, y_pred_2) - # sklearn <0.19 cannot accept instance, but many tests could be passed only with min_data=1 and min_data_in_bin=1 @unittest.skipIf(not sklearn_at_least_019, 'scikit-learn version is less than 0.19') def test_sklearn_integration(self):