[python] handle params aliases centralized (#2489)
* handle aliases centralized

* convert aliases dict to class
StrikerRUS committed Oct 22, 2019
1 parent fbf9568 commit 5dcd4be
Showing 4 changed files with 88 additions and 29 deletions.
74 changes: 63 additions & 11 deletions python-package/lightgbm/basic.py
@@ -170,6 +170,57 @@ class LightGBMError(Exception):
pass


+class _ConfigAliases(object):
+    aliases = {"boosting": {"boosting",
+                            "boosting_type",
+                            "boost"},
+               "categorical_feature": {"categorical_feature",
+                                       "cat_feature",
+                                       "categorical_column",
+                                       "cat_column"},
+               "early_stopping_round": {"early_stopping_round",
+                                        "early_stopping_rounds",
+                                        "early_stopping",
+                                        "n_iter_no_change"},
+               "eval_at": {"eval_at",
+                           "ndcg_eval_at",
+                           "ndcg_at",
+                           "map_eval_at",
+                           "map_at"},
+               "header": {"header",
+                          "has_header"},
+               "machines": {"machines",
+                            "workers",
+                            "nodes"},
+               "metric": {"metric",
+                          "metrics",
+                          "metric_types"},
+               "num_class": {"num_class",
+                             "num_classes"},
+               "num_iterations": {"num_iterations",
+                                  "num_iteration",
+                                  "n_iter",
+                                  "num_tree",
+                                  "num_trees",
+                                  "num_round",
+                                  "num_rounds",
+                                  "num_boost_round",
+                                  "n_estimators"},
+               "objective": {"objective",
+                             "objective_type",
+                             "app",
+                             "application"},
+               "verbosity": {"verbosity",
+                             "verbose"}}
+
+    @classmethod
+    def get(cls, *args):
+        ret = set()
+        for i in args:
+            ret |= cls.aliases.get(i, set())
+        return ret


MAX_INT32 = (1 << 31) - 1

"""Macro definition of data type in C API of LightGBM"""
@@ -741,8 +792,7 @@ def _set_init_score_by_predictor(self, predictor, data, used_indices=None):
data_has_header = False
if isinstance(data, string_type):
# check data has header or not
if self.params.get("has_header", False) or self.params.get("header", False):
data_has_header = True
data_has_header = any(self.params.get(alias, False) for alias in _ConfigAliases.get("header"))
init_score = predictor.predict(data,
raw_score=True,
data_has_header=data_has_header,
@@ -793,7 +843,7 @@ def _lazy_init(self, data, label=None, reference=None,
'Please use {0} argument of the Dataset constructor to pass this parameter.'
.format(key))
# user can set verbose with params, it has higher priority
-        if not any(verbose_alias in params for verbose_alias in ('verbose', 'verbosity')) and silent:
+        if not any(verbose_alias in params for verbose_alias in _ConfigAliases.get("verbosity")) and silent:
params["verbose"] = -1
# get categorical features
if categorical_feature is not None:
@@ -810,10 +860,10 @@
raise TypeError("Wrong type({}) or unknown name({}) in categorical_feature"
.format(type(name).__name__, name))
if categorical_indices:
if "categorical_feature" in params or "categorical_column" in params:
warnings.warn('categorical_feature in param dict is overridden.')
params.pop("categorical_feature", None)
params.pop("categorical_column", None)
for cat_alias in _ConfigAliases.get("categorical_feature"):
if cat_alias in params:
warnings.warn('{} in param dict is overridden.'.format(cat_alias))
params.pop(cat_alias, None)
params['categorical_column'] = sorted(categorical_indices)

params_str = param_dict_to_str(params)
@@ -1259,7 +1309,9 @@ def _set_predictor(self, predictor):
"""
if predictor is self._predictor:
return self
-        if self.data is not None or (self.used_indices is not None and self.reference is not None and self.reference.data is not None):
+        if self.data is not None or (self.used_indices is not None
+                                     and self.reference is not None
+                                     and self.reference.data is not None):
self._predictor = predictor
return self._free_handle()
else:
@@ -1634,7 +1686,7 @@ def __init__(self, params=None, train_set=None, model_file=None, model_str=None,
self.best_score = {}
params = {} if params is None else copy.deepcopy(params)
# user can set verbose with params, it has higher priority
-        if not any(verbose_alias in params for verbose_alias in ('verbose', 'verbosity')) and silent:
+        if not any(verbose_alias in params for verbose_alias in _ConfigAliases.get("verbosity")) and silent:
params["verbose"] = -1
if train_set is not None:
# Training task
@@ -1643,7 +1695,7 @@
.format(type(train_set).__name__))
params_str = param_dict_to_str(params)
# set network if necessary
for alias in ["machines", "workers", "nodes"]:
for alias in _ConfigAliases.get("machines"):
if alias in params:
machines = params[alias]
if isinstance(machines, string_type):
@@ -1863,7 +1915,7 @@ def reset_parameter(self, params):
self : Booster
Booster with new parameters.
"""
-        if any(metric_alias in params for metric_alias in ('metric', 'metrics', 'metric_types')):
+        if any(metric_alias in params for metric_alias in _ConfigAliases.get("metric")):
self.__need_reload_eval_info = True
params_str = param_dict_to_str(params)
if params_str:
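
The alias membership test above is the pattern this commit applies throughout; a small sketch, assuming params is a plain dict of user-supplied parameters:

    params = {"metrics": ["auc", "binary_logloss"], "learning_rate": 0.05}

    # True: "metrics" is an alias of "metric", so reset_parameter
    # marks the evaluation info for reload.
    any(metric_alias in params for metric_alias in _ConfigAliases.get("metric"))
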
11 changes: 4 additions & 7 deletions python-package/lightgbm/callback.py
@@ -7,6 +7,7 @@
import warnings
from operator import gt, lt

+from .basic import _ConfigAliases
from .compat import range_


@@ -130,9 +131,7 @@ def reset_parameter(**kwargs):
def _callback(env):
new_parameters = {}
for key, value in kwargs.items():
-            if key in ['num_class', 'num_classes',
-                       'boosting', 'boost', 'boosting_type',
-                       'metric', 'metrics', 'metric_types']:
+            if key in _ConfigAliases.get("num_class", "boosting", "metric"):
raise RuntimeError("Cannot reset {} during training".format(repr(key)))
if isinstance(value, list):
if len(value) != env.end_iteration - env.begin_iteration:
@@ -184,10 +183,8 @@ def early_stopping(stopping_rounds, first_metric_only=False, verbose=True):
first_metric = ['']

def _init(env):
-        enabled[0] = not any((boost_alias in env.params
-                              and env.params[boost_alias] == 'dart') for boost_alias in ('boosting',
-                                                                                         'boosting_type',
-                                                                                         'boost'))
+        enabled[0] = not any(env.params.get(boost_alias, "") == 'dart' for boost_alias
+                             in _ConfigAliases.get("boosting"))
if not enabled[0]:
warnings.warn('Early stopping is not available in dart mode')
return
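
The rewritten check is equivalent but terser: a boosting alias missing from env.params defaults to "" and never compares equal to 'dart'. A sketch with hypothetical parameter values:

    env_params = {"boosting_type": "dart", "num_leaves": 31}

    # Early stopping is disabled when any boosting alias is set to 'dart'.
    uses_dart = any(env_params.get(boost_alias, "") == 'dart'
                    for boost_alias in _ConfigAliases.get("boosting"))
    # uses_dart is True here, so _init warns and returns early.
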
20 changes: 12 additions & 8 deletions python-package/lightgbm/engine.py
@@ -11,7 +11,7 @@
import numpy as np

from . import callback
-from .basic import Booster, Dataset, LightGBMError, _InnerPredictor
+from .basic import Booster, Dataset, LightGBMError, _ConfigAliases, _InnerPredictor
from .compat import (SKLEARN_INSTALLED, _LGBMGroupKFold, _LGBMStratifiedKFold,
string_type, integer_types, range_, zip_)

@@ -140,14 +140,15 @@ def train(params, train_set, num_boost_round=100,
# create predictor first
params = copy.deepcopy(params)
if fobj is not None:
+        for obj_alias in _ConfigAliases.get("objective"):
+            params.pop(obj_alias, None)
params['objective'] = 'none'
for alias in ["num_iterations", "num_iteration", "n_iter", "num_tree", "num_trees",
"num_round", "num_rounds", "num_boost_round", "n_estimators"]:
for alias in _ConfigAliases.get("num_iterations"):
if alias in params:
num_boost_round = params.pop(alias)
warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
break
for alias in ["early_stopping_round", "early_stopping_rounds", "early_stopping", "n_iter_no_change"]:
for alias in _ConfigAliases.get("early_stopping_round"):
if alias in params:
early_stopping_rounds = params.pop(alias)
warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
@@ -314,7 +315,7 @@ def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratifi
flatted_group = np.zeros(num_data, dtype=np.int32)
folds = folds.split(X=np.zeros(num_data), y=full_data.get_label(), groups=flatted_group)
else:
-        if 'objective' in params and params['objective'] == 'lambdarank':
+        if any(params.get(obj_alias, "") == "lambdarank" for obj_alias in _ConfigAliases.get("objective")):
if not SKLEARN_INSTALLED:
raise LightGBMError('Scikit-learn is required for lambdarank cv.')
# lambdarank task, split according to groups
@@ -495,14 +496,15 @@ def cv(params, train_set, num_boost_round=100,

params = copy.deepcopy(params)
if fobj is not None:
+        for obj_alias in _ConfigAliases.get("objective"):
+            params.pop(obj_alias, None)
params['objective'] = 'none'
for alias in ["num_iterations", "num_iteration", "n_iter", "num_tree", "num_trees",
"num_round", "num_rounds", "num_boost_round", "n_estimators"]:
for alias in _ConfigAliases.get("num_iterations"):
if alias in params:
warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
num_boost_round = params.pop(alias)
break
for alias in ["early_stopping_round", "early_stopping_rounds", "early_stopping", "n_iter_no_change"]:
for alias in _ConfigAliases.get("early_stopping_round"):
if alias in params:
warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
early_stopping_rounds = params.pop(alias)
@@ -523,6 +525,8 @@
.set_categorical_feature(categorical_feature)

if metrics is not None:
+        for metric_alias in _ConfigAliases.get("metric"):
+            params.pop(metric_alias, None)
params['metric'] = metrics

results = collections.defaultdict(list)
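
The pop-and-warn loops above all follow one pattern: an alias found in params wins over the keyword argument and is removed so the setting is not passed through twice. A sketch with hypothetical values:

    import warnings

    params = {"n_estimators": 500, "learning_rate": 0.1}
    num_boost_round = 100  # the keyword-argument default

    for alias in _ConfigAliases.get("num_iterations"):
        if alias in params:
            num_boost_round = params.pop(alias)
            warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
            break
    # num_boost_round is now 500; "n_estimators" is gone from params.
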
12 changes: 9 additions & 3 deletions python-package/lightgbm/sklearn.py
@@ -5,7 +5,7 @@

import numpy as np

-from .basic import Dataset, LightGBMError
+from .basic import Dataset, LightGBMError, _ConfigAliases
from .compat import (SKLEARN_INSTALLED, _LGBMClassifierBase,
LGBMNotFittedError, _LGBMLabelEncoder, _LGBMModelBase,
_LGBMRegressorBase, _LGBMCheckXY, _LGBMCheckArray, _LGBMCheckConsistentLength,
@@ -489,15 +489,21 @@ def fit(self, X, y,
evals_result = {}
params = self.get_params()
# user can set verbose with kwargs, it has higher priority
-        if not any(verbose_alias in params for verbose_alias in ('verbose', 'verbosity')) and self.silent:
+        if not any(verbose_alias in params for verbose_alias in _ConfigAliases.get("verbosity")) and self.silent:
params['verbose'] = -1
params.pop('silent', None)
params.pop('importance_type', None)
params.pop('n_estimators', None)
params.pop('class_weight', None)
+        for alias in _ConfigAliases.get('objective'):
+            params.pop(alias, None)
if self._n_classes is not None and self._n_classes > 2:
+            for alias in _ConfigAliases.get('num_class'):
+                params.pop(alias, None)
params['num_class'] = self._n_classes
if hasattr(self, '_eval_at'):
+            for alias in _ConfigAliases.get('eval_at'):
+                params.pop(alias, None)
params['eval_at'] = self._eval_at
params['objective'] = self._objective
if self._fobj:
@@ -518,7 +524,7 @@ def fit(self, X, y,
elif isinstance(self, LGBMRanker):
original_metric = "ndcg"
# overwrite default metric by explicitly set metric
-        for metric_alias in ['metric', 'metrics', 'metric_types']:
+        for metric_alias in _ConfigAliases.get("metric"):
if metric_alias in params:
original_metric = params.pop(metric_alias)
# concatenate metric from params (or default if not provided in params) and eval_metric
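
Same idea in fit: an explicitly supplied metric alias replaces the estimator's default and is stripped from params before training. A sketch; the default metric value is hypothetical:

    params = {"metric_types": "ndcg", "learning_rate": 0.1}
    original_metric = "binary_logloss"  # hypothetical per-estimator default

    for metric_alias in _ConfigAliases.get("metric"):
        if metric_alias in params:
            original_metric = params.pop(metric_alias)
    # original_metric is now "ndcg" and the alias no longer leaks into params.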
