diff --git a/optuna/integration/mlflow.py b/optuna/integration/mlflow.py
index 8ab93a255f..8b9cd685ea 100644
--- a/optuna/integration/mlflow.py
+++ b/optuna/integration/mlflow.py
@@ -1,9 +1,7 @@
 import functools
-from itertools import islice
 from typing import Any
 from typing import Callable
 from typing import Dict
-from typing import Generator
 from typing import List
 from typing import Optional
 from typing import Sequence
@@ -18,8 +16,6 @@ with try_import() as _imports:
     import mlflow
-    from mlflow.utils.validation import MAX_METRICS_PER_BATCH
-    from mlflow.utils.validation import MAX_PARAMS_TAGS_PER_BATCH
 
 
 RUN_ID_ATTRIBUTE_KEY = "mlflow_run_id"
 
 
@@ -280,10 +276,7 @@ def _set_tags(self, trial: optuna.trial.FrozenTrial, study: optuna.study.Study)
             if len(value) > max_val_length:
                 tags[key] = "{}...".format(value[: max_val_length - 3])
 
-        # This sets the tags for MLflow.
-        # MLflow handles up to 100 tags per request.
-        for tags_chunk in _dict_chunks(tags, MAX_PARAMS_TAGS_PER_BATCH):
-            mlflow.set_tags(tags_chunk)
+        mlflow.set_tags(tags)
 
     def _log_metrics(self, values: Optional[List[float]]) -> None:
         """Log the trial results as metrics to MLflow.
@@ -315,10 +308,8 @@ def _log_metrics(self, values: Optional[List[float]]) -> None:
         else:
             names = [*self._metric_name]
 
-        # MLflow handles up to 1000 metrics per request.
         metrics = {name: val for name, val in zip(names, values)}
-        for metric_chunk in _dict_chunks(metrics, MAX_METRICS_PER_BATCH):
-            mlflow.log_metrics(metric_chunk)
+        mlflow.log_metrics(metrics)
 
     @staticmethod
     def _log_params(params: Dict[str, Any]) -> None:
@@ -327,23 +318,4 @@ def _log_params(params: Dict[str, Any]) -> None:
         Args:
             params: Trial params.
         """
-        # MLflow handles up to 100 parameters per request.
-        for params_chunk in _dict_chunks(params, MAX_PARAMS_TAGS_PER_BATCH):
-            mlflow.log_params(params_chunk)
-
-
-def _dict_chunks(
-    dict_data: Dict[str, Any], num_elements_per_dict: int
-) -> Generator[Dict[str, Any], None, None]:
-    """Splits a dictionary into chunks of maximum size num_elements_per_dict.
-
-    Args:
-        dict_data: Dictionary to be chunked.
-        num_elements_per_dict: Maximum size of each chunk.
-
-    Returns:
-        Generator of dictionaries.
-    """
-    it = iter(dict_data)
-    for _ in range(0, len(dict_data), num_elements_per_dict):
-        yield {k: dict_data[k] for k in islice(it, num_elements_per_dict)}
+        mlflow.log_params(params)
diff --git a/tests/integration_tests/test_mlflow.py b/tests/integration_tests/test_mlflow.py
index d148624af7..77ff62c839 100644
--- a/tests/integration_tests/test_mlflow.py
+++ b/tests/integration_tests/test_mlflow.py
@@ -527,36 +527,3 @@ def test_multiobjective_raises_on_name_mismatch(tmpdir: py.path.local, metrics:
 
     with pytest.raises(ValueError):
         study.optimize(_multiobjective_func, n_trials=1, callbacks=[mlflc])
-
-
-def test_chunk_info(tmpdir: py.path.local) -> None:
-
-    num_objective = mlflow.utils.validation.MAX_METRICS_PER_BATCH + 1
-    num_params = mlflow.utils.validation.MAX_PARAMS_TAGS_PER_BATCH + 1
-
-    def objective(trial: optuna.trial.Trial) -> Tuple[float, ...]:
-        for i in range(num_params):
-            trial.suggest_float(f"x_{i}", 0, 1)
-
-        return tuple([1.0] * num_objective)
-
-    tracking_uri = f"file:{tmpdir}"
-    study_name = "my_study"
-    n_trials = 1
-
-    mlflc = MLflowCallback(tracking_uri=tracking_uri)
-    study = optuna.create_study(study_name=study_name, directions=["maximize"] * num_objective)
-    study.optimize(objective, n_trials=n_trials, callbacks=[mlflc])
-
-    mlfl_client = MlflowClient(tracking_uri)
-    experiment = mlfl_client.list_experiments()[0]
-    run_infos = mlfl_client.list_run_infos(experiment.experiment_id)
-    assert len(run_infos) == n_trials
-
-    run = mlfl_client.get_run(run_infos[0].run_id)
-    run_dict = run.to_dictionary()
-
-    # The `tags` contains param's distributions and other information too, such as trial number.
-    assert len(run_dict["data"]["tags"]) > num_params
-    assert len(run_dict["data"]["params"]) == num_params
-    assert len(run_dict["data"]["metrics"]) == num_objective