Skip to content

Commit

Permalink
Improve error messages dealing with deleted experiments (#814)
Browse files Browse the repository at this point in the history
In particular set_experiment and create_experiment to return more descriptive error messages when they are called with a name for an experiment that already exists but has been deleted. Currently:

- set_experiment will succeed but following start_run calls will fail because the experiment is not active.
- create_experiment will fail saying that there is already an experiment with the same name.

With this PR:
- set_experiment will not succeed, with an error message saying there is a deleted experiment with the same name.
- create_experiment will fail saying that there is already a deleted experiment with the same name.
Added unit tests to cover these code paths.
  • Loading branch information
sueann committed Jan 17, 2019
1 parent a92d1bf commit 3a37840
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 4 deletions.
12 changes: 10 additions & 2 deletions mlflow/store/file_store.py
Expand Up @@ -166,8 +166,16 @@ def create_experiment(self, name, artifact_location=None):
databricks_pb2.INVALID_PARAMETER_VALUE)
experiment = self.get_experiment_by_name(name)
if experiment is not None:
raise MlflowException("Experiment '%s' already exists." % experiment.name,
databricks_pb2.RESOURCE_ALREADY_EXISTS)
if experiment.lifecycle_stage == Experiment.DELETED_LIFECYCLE:
raise MlflowException(
"Experiment '%s' already exists in deleted state. "
"You can restore the experiment, or permanently delete the experiment "
"from the .trash folder (under tracking server's root folder) before "
"creating a new one with the same name." % experiment.name,
databricks_pb2.RESOURCE_ALREADY_EXISTS)
else:
raise MlflowException("Experiment '%s' already exists." % experiment.name,
databricks_pb2.RESOURCE_ALREADY_EXISTS)
# Get all existing experiments and find the one with largest ID.
# len(list_all(..)) would not work when experiments are deleted.
experiments_ids = [e.experiment_id for e in self.list_experiments(ViewType.ALL)]
Expand Down
5 changes: 5 additions & 0 deletions mlflow/tracking/fluent.py
Expand Up @@ -48,6 +48,11 @@ def set_experiment(experiment_name):
if exp_id is None: # id can be 0
print("INFO: '{}' does not exist. Creating a new experiment".format(experiment_name))
exp_id = client.create_experiment(experiment_name)
elif experiment.lifecycle_stage == Experiment.DELETED_LIFECYCLE:
raise MlflowException(
"Cannot set a deleted experiment '%s' as the active experiment."
" You can restore the experiment, or permanently delete the "
" experiment to create a new one." % experiment.name)
global _active_experiment_id
_active_experiment_id = exp_id

Expand Down
31 changes: 29 additions & 2 deletions tests/tracking/test_tracking.py
Expand Up @@ -9,7 +9,7 @@

import mlflow
from mlflow import tracking
from mlflow.entities import RunStatus
from mlflow.entities import Experiment, RunStatus
from mlflow.exceptions import MlflowException
from mlflow.tracking.client import MlflowClient
from mlflow.tracking.fluent import start_run, end_run
Expand All @@ -32,6 +32,18 @@ def test_create_experiment(tracking_uri_mock):
assert exp_id is not None


def test_create_experiment_with_duplicate_name(tracking_uri_mock):
name = "popular_name"
exp_id = mlflow.create_experiment(name)

with pytest.raises(MlflowException):
mlflow.create_experiment(name)

tracking.MlflowClient().delete_experiment(exp_id)
with pytest.raises(MlflowException):
mlflow.create_experiment(name)


def test_set_experiment(tracking_uri_mock, reset_active_experiment):
with pytest.raises(TypeError):
mlflow.set_experiment()
Expand All @@ -57,9 +69,24 @@ def test_set_experiment(tracking_uri_mock, reset_active_experiment):
end_run()


def test_set_experiment_with_deleted_experiment_name(tracking_uri_mock):
name = "dead_exp"
mlflow.set_experiment(name)
run = start_run()
end_run()
exp_id = run.info.experiment_id

tracking.MlflowClient().delete_experiment(exp_id)

with pytest.raises(MlflowException):
mlflow.set_experiment(name)


def test_set_experiment_with_zero_id(reset_mock, reset_active_experiment):
reset_mock(MlflowClient, "get_experiment_by_name",
mock.Mock(return_value=attrdict.AttrDict(experiment_id=0)))
mock.Mock(return_value=attrdict.AttrDict(
experiment_id=0,
lifecycle_stage=Experiment.ACTIVE_LIFECYCLE)))
reset_mock(MlflowClient, "create_experiment", mock.Mock())

mlflow.set_experiment("my_exp")
Expand Down

0 comments on commit 3a37840

Please sign in to comment.