Improve error messages dealing with deleted experiments (#814)

In particular set_experiment and create_experiment to return more descriptive error messages when they are called with a name for an experiment that already exists but has been deleted. Currently: - set_experiment will succeed but following start_run calls will fail because the experiment is not active. - create_experiment will fail saying that there is already an experiment with the same name. With this PR: - set_experiment will not succeed, with an error message saying there is a deleted experiment with the same name. - create_experiment will fail saying that there is already a deleted experiment with the same name. Added unit tests to cover these code paths.
mlflow · Jan 17, 2019 · 3a37840 · 3a37840
1 parent a92d1bf
commit 3a37840
Show file tree

Hide file tree

Showing 3 changed files with 44 additions and 4 deletions.
diff --git a/mlflow/store/file_store.py b/mlflow/store/file_store.py
@@ -166,8 +166,16 @@ def create_experiment(self, name, artifact_location=None):
                                   databricks_pb2.INVALID_PARAMETER_VALUE)
         experiment = self.get_experiment_by_name(name)
         if experiment is not None:
-            raise MlflowException("Experiment '%s' already exists." % experiment.name,
-                                  databricks_pb2.RESOURCE_ALREADY_EXISTS)
+            if experiment.lifecycle_stage == Experiment.DELETED_LIFECYCLE:
+                raise MlflowException(
+                    "Experiment '%s' already exists in deleted state. "
+                    "You can restore the experiment, or permanently delete the experiment "
+                    "from the .trash folder (under tracking server's root folder) before "
+                    "creating a new one with the same name." % experiment.name,
+                    databricks_pb2.RESOURCE_ALREADY_EXISTS)
+            else:
+                raise MlflowException("Experiment '%s' already exists." % experiment.name,
+                                      databricks_pb2.RESOURCE_ALREADY_EXISTS)
         # Get all existing experiments and find the one with largest ID.
         # len(list_all(..)) would not work when experiments are deleted.
         experiments_ids = [e.experiment_id for e in self.list_experiments(ViewType.ALL)]

diff --git a/mlflow/tracking/fluent.py b/mlflow/tracking/fluent.py
@@ -48,6 +48,11 @@ def set_experiment(experiment_name):
     if exp_id is None:  # id can be 0
         print("INFO: '{}' does not exist. Creating a new experiment".format(experiment_name))
         exp_id = client.create_experiment(experiment_name)
+    elif experiment.lifecycle_stage == Experiment.DELETED_LIFECYCLE:
+        raise MlflowException(
+            "Cannot set a deleted experiment '%s' as the active experiment."
+            " You can restore the experiment, or permanently delete the "
+            " experiment to create a new one." % experiment.name)
     global _active_experiment_id
     _active_experiment_id = exp_id
 

diff --git a/tests/tracking/test_tracking.py b/tests/tracking/test_tracking.py
@@ -9,7 +9,7 @@
 
 import mlflow
 from mlflow import tracking
-from mlflow.entities import RunStatus
+from mlflow.entities import Experiment, RunStatus
 from mlflow.exceptions import MlflowException
 from mlflow.tracking.client import MlflowClient
 from mlflow.tracking.fluent import start_run, end_run
@@ -32,6 +32,18 @@ def test_create_experiment(tracking_uri_mock):
     assert exp_id is not None
 
 
+def test_create_experiment_with_duplicate_name(tracking_uri_mock):
+    name = "popular_name"
+    exp_id = mlflow.create_experiment(name)
+
+    with pytest.raises(MlflowException):
+        mlflow.create_experiment(name)
+
+    tracking.MlflowClient().delete_experiment(exp_id)
+    with pytest.raises(MlflowException):
+        mlflow.create_experiment(name)
+
+
 def test_set_experiment(tracking_uri_mock, reset_active_experiment):
     with pytest.raises(TypeError):
         mlflow.set_experiment()
@@ -57,9 +69,24 @@ def test_set_experiment(tracking_uri_mock, reset_active_experiment):
     end_run()
 
 
+def test_set_experiment_with_deleted_experiment_name(tracking_uri_mock):
+    name = "dead_exp"
+    mlflow.set_experiment(name)
+    run = start_run()
+    end_run()
+    exp_id = run.info.experiment_id
+
+    tracking.MlflowClient().delete_experiment(exp_id)
+
+    with pytest.raises(MlflowException):
+        mlflow.set_experiment(name)
+
+
 def test_set_experiment_with_zero_id(reset_mock, reset_active_experiment):
     reset_mock(MlflowClient, "get_experiment_by_name",
-               mock.Mock(return_value=attrdict.AttrDict(experiment_id=0)))
+               mock.Mock(return_value=attrdict.AttrDict(
+                   experiment_id=0,
+                   lifecycle_stage=Experiment.ACTIVE_LIFECYCLE)))
     reset_mock(MlflowClient, "create_experiment", mock.Mock())
 
     mlflow.set_experiment("my_exp")