Commit

Odsc-56003/fix eval delete (#818)
mingkang111 committed May 6, 2024
1 parent 10d9393 commit 2031b02
Showing 2 changed files with 59 additions and 45 deletions.
65 changes: 42 additions & 23 deletions ads/aqua/evaluation.py
@@ -367,6 +367,9 @@ class AquaEvaluationApp(AquaApp):
_report_cache = TTLCache(maxsize=10, ttl=timedelta(hours=5), timer=datetime.now)
_metrics_cache = TTLCache(maxsize=10, ttl=timedelta(hours=5), timer=datetime.now)
_eval_cache = TTLCache(maxsize=200, ttl=timedelta(hours=10), timer=datetime.now)
+    _deletion_cache = TTLCache(
+        maxsize=10, ttl=timedelta(minutes=10), timer=datetime.now
+    )
_cache_lock = Lock()

@telemetry(entry_point="plugin=evaluation&action=create", name="aqua")
@@ -1371,6 +1374,7 @@ def _cancel_job_run(run, model):
@telemetry(entry_point="plugin=evaluation&action=delete", name="aqua")
def delete(self, eval_id):
"""Deletes the job and the associated model for the given evaluation id.
Parameters
----------
eval_id: str
@@ -1383,9 +1387,9 @@ def delete(self, eval_id):
Raises
------
AquaRuntimeError:
-        if a model doesn't exist for the given eval_id
+        if a model doesn't exist for the given eval_id.
AquaMissingKeyError:
-        if training_id is missing the job run id
+        if job/jobrun id is missing.
"""

model = DataScienceModel.from_id(eval_id)
@@ -1407,9 +1411,21 @@ def delete(self, eval_id):

self._delete_job_and_model(job, model)

+        try:
+            jobrun_id = model.custom_metadata_list.get(
+                EvaluationCustomMetadata.EVALUATION_JOB_RUN_ID.value
+            ).value
+            jobrun = utils.query_resource(jobrun_id, return_all=False)
+        except Exception:
+            logger.debug("Associated Job Run OCID is missing.")
+            jobrun = None

self._eval_cache.pop(key=eval_id, default=None)
+        self._deletion_cache.__setitem__(key=eval_id, value="")

status = dict(
id=eval_id,
lifecycle_state="DELETING",
lifecycle_state=jobrun.lifecycle_state if jobrun else "DELETING",
time_accepted=datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f%z"),
)
return status
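
The upshot of this hunk: delete() now looks up the associated job run on a best-effort basis and, when the run is still visible, reports its real lifecycle state instead of always answering "DELETING". A hedged sketch of just that return-path decision; `build_delete_status` and the sample objects are hypothetical stand-ins, not repo code:

from datetime import datetime
from types import SimpleNamespace

def build_delete_status(eval_id, jobrun):
    # Surface the actual run state when the lookup succeeded; otherwise
    # fall back to the optimistic "DELETING".
    return dict(
        id=eval_id,
        lifecycle_state=jobrun.lifecycle_state if jobrun else "DELETING",
        time_accepted=datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f%z"),
    )

print(build_delete_status("eval-id", SimpleNamespace(lifecycle_state="DELETED")))
print(build_delete_status("eval-id", None))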
@@ -1697,30 +1713,33 @@ def _get_status(
] = None,
) -> dict:
"""Builds evaluation status based on the model status and job run status.
-        When detect `aqua_evaluation_error` in custom metadata, the jobrun is failed.
-        However, if jobrun failed before saving this meta, we need to check the existance
-        of the evaluation artifact.
+        When jobrun information is missing, the status is decided as follows:
+        * If the evaluation has just been deleted, the jobrun status is DELETED.
+        * If `aqua_evaluation_error` is present in custom metadata, the jobrun has failed.
+        * If the jobrun failed before this metadata was saved, the existence of the
+          evaluation artifact decides the status.
"""
# TODO: revisit for CANCELED evaluation
-        job_run_status = (
-            JobRun.LIFECYCLE_STATE_FAILED
-            if self._get_attribute_from_model_metadata(
-                model, EvaluationCustomMetadata.EVALUATION_ERROR.value
-            )
-            else None
-        )
-
-        model_status = model.lifecycle_state
-        job_run_status = job_run_status or (
-            jobrun.lifecycle_state
-            if jobrun and not jobrun.lifecycle_state == JobRun.LIFECYCLE_STATE_DELETED
-            else (
-                JobRun.LIFECYCLE_STATE_SUCCEEDED
-                if self._if_eval_artifact_exist(model)
-                else JobRun.LIFECYCLE_STATE_FAILED
-            )
-        )
+        model_status = model.lifecycle_state
+        job_run_status = None
+
+        if jobrun:
+            job_run_status = jobrun.lifecycle_state
+
+        if jobrun is None:
+            if model.identifier in self._deletion_cache.keys():
+                job_run_status = JobRun.LIFECYCLE_STATE_DELETED
+
+            elif self._get_attribute_from_model_metadata(
+                model, EvaluationCustomMetadata.EVALUATION_ERROR.value
+            ):
+                job_run_status = JobRun.LIFECYCLE_STATE_FAILED
+
+            elif self._if_eval_artifact_exist(model):
+                job_run_status = JobRun.LIFECYCLE_STATE_SUCCEEDED
+            else:
+                job_run_status = JobRun.LIFECYCLE_STATE_FAILED

lifecycle_state = utils.LifecycleStatus.get_status(
evaluation_status=model_status, job_run_status=job_run_status
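
Read as a whole, the rewritten branching gives job-run status a clear precedence order. The function below is an illustrative stand-in for that cascade (plain strings in place of the JobRun lifecycle constants), not code from the repo:

def resolve_job_run_status(jobrun_state, recently_deleted, has_error_meta, artifact_exists):
    # Precedence: a visible job run wins; then the deletion marker; then the
    # recorded evaluation error; finally the artifact check decides.
    if jobrun_state is not None:
        return jobrun_state
    if recently_deleted:
        return "DELETED"
    if has_error_meta:
        return "FAILED"
    return "SUCCEEDED" if artifact_exists else "FAILED"

assert resolve_job_run_status("IN_PROGRESS", False, False, False) == "IN_PROGRESS"
assert resolve_job_run_status(None, True, True, True) == "DELETED"
assert resolve_job_run_status(None, False, False, True) == "SUCCEEDED"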
39 changes: 17 additions & 22 deletions tests/unitary/with_extras/aqua/test_evaluation.py
@@ -527,36 +527,28 @@ def test_create_evaluation(

def test_get_service_model_name(self):
# get service model name from fine tuned model deployment
-        source = (
-            ModelDeployment()
-            .with_freeform_tags(
-                **{
-                    Tags.AQUA_TAG.value: UNKNOWN,
-                    Tags.AQUA_FINE_TUNED_MODEL_TAG.value: "test_service_model_id#test_service_model_name",
-                    Tags.AQUA_MODEL_NAME_TAG.value: "test_fine_tuned_model_name"
-                }
-            )
-        )
+        source = ModelDeployment().with_freeform_tags(
+            **{
+                Tags.AQUA_TAG.value: UNKNOWN,
+                Tags.AQUA_FINE_TUNED_MODEL_TAG.value: "test_service_model_id#test_service_model_name",
+                Tags.AQUA_MODEL_NAME_TAG.value: "test_fine_tuned_model_name",
+            }
+        )
service_model_name = self.app._get_service_model_name(source)
assert service_model_name == "test_service_model_name"

# get service model name from model deployment
-        source = (
-            ModelDeployment()
-            .with_freeform_tags(
-                **{
-                    Tags.AQUA_TAG.value: "active",
-                    Tags.AQUA_MODEL_NAME_TAG.value: "test_service_model_name"
-                }
-            )
-        )
+        source = ModelDeployment().with_freeform_tags(
+            **{
+                Tags.AQUA_TAG.value: "active",
+                Tags.AQUA_MODEL_NAME_TAG.value: "test_service_model_name",
+            }
+        )
service_model_name = self.app._get_service_model_name(source)
assert service_model_name == "test_service_model_name"

# get service model name from service model
-        source = DataScienceModel(
-            display_name="test_service_model_name"
-        )
+        source = DataScienceModel(display_name="test_service_model_name")
service_model_name = self.app._get_service_model_name(source)
assert service_model_name == "test_service_model_name"

@@ -807,6 +799,7 @@ def test_get_status(self):
@parameterized.expand(
[
(
"artifact_exist",
dict(
return_value=oci.response.Response(
status=200, request=MagicMock(), headers=MagicMock(), data=None
@@ -815,6 +808,7 @@ def test_get_status(self):
"SUCCEEDED",
),
(
"artifact_missing",
dict(
side_effect=oci.exceptions.ServiceError(
status=404, code=None, message="error test msg", headers={}
@@ -825,7 +819,7 @@
]
)
def test_get_status_when_missing_jobrun(
-        self, mock_head_model_artifact_response, expected_output
+        self, name, mock_head_model_artifact_response, expected_output
):
"""Tests getting evaluation status correctly when missing jobrun association."""
self.app.ds_client.get_model_provenance = MagicMock(
@@ -839,13 +833,14 @@ def test_get_status_when_missing_jobrun(
)
)
self.app._fetch_jobrun = MagicMock(return_value=None)

+        self.app._deletion_cache.clear()
self.app.ds_client.head_model_artifact = MagicMock(
side_effect=mock_head_model_artifact_response.get("side_effect", None),
return_value=mock_head_model_artifact_response.get("return_value", None),
)

response = self.app.get_status(TestDataset.EVAL_ID)

self.app.ds_client.head_model_artifact.assert_called_with(
model_id=TestDataset.EVAL_ID
)
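
Side note on the test changes: parameterized.expand passes every tuple element to the test method, so naming the cases requires the extra `name` parameter added to the signature above; a leading string is also folded into the generated test names. A small self-contained example, assuming the parameterized package (the test body is a stand-in):

import unittest
from parameterized import parameterized

class ExampleTest(unittest.TestCase):
    @parameterized.expand([
        ("artifact_exist", 200, "SUCCEEDED"),
        ("artifact_missing", 404, "FAILED"),
    ])
    def test_status(self, name, status_code, expected):
        # Generated names read as test_status_0_artifact_exist and
        # test_status_1_artifact_missing.
        self.assertEqual(expected, "SUCCEEDED" if status_code == 200 else "FAILED")

if __name__ == "__main__":
    unittest.main()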
