Commit

Odsc-56003/fix eval delete (#818)
mingkang111 committed May 6, 2024
1 parent 10d9393 commit 2031b02
Showing 2 changed files with 59 additions and 45 deletions.
65 changes: 42 additions & 23 deletions ads/aqua/evaluation.py
@@ -367,6 +367,9 @@ class AquaEvaluationApp(AquaApp):
_report_cache = TTLCache(maxsize=10, ttl=timedelta(hours=5), timer=datetime.now)
_metrics_cache = TTLCache(maxsize=10, ttl=timedelta(hours=5), timer=datetime.now)
_eval_cache = TTLCache(maxsize=200, ttl=timedelta(hours=10), timer=datetime.now)
+    _deletion_cache = TTLCache(
+        maxsize=10, ttl=timedelta(minutes=10), timer=datetime.now
+    )
_cache_lock = Lock()

@telemetry(entry_point="plugin=evaluation&action=create", name="aqua")
@@ -1371,6 +1374,7 @@ def _cancel_job_run(run, model):
@telemetry(entry_point="plugin=evaluation&action=delete", name="aqua")
def delete(self, eval_id):
"""Deletes the job and the associated model for the given evaluation id.
Parameters
----------
eval_id: str
@@ -1383,9 +1387,9 @@ def delete(self, eval_id):
Raises
------
AquaRuntimeError:
-        if a model doesn't exist for the given eval_id
+        if a model doesn't exist for the given eval_id.
AquaMissingKeyError:
-        if training_id is missing the job run id
+        if job/jobrun id is missing.
"""

model = DataScienceModel.from_id(eval_id)
@@ -1407,9 +1411,21 @@ def delete(self, eval_id):

self._delete_job_and_model(job, model)

+        try:
+            jobrun_id = model.custom_metadata_list.get(
+                EvaluationCustomMetadata.EVALUATION_JOB_RUN_ID.value
+            ).value
+            jobrun = utils.query_resource(jobrun_id, return_all=False)
+        except Exception:
+            logger.debug("Associated Job Run OCID is missing.")
+            jobrun = None

self._eval_cache.pop(key=eval_id, default=None)
+        self._deletion_cache.__setitem__(key=eval_id, value="")

status = dict(
id=eval_id,
lifecycle_state="DELETING",
lifecycle_state=jobrun.lifecycle_state if jobrun else "DELETING",
time_accepted=datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f%z"),
)
return status
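
The upshot of this hunk: delete() now looks up the associated job run on a best-effort basis and, when the run is still visible, reports its real lifecycle state instead of always answering "DELETING". A hedged sketch of just that return-path decision; `build_delete_status` and the sample objects are hypothetical stand-ins, not repo code:

from datetime import datetime
from types import SimpleNamespace

def build_delete_status(eval_id, jobrun):
    # Surface the actual run state when the lookup succeeded; otherwise
    # fall back to the optimistic "DELETING".
    return dict(
        id=eval_id,
        lifecycle_state=jobrun.lifecycle_state if jobrun else "DELETING",
        time_accepted=datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f%z"),
    )

print(build_delete_status("eval-id", SimpleNamespace(lifecycle_state="DELETED")))
print(build_delete_status("eval-id", None))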
@@ -1697,30 +1713,33 @@ def _get_status(
] = None,
) -> dict:
"""Builds evaluation status based on the model status and job run status.
-        When detect `aqua_evaluation_error` in custom metadata, the jobrun is failed.
-        However, if jobrun failed before saving this meta, we need to check the existance
-        of the evaluation artifact.
+        When jobrun information is missing, the status is decided as follows:
+        * If the evaluation has just been deleted, the jobrun status is DELETED.
+        * If `aqua_evaluation_error` is present in custom metadata, the jobrun has failed.
+        * If the jobrun failed before this metadata was saved, the existence of the
+          evaluation artifact decides the status.
"""
# TODO: revisit for CANCELED evaluation
-        job_run_status = (
-            JobRun.LIFECYCLE_STATE_FAILED
-            if self._get_attribute_from_model_metadata(
-                model, EvaluationCustomMetadata.EVALUATION_ERROR.value
-            )
-            else None
-        )
-
-        model_status = model.lifecycle_state
-        job_run_status = job_run_status or (
-            jobrun.lifecycle_state
-            if jobrun and not jobrun.lifecycle_state == JobRun.LIFECYCLE_STATE_DELETED
-            else (
-                JobRun.LIFECYCLE_STATE_SUCCEEDED
-                if self._if_eval_artifact_exist(model)
-                else JobRun.LIFECYCLE_STATE_FAILED
-            )
-        )
+        model_status = model.lifecycle_state
+        job_run_status = None
+
+        if jobrun:
+            job_run_status = jobrun.lifecycle_state
+
+        if jobrun is None:
+            if model.identifier in self._deletion_cache.keys():
+                job_run_status = JobRun.LIFECYCLE_STATE_DELETED
+
+            elif self._get_attribute_from_model_metadata(
+                model, EvaluationCustomMetadata.EVALUATION_ERROR.value
+            ):
+                job_run_status = JobRun.LIFECYCLE_STATE_FAILED
+
+            elif self._if_eval_artifact_exist(model):
+                job_run_status = JobRun.LIFECYCLE_STATE_SUCCEEDED
+            else:
+                job_run_status = JobRun.LIFECYCLE_STATE_FAILED

lifecycle_state = utils.LifecycleStatus.get_status(
evaluation_status=model_status, job_run_status=job_run_status
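
Read as a whole, the rewritten branching gives job-run status a clear precedence order. The function below is an illustrative stand-in for that cascade (plain strings in place of the JobRun lifecycle constants), not code from the repo:

def resolve_job_run_status(jobrun_state, recently_deleted, has_error_meta, artifact_exists):
    # Precedence: a visible job run wins; then the deletion marker; then the
    # recorded evaluation error; finally the artifact check decides.
    if jobrun_state is not None:
        return jobrun_state
    if recently_deleted:
        return "DELETED"
    if has_error_meta:
        return "FAILED"
    return "SUCCEEDED" if artifact_exists else "FAILED"

assert resolve_job_run_status("IN_PROGRESS", False, False, False) == "IN_PROGRESS"
assert resolve_job_run_status(None, True, True, True) == "DELETED"
assert resolve_job_run_status(None, False, False, True) == "SUCCEEDED"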
39 changes: 17 additions & 22 deletions tests/unitary/with_extras/aqua/test_evaluation.py
@@ -527,36 +527,28 @@ def test_create_evaluation(

def test_get_service_model_name(self):
# get service model name from fine tuned model deployment
-        source = (
-            ModelDeployment()
-            .with_freeform_tags(
-                **{
-                    Tags.AQUA_TAG.value: UNKNOWN,
-                    Tags.AQUA_FINE_TUNED_MODEL_TAG.value: "test_service_model_id#test_service_model_name",
-                    Tags.AQUA_MODEL_NAME_TAG.value: "test_fine_tuned_model_name"
-                }
-            )
-        )
+        source = ModelDeployment().with_freeform_tags(
+            **{
+                Tags.AQUA_TAG.value: UNKNOWN,
+                Tags.AQUA_FINE_TUNED_MODEL_TAG.value: "test_service_model_id#test_service_model_name",
+                Tags.AQUA_MODEL_NAME_TAG.value: "test_fine_tuned_model_name",
+            }
+        )
service_model_name = self.app._get_service_model_name(source)
assert service_model_name == "test_service_model_name"

# get service model name from model deployment
-        source = (
-            ModelDeployment()
-            .with_freeform_tags(
-                **{
-                    Tags.AQUA_TAG.value: "active",
-                    Tags.AQUA_MODEL_NAME_TAG.value: "test_service_model_name"
-                }
-            )
-        )
+        source = ModelDeployment().with_freeform_tags(
+            **{
+                Tags.AQUA_TAG.value: "active",
+                Tags.AQUA_MODEL_NAME_TAG.value: "test_service_model_name",
+            }
+        )
service_model_name = self.app._get_service_model_name(source)
assert service_model_name == "test_service_model_name"

# get service model name from service model
-        source = DataScienceModel(
-            display_name="test_service_model_name"
-        )
+        source = DataScienceModel(display_name="test_service_model_name")
service_model_name = self.app._get_service_model_name(source)
assert service_model_name == "test_service_model_name"

@@ -807,6 +799,7 @@ def test_get_status(self):
@parameterized.expand(
[
(
"artifact_exist",
dict(
return_value=oci.response.Response(
status=200, request=MagicMock(), headers=MagicMock(), data=None
@@ -815,6 +808,7 @@ def test_get_status(self):
"SUCCEEDED",
),
(
"artifact_missing",
dict(
side_effect=oci.exceptions.ServiceError(
status=404, code=None, message="error test msg", headers={}
@@ -825,7 +819,7 @@
]
)
def test_get_status_when_missing_jobrun(
-        self, mock_head_model_artifact_response, expected_output
+        self, name, mock_head_model_artifact_response, expected_output
):
"""Tests getting evaluation status correctly when missing jobrun association."""
self.app.ds_client.get_model_provenance = MagicMock(
@@ -839,13 +833,14 @@ def test_get_status_when_missing_jobrun(
)
)
self.app._fetch_jobrun = MagicMock(return_value=None)

+        self.app._deletion_cache.clear()
self.app.ds_client.head_model_artifact = MagicMock(
side_effect=mock_head_model_artifact_response.get("side_effect", None),
return_value=mock_head_model_artifact_response.get("return_value", None),
)

response = self.app.get_status(TestDataset.EVAL_ID)

self.app.ds_client.head_model_artifact.assert_called_with(
model_id=TestDataset.EVAL_ID
)
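
Side note on the test changes: parameterized.expand passes every tuple element to the test method, so naming the cases requires the extra `name` parameter added to the signature above; a leading string is also folded into the generated test names. A small self-contained example, assuming the parameterized package (the test body is a stand-in):

import unittest
from parameterized import parameterized

class ExampleTest(unittest.TestCase):
    @parameterized.expand([
        ("artifact_exist", 200, "SUCCEEDED"),
        ("artifact_missing", 404, "FAILED"),
    ])
    def test_status(self, name, status_code, expected):
        # Generated names read as test_status_0_artifact_exist and
        # test_status_1_artifact_missing.
        self.assertEqual(expected, "SUCCEEDED" if status_code == 200 else "FAILED")

if __name__ == "__main__":
    unittest.main()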
