Skip to content

Commit

Permalink
[Runs] Log run state to be patched (#5476)
Browse files Browse the repository at this point in the history
  • Loading branch information
liranbg committed Apr 30, 2024
1 parent 64a306a commit 8a13381
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 31 deletions.
67 changes: 37 additions & 30 deletions server/api/crud/runs.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,36 +73,14 @@ def update_run(
data: dict,
):
project = project or mlrun.mlconf.default_project
logger.debug("Updating run", project=project, uid=uid, iter=iter)
# TODO: Abort run moved to a separate endpoint, remove this section once in 1.8.0
# (once 1.5.x clients are not supported)
if (
data
and data.get("status.state") == mlrun.runtimes.constants.RunStates.aborted
):
current_run = server.api.utils.singletons.db.get_db().read_run(
db_session, uid, project, iter
)
if (
current_run.get("status", {}).get("state")
in mlrun.runtimes.constants.RunStates.terminal_states()
):
raise mlrun.errors.MLRunConflictError(
"Run is already in terminal state, can not be aborted"
)
runtime_kind = current_run.get("metadata", {}).get("labels", {}).get("kind")
if runtime_kind not in mlrun.runtimes.RuntimeKinds.abortable_runtimes():
raise mlrun.errors.MLRunBadRequestError(
f"Run of kind {runtime_kind} can not be aborted"
)
# aborting the run meaning deleting its runtime resources
# TODO: runtimes crud interface should ideally expose some better API that will hold inside itself the
# "knowledge" on the label selector
server.api.crud.RuntimeResources().delete_runtime_resources(
db_session,
label_selector=f"mlrun/project={project},mlrun/uid={uid}",
force=True,
)
run_state = data.get("status.state") if data else None
logger.debug(
"Updating run", project=project, uid=uid, iter=iter, run_state=run_state
)

# Note: Abort run moved to a separated endpoint
# TODO: Remove below function for 1.8.0 (once 1.5.x clients are not supported)
self._update_aborted_run(db_session, project, uid, iter, data)
server.api.utils.singletons.db.get_db().update_run(
db_session, data, uid, project, iter
)
Expand Down Expand Up @@ -447,3 +425,32 @@ async def _post_delete_run(project, uid):
project,
uid,
)

def _update_aborted_run(self, db_session, project, uid, iter, data):
    """Validate an abort request carried by a run update and free its resources.

    Does nothing unless ``data`` is non-empty and its ``"status.state"``
    entry equals ``RunStates.aborted``. For an abort request, the stored
    run is read from the DB, checked, and its runtime resources are
    force-deleted.

    NOTE(review): the TODO next to this method's call site says aborting
    moved to a separate endpoint and that this path is to be removed in
    1.8.0 - confirm before relying on it.

    :param db_session: DB session passed through to the DB and runtime
        resources layers.
    :param project:    project name of the run.
    :param uid:        uid of the run.
    :param iter:       iteration of the run.
    :param data:       the update payload (flat dict); may be empty or None.

    :raises mlrun.errors.MLRunConflictError: the stored run is already in
        a terminal state.
    :raises mlrun.errors.MLRunBadRequestError: the run's kind label is not
        one of the abortable runtime kinds.
    """
    # An empty payload cannot be an abort request.
    if not data:
        return

    abort_requested = (
        data.get("status.state") == mlrun.runtimes.constants.RunStates.aborted
    )
    if not abort_requested:
        return

    db = server.api.utils.singletons.db.get_db()
    stored_run = db.read_run(db_session, uid, project, iter)

    stored_state = stored_run.get("status", {}).get("state")
    terminal_states = mlrun.runtimes.constants.RunStates.terminal_states()
    if stored_state in terminal_states:
        raise mlrun.errors.MLRunConflictError(
            "Run is already in terminal state, can not be aborted"
        )

    runtime_kind = stored_run.get("metadata", {}).get("labels", {}).get("kind")
    abortable_kinds = mlrun.runtimes.RuntimeKinds.abortable_runtimes()
    if runtime_kind not in abortable_kinds:
        raise mlrun.errors.MLRunBadRequestError(
            f"Run of kind {runtime_kind} can not be aborted"
        )

    # Aborting a run means deleting its runtime resources.
    # TODO: the runtimes crud interface should ideally expose a better API
    #   that keeps the "knowledge" of the label selector inside itself
    runtime_resources = server.api.crud.RuntimeResources()
    runtime_resources.delete_runtime_resources(
        db_session,
        label_selector=f"mlrun/project={project},mlrun/uid={uid}",
        force=True,
    )
2 changes: 1 addition & 1 deletion server/api/runtime_handlers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1624,7 +1624,7 @@ def _ensure_run_state(
run_state=run_state,
)

logger.info("Updating run state", run_state=run_state)
logger.info("Updating run state", run_uid=uid, run_state=run_state)
run_updates = {
"status.state": run_state,
"status.last_update": now_date().isoformat(),
Expand Down

0 comments on commit 8a13381

Please sign in to comment.