From e8f6cd0e7189afdfbaf0fd562eaea9d33bfb3ff2 Mon Sep 17 00:00:00 2001 From: Alon Maor <48641682+alonmr@users.noreply.github.com> Date: Sun, 17 Mar 2024 09:07:53 +0200 Subject: [PATCH] [Projects] Allow deleting archived projects without leader [1.6.x] (#5290) --- server/api/api/endpoints/projects.py | 63 ++++++++++++++++++++----- server/api/api/endpoints/projects_v2.py | 38 ++++++++++----- server/api/api/utils.py | 62 +++++++++++++++++------- server/api/utils/clients/iguazio.py | 25 +++++++--- server/api/utils/projects/follower.py | 3 ++ server/api/utils/projects/leader.py | 1 + server/api/utils/projects/member.py | 1 + tests/api/api/test_projects.py | 53 ++++++++++++++++----- 8 files changed, 190 insertions(+), 56 deletions(-) diff --git a/server/api/api/endpoints/projects.py b/server/api/api/endpoints/projects.py index 56ee4b5b752..5c9ccfb2212 100644 --- a/server/api/api/endpoints/projects.py +++ b/server/api/api/endpoints/projects.py @@ -185,6 +185,15 @@ async def delete_project( server.api.api.deps.get_db_session ), ): + # check if project exists + try: + project = await run_in_threadpool( + get_project_member().get_project, db_session, name, auth_info.session + ) + except mlrun.errors.MLRunNotFoundError: + logger.info("Project not found, nothing to delete", project=name) + return fastapi.Response(status_code=http.HTTPStatus.NO_CONTENT.value) + # delete project can be responsible for deleting schedules. Schedules are running only on chief, # that is why we re-route requests to chief if ( @@ -219,7 +228,7 @@ async def delete_project( # wait for this background task to complete before marking the task as done. task, _ = await run_in_threadpool( server.api.api.utils.get_or_create_project_deletion_background_task, - name, + project, deletion_strategy, db_session, auth_info, @@ -228,16 +237,46 @@ async def delete_project( background_tasks.add_task(task) return fastapi.Response(status_code=http.HTTPStatus.ACCEPTED.value) - is_running_in_background = await run_in_threadpool( - get_project_member().delete_project, - db_session, - name, - deletion_strategy, - auth_info.projects_role, - auth_info, - wait_for_completion=wait_for_completion, - ) - if is_running_in_background: + is_running_in_background = False + force_delete = False + try: + is_running_in_background = await run_in_threadpool( + get_project_member().delete_project, + db_session, + name, + deletion_strategy, + auth_info.projects_role, + auth_info, + wait_for_completion=wait_for_completion, + ) + except mlrun.errors.MLRunNotFoundError as exc: + if server.api.utils.helpers.is_request_from_leader(auth_info.projects_role): + raise exc + + if project.status.state != mlrun.common.schemas.ProjectState.archived: + raise mlrun.errors.MLRunPreconditionFailedError( + f"Failed to delete project {name}. Project not found in leader, but it is not in archived state." + ) + + logger.warning( + "Project not found in leader, ensuring project deleted in mlrun", + project_name=name, + err=mlrun.errors.err_to_str(exc), + ) + force_delete = True + + if force_delete: + # In this case the wrapper delete project request is the one deleting the project because it + # doesn't exist in the leader. + await run_in_threadpool( + server.api.crud.Projects().delete_project, + db_session, + name, + deletion_strategy, + auth_info, + ) + + elif is_running_in_background: return fastapi.Response(status_code=http.HTTPStatus.ACCEPTED.value) else: @@ -248,6 +287,8 @@ async def delete_project( ) await get_project_member().post_delete_project(name) + if force_delete: + return fastapi.Response(status_code=http.HTTPStatus.ACCEPTED.value) return fastapi.Response(status_code=http.HTTPStatus.NO_CONTENT.value) diff --git a/server/api/api/endpoints/projects_v2.py b/server/api/api/endpoints/projects_v2.py index ad119992733..4b6815533ac 100644 --- a/server/api/api/endpoints/projects_v2.py +++ b/server/api/api/endpoints/projects_v2.py @@ -58,22 +58,13 @@ async def delete_project( ): # check if project exists try: - await run_in_threadpool( + project = await run_in_threadpool( get_project_member().get_project, db_session, name, auth_info.session ) except mlrun.errors.MLRunNotFoundError: logger.info("Project not found, nothing to delete", project=name) return fastapi.Response(status_code=http.HTTPStatus.NO_CONTENT.value) - # usually the CRUD for delete project will check permissions, however, since we are running the crud in a background - # task, we need to check permissions here. skip permission check if the request is from the leader. - if not server.api.utils.helpers.is_request_from_leader(auth_info.projects_role): - await server.api.utils.auth.verifier.AuthVerifier().query_project_permissions( - name, - mlrun.common.schemas.AuthorizationAction.delete, - auth_info, - ) - # delete project can be responsible for deleting schedules. Schedules are running only on chief, # that is why we re-route requests to chief if ( @@ -90,6 +81,31 @@ async def delete_project( name=name, request=request, api_version="v2" ) + # usually the CRUD for delete project will check permissions, however, since we are running the crud in a background + # task, we need to check permissions here. skip permission check if the request is from the leader. + if not server.api.utils.helpers.is_request_from_leader(auth_info.projects_role): + skip_permission_check = False + if project.status.state == mlrun.common.schemas.ProjectState.archived: + try: + await run_in_threadpool( + get_project_member().get_project, + db_session, + name, + auth_info.session, + from_leader=True, + ) + except mlrun.errors.MLRunNotFoundError: + skip_permission_check = True + + if not skip_permission_check: + await ( + server.api.utils.auth.verifier.AuthVerifier().query_project_permissions( + name, + mlrun.common.schemas.AuthorizationAction.delete, + auth_info, + ) + ) + # we need to implement the verify_project_is_empty, since we don't want # to spawn a background task for this, only to return a response if deletion_strategy.strategy_to_check(): @@ -105,7 +121,7 @@ async def delete_project( task, task_name = await run_in_threadpool( server.api.api.utils.get_or_create_project_deletion_background_task, - name, + project, deletion_strategy, db_session, auth_info, diff --git a/server/api/api/utils.py b/server/api/api/utils.py index dfde191532a..18e00c1ae5b 100644 --- a/server/api/api/utils.py +++ b/server/api/api/utils.py @@ -1068,8 +1068,8 @@ def artifact_project_and_resource_name_extractor(artifact): def get_or_create_project_deletion_background_task( - project_name: str, deletion_strategy: str, db_session, auth_info -) -> typing.Tuple[typing.Callable, str]: + project: mlrun.common.schemas.Project, deletion_strategy: str, db_session, auth_info +) -> typing.Tuple[typing.Optional[typing.Callable], str]: """ This method is responsible for creating a background task for deleting a project. The project deletion flow is as follows: @@ -1114,7 +1114,7 @@ def get_or_create_project_deletion_background_task( # therefore doesn't wait for the project deletion to complete. wait_for_project_deletion = True - background_task_kind = background_task_kind_format.format(project_name) + background_task_kind = background_task_kind_format.format(project.metadata.name) try: task = server.api.utils.background_tasks.InternalBackgroundTasksHandler().get_active_background_task_by_kind( background_task_kind, @@ -1134,7 +1134,7 @@ def get_or_create_project_deletion_background_task( _delete_project, background_task_name, db_session=db_session, - project_name=project_name, + project=project, deletion_strategy=deletion_strategy, auth_info=auth_info, wait_for_project_deletion=wait_for_project_deletion, @@ -1144,24 +1144,54 @@ def get_or_create_project_deletion_background_task( async def _delete_project( db_session: sqlalchemy.orm.Session, - project_name: str, + project: mlrun.common.schemas.Project, deletion_strategy: mlrun.common.schemas.DeletionStrategy, auth_info: mlrun.common.schemas.AuthInfo, wait_for_project_deletion: bool, background_task_name: str, ): - await run_in_threadpool( - get_project_member().delete_project, - db_session, - project_name, - deletion_strategy, - auth_info.projects_role, - auth_info, - wait_for_completion=True, - background_task_name=background_task_name, - ) + force_delete = False + project_name = project.metadata.name + try: + await run_in_threadpool( + get_project_member().delete_project, + db_session, + project_name, + deletion_strategy, + auth_info.projects_role, + auth_info, + wait_for_completion=True, + background_task_name=background_task_name, + ) + except mlrun.errors.MLRunNotFoundError as exc: + if server.api.utils.helpers.is_request_from_leader(auth_info.projects_role): + raise exc + + if project.status.state != mlrun.common.schemas.ProjectState.archived: + raise mlrun.errors.MLRunPreconditionFailedError( + f"Failed to delete project {project_name}. " + "Project not found in leader, but it is not in archived state." + ) + + logger.warning( + "Project not found in leader, ensuring project is deleted in mlrun", + project_name=project_name, + exc=err_to_str(exc), + ) + force_delete = True + + if force_delete: + # In this case the wrapper delete project job is the one deleting the project because it + # doesn't exist in the leader. + await run_in_threadpool( + server.api.crud.Projects().delete_project, + db_session, + project_name, + deletion_strategy, + auth_info, + ) - if wait_for_project_deletion: + elif wait_for_project_deletion: await run_in_threadpool( verify_project_is_deleted, project_name, diff --git a/server/api/utils/clients/iguazio.py b/server/api/utils/clients/iguazio.py index 9e62332a5a5..3129181c1f6 100644 --- a/server/api/utils/clients/iguazio.py +++ b/server/api/utils/clients/iguazio.py @@ -576,13 +576,24 @@ def _get_project_from_iguazio_without_parsing( params = {"include": "owner"} if enrich_owner_access_key: params["enrich_owner_access_key"] = "true" - return self._send_request_to_api( - "GET", - f"projects/__name__/{name}", - "Failed getting project from Iguazio", - session, - params=params, - ) + try: + return self._send_request_to_api( + "GET", + f"projects/__name__/{name}", + "Failed getting project from Iguazio", + session, + params=params, + ) + except requests.HTTPError as exc: + if exc.response.status_code != http.HTTPStatus.NOT_FOUND.value: + raise + self._logger.debug( + "Project not found in Iguazio", + name=name, + ) + raise mlrun.errors.MLRunNotFoundError( + "Project not found in Iguazio" + ) from exc def _get_project_from_iguazio( self, session: str, name: str, include_owner_session: bool = False diff --git a/server/api/utils/projects/follower.py b/server/api/utils/projects/follower.py index 93c25569fa0..5dfec7c83de 100644 --- a/server/api/utils/projects/follower.py +++ b/server/api/utils/projects/follower.py @@ -212,7 +212,10 @@ def get_project( db_session: sqlalchemy.orm.Session, name: str, leader_session: typing.Optional[str] = None, + from_leader: bool = False, ) -> mlrun.common.schemas.Project: + if from_leader: + return self._leader_client.get_project(leader_session, name) return server.api.crud.Projects().get_project(db_session, name) def get_project_owner( diff --git a/server/api/utils/projects/leader.py b/server/api/utils/projects/leader.py index 148393a1071..48cad385394 100644 --- a/server/api/utils/projects/leader.py +++ b/server/api/utils/projects/leader.py @@ -125,6 +125,7 @@ def get_project( db_session: sqlalchemy.orm.Session, name: str, leader_session: typing.Optional[str] = None, + from_leader: bool = False, ) -> mlrun.common.schemas.Project: return self._leader_follower.get_project(db_session, name) diff --git a/server/api/utils/projects/member.py b/server/api/utils/projects/member.py index 05e48647361..adb8b543083 100644 --- a/server/api/utils/projects/member.py +++ b/server/api/utils/projects/member.py @@ -103,6 +103,7 @@ def get_project( db_session: sqlalchemy.orm.Session, name: str, leader_session: typing.Optional[str] = None, + from_leader: bool = False, ) -> mlrun.common.schemas.Project: pass diff --git a/tests/api/api/test_projects.py b/tests/api/api/test_projects.py index cb1530f8951..9b24e7c1341 100644 --- a/tests/api/api/test_projects.py +++ b/tests/api/api/test_projects.py @@ -968,27 +968,47 @@ def test_delete_project_not_found_in_leader( mock_project_follower_iguazio_client, delete_api_version: str, ) -> None: - project = mlrun.common.schemas.Project( - metadata=mlrun.common.schemas.ProjectMetadata(name="project-name"), + archived_project = mlrun.common.schemas.Project( + metadata=mlrun.common.schemas.ProjectMetadata(name="archived-project"), spec=mlrun.common.schemas.ProjectSpec(), + status=mlrun.common.schemas.ProjectStatus( + state=mlrun.common.schemas.ProjectState.archived + ), ) - response = unversioned_client.post("v1/projects", json=project.dict()) + online_project = mlrun.common.schemas.Project( + metadata=mlrun.common.schemas.ProjectMetadata(name="online-project"), + spec=mlrun.common.schemas.ProjectSpec(), + ) + + response = unversioned_client.post("v1/projects", json=archived_project.dict()) assert response.status_code == HTTPStatus.CREATED.value - _assert_project_response(project, response) + _assert_project_response(archived_project, response) + + response = unversioned_client.post("v1/projects", json=online_project.dict()) + assert response.status_code == HTTPStatus.CREATED.value + _assert_project_response(online_project, response) with unittest.mock.patch.object( mock_project_follower_iguazio_client, "delete_project", - side_effect=mlrun.errors.MLRunNotFoundError("Project not found in Iguazio"), + side_effect=mlrun.errors.MLRunNotFoundError("Project not found"), ): response = unversioned_client.delete( - f"{delete_api_version}/projects/{project.metadata.name}", + f"{delete_api_version}/projects/{archived_project.metadata.name}", ) - if delete_api_version == "v1": - assert response.status_code == HTTPStatus.NOT_FOUND.value - assert "Project not found in Iguazio" in response.json()["detail"] - else: + assert response.status_code == HTTPStatus.ACCEPTED.value + + response = unversioned_client.get( + f"v1/projects/{archived_project.metadata.name}", + ) + assert response.status_code == HTTPStatus.NOT_FOUND.value + + response = unversioned_client.delete( + f"{delete_api_version}/projects/{online_project.metadata.name}", + ) + if response.status_code == HTTPStatus.ACCEPTED.value: + assert delete_api_version == "v2" background_task = mlrun.common.schemas.BackgroundTask(**response.json()) background_task = server.api.utils.background_tasks.InternalBackgroundTasksHandler().get_background_task( background_task.metadata.name @@ -997,7 +1017,18 @@ def test_delete_project_not_found_in_leader( background_task.status.state == mlrun.common.schemas.BackgroundTaskState.failed ) - assert "Project not found in Iguazio" in background_task.status.error + assert ( + "Failed to delete project online-project. Project not found in leader, but it is not in archived state." + in background_task.status.error + ) + + else: + assert response.status_code == HTTPStatus.PRECONDITION_FAILED.value + + response = unversioned_client.get( + f"v1/projects/{online_project.metadata.name}", + ) + assert response.status_code == HTTPStatus.OK.value # Test should not run more than a few seconds because we test that if the background task fails,