Skip to content

Commit

Permalink
[Projects] Allow deleting archived projects without leader [1.6.x] (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
alonmr committed Mar 17, 2024
1 parent 79a661a commit e8f6cd0
Show file tree
Hide file tree
Showing 8 changed files with 190 additions and 56 deletions.
63 changes: 52 additions & 11 deletions server/api/api/endpoints/projects.py
Expand Up @@ -185,6 +185,15 @@ async def delete_project(
server.api.api.deps.get_db_session
),
):
# check if project exists
try:
project = await run_in_threadpool(
get_project_member().get_project, db_session, name, auth_info.session
)
except mlrun.errors.MLRunNotFoundError:
logger.info("Project not found, nothing to delete", project=name)
return fastapi.Response(status_code=http.HTTPStatus.NO_CONTENT.value)

# delete project can be responsible for deleting schedules. Schedules are running only on chief,
# that is why we re-route requests to chief
if (
Expand Down Expand Up @@ -219,7 +228,7 @@ async def delete_project(
# wait for this background task to complete before marking the task as done.
task, _ = await run_in_threadpool(
server.api.api.utils.get_or_create_project_deletion_background_task,
name,
project,
deletion_strategy,
db_session,
auth_info,
Expand All @@ -228,16 +237,46 @@ async def delete_project(
background_tasks.add_task(task)
return fastapi.Response(status_code=http.HTTPStatus.ACCEPTED.value)

is_running_in_background = await run_in_threadpool(
get_project_member().delete_project,
db_session,
name,
deletion_strategy,
auth_info.projects_role,
auth_info,
wait_for_completion=wait_for_completion,
)
if is_running_in_background:
is_running_in_background = False
force_delete = False
try:
is_running_in_background = await run_in_threadpool(
get_project_member().delete_project,
db_session,
name,
deletion_strategy,
auth_info.projects_role,
auth_info,
wait_for_completion=wait_for_completion,
)
except mlrun.errors.MLRunNotFoundError as exc:
if server.api.utils.helpers.is_request_from_leader(auth_info.projects_role):
raise exc

if project.status.state != mlrun.common.schemas.ProjectState.archived:
raise mlrun.errors.MLRunPreconditionFailedError(
f"Failed to delete project {name}. Project not found in leader, but it is not in archived state."
)

logger.warning(
"Project not found in leader, ensuring project deleted in mlrun",
project_name=name,
err=mlrun.errors.err_to_str(exc),
)
force_delete = True

if force_delete:
# In this case the wrapper delete project request is the one deleting the project because it
# doesn't exist in the leader.
await run_in_threadpool(
server.api.crud.Projects().delete_project,
db_session,
name,
deletion_strategy,
auth_info,
)

elif is_running_in_background:
return fastapi.Response(status_code=http.HTTPStatus.ACCEPTED.value)

else:
Expand All @@ -248,6 +287,8 @@ async def delete_project(
)

await get_project_member().post_delete_project(name)
if force_delete:
return fastapi.Response(status_code=http.HTTPStatus.ACCEPTED.value)
return fastapi.Response(status_code=http.HTTPStatus.NO_CONTENT.value)


Expand Down
38 changes: 27 additions & 11 deletions server/api/api/endpoints/projects_v2.py
Expand Up @@ -58,22 +58,13 @@ async def delete_project(
):
# check if project exists
try:
await run_in_threadpool(
project = await run_in_threadpool(
get_project_member().get_project, db_session, name, auth_info.session
)
except mlrun.errors.MLRunNotFoundError:
logger.info("Project not found, nothing to delete", project=name)
return fastapi.Response(status_code=http.HTTPStatus.NO_CONTENT.value)

# usually the CRUD for delete project will check permissions, however, since we are running the crud in a background
# task, we need to check permissions here. skip permission check if the request is from the leader.
if not server.api.utils.helpers.is_request_from_leader(auth_info.projects_role):
await server.api.utils.auth.verifier.AuthVerifier().query_project_permissions(
name,
mlrun.common.schemas.AuthorizationAction.delete,
auth_info,
)

# delete project can be responsible for deleting schedules. Schedules are running only on chief,
# that is why we re-route requests to chief
if (
Expand All @@ -90,6 +81,31 @@ async def delete_project(
name=name, request=request, api_version="v2"
)

# usually the CRUD for delete project will check permissions, however, since we are running the crud in a background
# task, we need to check permissions here. skip permission check if the request is from the leader.
if not server.api.utils.helpers.is_request_from_leader(auth_info.projects_role):
skip_permission_check = False
if project.status.state == mlrun.common.schemas.ProjectState.archived:
try:
await run_in_threadpool(
get_project_member().get_project,
db_session,
name,
auth_info.session,
from_leader=True,
)
except mlrun.errors.MLRunNotFoundError:
skip_permission_check = True

if not skip_permission_check:
await (
server.api.utils.auth.verifier.AuthVerifier().query_project_permissions(
name,
mlrun.common.schemas.AuthorizationAction.delete,
auth_info,
)
)

# we need to implement the verify_project_is_empty, since we don't want
# to spawn a background task for this, only to return a response
if deletion_strategy.strategy_to_check():
Expand All @@ -105,7 +121,7 @@ async def delete_project(

task, task_name = await run_in_threadpool(
server.api.api.utils.get_or_create_project_deletion_background_task,
name,
project,
deletion_strategy,
db_session,
auth_info,
Expand Down
62 changes: 46 additions & 16 deletions server/api/api/utils.py
Expand Up @@ -1068,8 +1068,8 @@ def artifact_project_and_resource_name_extractor(artifact):


def get_or_create_project_deletion_background_task(
project_name: str, deletion_strategy: str, db_session, auth_info
) -> typing.Tuple[typing.Callable, str]:
project: mlrun.common.schemas.Project, deletion_strategy: str, db_session, auth_info
) -> typing.Tuple[typing.Optional[typing.Callable], str]:
"""
This method is responsible for creating a background task for deleting a project.
The project deletion flow is as follows:
Expand Down Expand Up @@ -1114,7 +1114,7 @@ def get_or_create_project_deletion_background_task(
# therefore doesn't wait for the project deletion to complete.
wait_for_project_deletion = True

background_task_kind = background_task_kind_format.format(project_name)
background_task_kind = background_task_kind_format.format(project.metadata.name)
try:
task = server.api.utils.background_tasks.InternalBackgroundTasksHandler().get_active_background_task_by_kind(
background_task_kind,
Expand All @@ -1134,7 +1134,7 @@ def get_or_create_project_deletion_background_task(
_delete_project,
background_task_name,
db_session=db_session,
project_name=project_name,
project=project,
deletion_strategy=deletion_strategy,
auth_info=auth_info,
wait_for_project_deletion=wait_for_project_deletion,
Expand All @@ -1144,24 +1144,54 @@ def get_or_create_project_deletion_background_task(

async def _delete_project(
db_session: sqlalchemy.orm.Session,
project_name: str,
project: mlrun.common.schemas.Project,
deletion_strategy: mlrun.common.schemas.DeletionStrategy,
auth_info: mlrun.common.schemas.AuthInfo,
wait_for_project_deletion: bool,
background_task_name: str,
):
await run_in_threadpool(
get_project_member().delete_project,
db_session,
project_name,
deletion_strategy,
auth_info.projects_role,
auth_info,
wait_for_completion=True,
background_task_name=background_task_name,
)
force_delete = False
project_name = project.metadata.name
try:
await run_in_threadpool(
get_project_member().delete_project,
db_session,
project_name,
deletion_strategy,
auth_info.projects_role,
auth_info,
wait_for_completion=True,
background_task_name=background_task_name,
)
except mlrun.errors.MLRunNotFoundError as exc:
if server.api.utils.helpers.is_request_from_leader(auth_info.projects_role):
raise exc

if project.status.state != mlrun.common.schemas.ProjectState.archived:
raise mlrun.errors.MLRunPreconditionFailedError(
f"Failed to delete project {project_name}. "
"Project not found in leader, but it is not in archived state."
)

logger.warning(
"Project not found in leader, ensuring project is deleted in mlrun",
project_name=project_name,
exc=err_to_str(exc),
)
force_delete = True

if force_delete:
# In this case the wrapper delete project job is the one deleting the project because it
# doesn't exist in the leader.
await run_in_threadpool(
server.api.crud.Projects().delete_project,
db_session,
project_name,
deletion_strategy,
auth_info,
)

if wait_for_project_deletion:
elif wait_for_project_deletion:
await run_in_threadpool(
verify_project_is_deleted,
project_name,
Expand Down
25 changes: 18 additions & 7 deletions server/api/utils/clients/iguazio.py
Expand Up @@ -576,13 +576,24 @@ def _get_project_from_iguazio_without_parsing(
params = {"include": "owner"}
if enrich_owner_access_key:
params["enrich_owner_access_key"] = "true"
return self._send_request_to_api(
"GET",
f"projects/__name__/{name}",
"Failed getting project from Iguazio",
session,
params=params,
)
try:
return self._send_request_to_api(
"GET",
f"projects/__name__/{name}",
"Failed getting project from Iguazio",
session,
params=params,
)
except requests.HTTPError as exc:
if exc.response.status_code != http.HTTPStatus.NOT_FOUND.value:
raise
self._logger.debug(
"Project not found in Iguazio",
name=name,
)
raise mlrun.errors.MLRunNotFoundError(
"Project not found in Iguazio"
) from exc

def _get_project_from_iguazio(
self, session: str, name: str, include_owner_session: bool = False
Expand Down
3 changes: 3 additions & 0 deletions server/api/utils/projects/follower.py
Expand Up @@ -212,7 +212,10 @@ def get_project(
db_session: sqlalchemy.orm.Session,
name: str,
leader_session: typing.Optional[str] = None,
from_leader: bool = False,
) -> mlrun.common.schemas.Project:
if from_leader:
return self._leader_client.get_project(leader_session, name)
return server.api.crud.Projects().get_project(db_session, name)

def get_project_owner(
Expand Down
1 change: 1 addition & 0 deletions server/api/utils/projects/leader.py
Expand Up @@ -125,6 +125,7 @@ def get_project(
db_session: sqlalchemy.orm.Session,
name: str,
leader_session: typing.Optional[str] = None,
from_leader: bool = False,
) -> mlrun.common.schemas.Project:
return self._leader_follower.get_project(db_session, name)

Expand Down
1 change: 1 addition & 0 deletions server/api/utils/projects/member.py
Expand Up @@ -103,6 +103,7 @@ def get_project(
db_session: sqlalchemy.orm.Session,
name: str,
leader_session: typing.Optional[str] = None,
from_leader: bool = False,
) -> mlrun.common.schemas.Project:
pass

Expand Down
53 changes: 42 additions & 11 deletions tests/api/api/test_projects.py
Expand Up @@ -968,27 +968,47 @@ def test_delete_project_not_found_in_leader(
mock_project_follower_iguazio_client,
delete_api_version: str,
) -> None:
project = mlrun.common.schemas.Project(
metadata=mlrun.common.schemas.ProjectMetadata(name="project-name"),
archived_project = mlrun.common.schemas.Project(
metadata=mlrun.common.schemas.ProjectMetadata(name="archived-project"),
spec=mlrun.common.schemas.ProjectSpec(),
status=mlrun.common.schemas.ProjectStatus(
state=mlrun.common.schemas.ProjectState.archived
),
)

response = unversioned_client.post("v1/projects", json=project.dict())
online_project = mlrun.common.schemas.Project(
metadata=mlrun.common.schemas.ProjectMetadata(name="online-project"),
spec=mlrun.common.schemas.ProjectSpec(),
)

response = unversioned_client.post("v1/projects", json=archived_project.dict())
assert response.status_code == HTTPStatus.CREATED.value
_assert_project_response(project, response)
_assert_project_response(archived_project, response)

response = unversioned_client.post("v1/projects", json=online_project.dict())
assert response.status_code == HTTPStatus.CREATED.value
_assert_project_response(online_project, response)

with unittest.mock.patch.object(
mock_project_follower_iguazio_client,
"delete_project",
side_effect=mlrun.errors.MLRunNotFoundError("Project not found in Iguazio"),
side_effect=mlrun.errors.MLRunNotFoundError("Project not found"),
):
response = unversioned_client.delete(
f"{delete_api_version}/projects/{project.metadata.name}",
f"{delete_api_version}/projects/{archived_project.metadata.name}",
)
if delete_api_version == "v1":
assert response.status_code == HTTPStatus.NOT_FOUND.value
assert "Project not found in Iguazio" in response.json()["detail"]
else:
assert response.status_code == HTTPStatus.ACCEPTED.value

response = unversioned_client.get(
f"v1/projects/{archived_project.metadata.name}",
)
assert response.status_code == HTTPStatus.NOT_FOUND.value

response = unversioned_client.delete(
f"{delete_api_version}/projects/{online_project.metadata.name}",
)
if response.status_code == HTTPStatus.ACCEPTED.value:
assert delete_api_version == "v2"
background_task = mlrun.common.schemas.BackgroundTask(**response.json())
background_task = server.api.utils.background_tasks.InternalBackgroundTasksHandler().get_background_task(
background_task.metadata.name
Expand All @@ -997,7 +1017,18 @@ def test_delete_project_not_found_in_leader(
background_task.status.state
== mlrun.common.schemas.BackgroundTaskState.failed
)
assert "Project not found in Iguazio" in background_task.status.error
assert (
"Failed to delete project online-project. Project not found in leader, but it is not in archived state."
in background_task.status.error
)

else:
assert response.status_code == HTTPStatus.PRECONDITION_FAILED.value

response = unversioned_client.get(
f"v1/projects/{online_project.metadata.name}",
)
assert response.status_code == HTTPStatus.OK.value


# Test should not run more than a few seconds because we test that if the background task fails,
Expand Down

0 comments on commit e8f6cd0

Please sign in to comment.