Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[API] Add delete artifact data endpoints #5477

Merged
merged 21 commits into from
May 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions mlrun/common/schemas/artifact.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,3 +93,18 @@ class Artifact(pydantic.BaseModel):
metadata: ArtifactMetadata
spec: ArtifactSpec
status: ObjectStatus


class ArtifactsDeletionStrategies(mlrun.common.types.StrEnum):
"""Artifacts deletion strategies types."""

metadata_only = "metadata-only"
"""Only removes the artifact db record, leaving all related artifact data in-place"""

data_optional = "data-optional"
"""Delete the artifact data of the artifact as a best-effort.
If artifact data deletion fails still try to delete the artifact db record"""

data_force = "data-force"
"""Delete the artifact data, and if cannot delete it fail the deletion
and don’t delete the artifact db record"""
16 changes: 15 additions & 1 deletion server/api/api/endpoints/artifacts_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import server.api.crud
import server.api.utils.auth.verifier
import server.api.utils.singletons.project_member
from mlrun.common.schemas.artifact import ArtifactsFormat
from mlrun.common.schemas.artifact import ArtifactsDeletionStrategies, ArtifactsFormat
from mlrun.utils import logger
from server.api.api import deps
from server.api.api.utils import artifact_project_and_resource_name_extractor
Expand Down Expand Up @@ -226,9 +226,20 @@ async def delete_artifact(
tree: str = None,
tag: str = None,
object_uid: str = Query(None, alias="object-uid"),
deletion_strategy: ArtifactsDeletionStrategies = ArtifactsDeletionStrategies.metadata_only,
secrets: dict = None,
auth_info: mlrun.common.schemas.AuthInfo = Depends(deps.authenticate_request),
db_session: Session = Depends(deps.get_db_session),
):
logger.debug(
"Deleting artifact",
project=project,
key=key,
tag=tag,
producer_id=tree,
deletion_strategy=deletion_strategy,
)

await server.api.utils.auth.verifier.AuthVerifier().query_project_resource_permissions(
mlrun.common.schemas.AuthorizationResourceTypes.artifact,
project,
Expand All @@ -244,6 +255,9 @@ async def delete_artifact(
project,
object_uid,
producer_id=tree,
deletion_strategy=deletion_strategy,
secrets=secrets,
auth_info=auth_info,
)
return Response(status_code=HTTPStatus.NO_CONTENT.value)

Expand Down
71 changes: 70 additions & 1 deletion server/api/crud/artifacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,11 +185,34 @@ def delete_artifact(
db_session: sqlalchemy.orm.Session,
key: str,
tag: str = "latest",
project: str = mlrun.mlconf.default_project,
project: str = None,
object_uid: str = None,
producer_id: str = None,
deletion_strategy: mlrun.common.schemas.artifact.ArtifactsDeletionStrategies = (
mlrun.common.schemas.artifact.ArtifactsDeletionStrategies.metadata_only
),
secrets: dict = None,
auth_info: mlrun.common.schemas.AuthInfo = mlrun.common.schemas.AuthInfo(),
):
project = project or mlrun.mlconf.default_project

# delete artifacts data by deletion strategy
if deletion_strategy in [
mlrun.common.schemas.artifact.ArtifactsDeletionStrategies.data_optional,
mlrun.common.schemas.artifact.ArtifactsDeletionStrategies.data_force,
]:
self._delete_artifact_data(
db_session,
key,
tag,
project,
object_uid,
producer_id,
deletion_strategy,
secrets,
auth_info,
)

return server.api.utils.singletons.db.get_db().del_artifact(
db_session, key, tag, project, object_uid, producer_id=producer_id
)
Expand Down Expand Up @@ -227,3 +250,49 @@ def _resolve_artifact_size(artifact, auth_info):
)
if "spec" in artifact and "inline" in artifact["spec"]:
validate_inline_artifact_body_size(artifact["spec"]["inline"])

def _delete_artifact_data(
self,
db_session: sqlalchemy.orm.Session,
key: str,
tag: str = "latest",
project: str = mlrun.mlconf.default_project,
moranbental marked this conversation as resolved.
Show resolved Hide resolved
object_uid: str = None,
producer_id: str = None,
deletion_strategy: mlrun.common.schemas.artifact.ArtifactsDeletionStrategies = (
mlrun.common.schemas.artifact.ArtifactsDeletionStrategies.metadata_only
),
secrets: dict = None,
auth_info: mlrun.common.schemas.AuthInfo = mlrun.common.schemas.AuthInfo(),
):
logger.debug("Deleting artifact data", project=project, key=key, tag=tag)

try:
artifact = self.get_artifact(
db_session,
key,
tag,
project=project,
producer_id=producer_id,
object_uid=object_uid,
)
path = artifact["spec"]["target_path"]
alonmr marked this conversation as resolved.
Show resolved Hide resolved
server.api.crud.Files().delete_artifact_data(
auth_info, project, path, secrets
)
except Exception as exc:
logger.debug(
"Failed delete artifact data",
key=key,
project=project,
deletion_strategy=deletion_strategy,
err=err_to_str(exc),
)

if (
deletion_strategy
== mlrun.common.schemas.artifact.ArtifactsDeletionStrategies.data_force
):
raise mlrun.errors.MLRunInternalServerError(
"Failed to delete artifact data"
) from exc
71 changes: 63 additions & 8 deletions server/api/crud/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,11 @@
import mimetypes
from http import HTTPStatus

import mlrun.common.schemas
import mlrun.utils.singleton
import server.api.api.utils
import server.api.utils.auth.verifier
import server.api.utils.singletons.k8s
from mlrun import store_manager
from mlrun.errors import err_to_str
from mlrun.utils import logger
Expand All @@ -35,6 +38,23 @@ def get_filestat(
):
return self._get_filestat(schema, path, user, auth_info, secrets=secrets)

def delete_artifact_data(
self,
auth_info: mlrun.common.schemas.AuthInfo,
project: str,
path: str = "",
schema: str = None,
user: str = "",
secrets: dict = None,
):
secrets = secrets or {}
project_secrets = self._verify_and_get_project_secrets(project)
project_secrets.update(secrets)

self._delete_artifact_data(
schema, path, user, auth_info, project_secrets, project
)

def _get_filestat(
self,
schema: str,
Expand All @@ -43,15 +63,8 @@ def _get_filestat(
auth_info: mlrun.common.schemas.AuthInfo,
secrets: dict = None,
):
_, filename = path.split(path)
path = self._resolve_obj_path(schema, path, user)

path = server.api.api.utils.get_obj_path(schema, path, user=user)
if not path:
server.api.api.utils.log_and_raise(
HTTPStatus.NOT_FOUND.value,
path=path,
err="illegal path prefix or schema",
)
logger.debug("Got get filestat request", path=path)

secrets = secrets or {}
Expand All @@ -74,3 +87,45 @@ def _get_filestat(
"modified": stat.modified,
"mimetype": ctype,
}

def _delete_artifact_data(
self,
schema: str,
path: str,
user: str,
auth_info: mlrun.common.schemas.AuthInfo,
secrets: dict = None,
project: str = "",
):
path = self._resolve_obj_path(schema, path, user)

secrets = secrets or {}
secrets.update(server.api.api.utils.get_secrets(auth_info))

obj = store_manager.object(url=path, secrets=secrets, project=project)
obj.delete()

@staticmethod
def _resolve_obj_path(schema: str, path: str, user: str):
path = server.api.api.utils.get_obj_path(schema, path, user=user)
if not path:
server.api.api.utils.log_and_raise(
HTTPStatus.NOT_FOUND.value,
path=path,
err="illegal path prefix or schema",
)
return path

@staticmethod
def _verify_and_get_project_secrets(project):
if not server.api.utils.singletons.k8s.get_k8s_helper(
silent=True
).is_running_inside_kubernetes_cluster():
return {}

secrets_data = server.api.crud.Secrets().list_project_secrets(
project,
mlrun.common.schemas.SecretProviderName.kubernetes,
allow_secrets_from_k8s=True,
)
return secrets_data.secrets or {}
16 changes: 15 additions & 1 deletion server/api/rundb/sqldb.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from sqlalchemy.exc import SQLAlchemyError

import mlrun.common.schemas
import mlrun.common.schemas.artifact
import mlrun.db.factory
import mlrun.model_monitoring.model_endpoint
import server.api.crud
Expand Down Expand Up @@ -233,13 +234,26 @@ def list_artifacts(
producer_id=tree,
)

def del_artifact(self, key, tag="", project="", tree=None, uid=None):
def del_artifact(
self,
key,
tag="",
project="",
tree=None,
uid=None,
deletion_strategy: mlrun.common.schemas.artifact.ArtifactsDeletionStrategies = (
mlrun.common.schemas.artifact.ArtifactsDeletionStrategies.metadata_only
),
secrets: dict = None,
):
return self._transform_db_error(
server.api.crud.Artifacts().delete_artifact,
self.session,
key,
tag,
project,
deletion_strategy=deletion_strategy,
secrets=secrets,
)

def del_artifacts(self, name="", project="", tag="", labels=None):
Expand Down
61 changes: 58 additions & 3 deletions tests/api/api/test_artifacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@

import mlrun.artifacts
import mlrun.common.schemas
import server.api.api.endpoints.artifacts_v2
import server.api.crud.files
from mlrun.common.constants import MYSQL_MEDIUMBLOB_SIZE_BYTES
from mlrun.utils.helpers import is_legacy_artifact

Expand All @@ -35,11 +37,11 @@
STORE_API_ARTIFACTS_PATH = API_ARTIFACTS_PATH + "/{uid}/{key}?tag={tag}"
GET_API_ARTIFACT_PATH = API_ARTIFACTS_PATH + "/{key}?tag={tag}"
LIST_API_ARTIFACTS_PATH_WITH_TAG = API_ARTIFACTS_PATH + "?tag={tag}"
DELETE_API_ARTIFACTS_PATH = API_ARTIFACTS_PATH + "/{key}?tag={tag}"
DELETE_API_ARTIFACTS_PATH = API_ARTIFACTS_PATH + "/{key}"

# V2 endpoints
V2_PREFIX = "v2/"
STORE_API_ARTIFACTS_V2_PATH = V2_PREFIX + API_ARTIFACTS_PATH + "/{key}"
DELETE_API_ARTIFACTS_V2_PATH = V2_PREFIX + DELETE_API_ARTIFACTS_PATH


def test_list_artifact_tags(db: Session, client: TestClient) -> None:
Expand Down Expand Up @@ -189,7 +191,7 @@ def test_create_artifact(db: Session, unversioned_client: TestClient):
},
"status": {},
}
url = f"v2/projects/{PROJECT}/artifacts"
url = V2_PREFIX + API_ARTIFACTS_PATH.format(project=PROJECT)
resp = unversioned_client.post(
url,
json=data,
Expand Down Expand Up @@ -241,6 +243,59 @@ def test_delete_artifacts_after_storing_empty_dict(db: Session, client: TestClie
assert len(resp.json()["artifacts"]) == 0


@pytest.mark.parametrize(
"deletion_strategy, expected_status_code",
[
(
mlrun.common.schemas.artifact.ArtifactsDeletionStrategies.data_optional,
HTTPStatus.NO_CONTENT.value,
),
(
mlrun.common.schemas.artifact.ArtifactsDeletionStrategies.data_force,
HTTPStatus.INTERNAL_SERVER_ERROR.value,
),
],
)
def test_fails_deleting_artifact_data(
deletion_strategy, expected_status_code, db: Session, unversioned_client: TestClient
):
# This test attempts to delete the artifact data, but fails - the request should
# be failed or succeeded by the deletion strategy.
_create_project(unversioned_client)
artifact = mlrun.artifacts.Artifact(key=KEY, body="123", target_path="dummy-path")

resp = unversioned_client.post(
STORE_API_ARTIFACTS_PATH.format(project=PROJECT, uid=UID, key=KEY, tag=TAG),
data=artifact.to_json(),
)
assert resp.status_code == HTTPStatus.OK.value

url = DELETE_API_ARTIFACTS_V2_PATH.format(project=PROJECT, key=KEY)
url_with_deletion_strategy = url + "?deletion_strategy={deletion_strategy}"

with unittest.mock.patch(
"server.api.crud.files.Files.delete_artifact_data",
side_effect=Exception("some error"),
):
resp = unversioned_client.delete(
url_with_deletion_strategy.format(deletion_strategy=deletion_strategy)
)
assert resp.status_code == expected_status_code


def test_delete_artifact_data_default_deletion_strategy(
db: Session, unversioned_client: TestClient
):
server.api.crud.Files.delete_artifact_data = unittest.mock.MagicMock()

# checking metadata-only as default deletion_strategy
url = DELETE_API_ARTIFACTS_V2_PATH.format(project=PROJECT, key=KEY)
resp = unversioned_client.delete(url)
server.api.crud.Files.delete_artifact_data.assert_not_called()
server.api.crud.Files.delete_artifact_data.reset_mock()
assert resp.status_code == HTTPStatus.NO_CONTENT.value


def test_list_artifacts(db: Session, client: TestClient) -> None:
_create_project(client)

Expand Down
Loading
Loading