diff --git a/mlrun/api/api/endpoints/projects.py b/mlrun/api/api/endpoints/projects.py index 0b4e69c88df..e8469dacbc5 100644 --- a/mlrun/api/api/endpoints/projects.py +++ b/mlrun/api/api/endpoints/projects.py @@ -49,9 +49,13 @@ def get_project(name: str, db_session: Session = Depends(deps.get_db_session)): @router.delete("/projects/{name}", status_code=HTTPStatus.NO_CONTENT.value) def delete_project( - name: str, db_session: Session = Depends(deps.get_db_session), + name: str, + deletion_strategy: schemas.DeletionStrategy = Header( + schemas.DeletionStrategy.default(), alias=schemas.HeaderNames.deletion_strategy + ), + db_session: Session = Depends(deps.get_db_session), ): - get_project_member().delete_project(db_session, name) + get_project_member().delete_project(db_session, name, deletion_strategy) return Response(status_code=HTTPStatus.NO_CONTENT.value) diff --git a/mlrun/api/db/base.py b/mlrun/api/db/base.py index 92a48839c11..4b3a2812e25 100644 --- a/mlrun/api/db/base.py +++ b/mlrun/api/db/base.py @@ -216,7 +216,12 @@ def patch_project( pass @abstractmethod - def delete_project(self, session, name: str): + def delete_project( + self, + session, + name: str, + deletion_strategy: schemas.DeletionStrategy = schemas.DeletionStrategy.default(), + ): pass @abstractmethod diff --git a/mlrun/api/db/filedb/db.py b/mlrun/api/db/filedb/db.py index 685cde7fb09..62f2f576782 100644 --- a/mlrun/api/db/filedb/db.py +++ b/mlrun/api/db/filedb/db.py @@ -166,7 +166,12 @@ def get_project( ) -> schemas.Project: raise NotImplementedError() - def delete_project(self, session, name: str): + def delete_project( + self, + session, + name: str, + deletion_strategy: schemas.DeletionStrategy = schemas.DeletionStrategy.default(), + ): raise NotImplementedError() def create_feature_set( diff --git a/mlrun/api/db/sqldb/db.py b/mlrun/api/db/sqldb/db.py index 066d271333f..9b37e81b1a4 100644 --- a/mlrun/api/db/sqldb/db.py +++ b/mlrun/api/db/sqldb/db.py @@ -86,9 +86,12 @@ def delete_log(self, session: Session, project: str, uid: str): def _delete_logs(self, session: Session, project: str): logger.debug("Removing logs from db", project=project) - for log in self._query(session, Log, project=project): + for log in self._list_logs(session, project): self.delete_log(session, project, log.uid) + def _list_logs(self, session: Session, project: str): + return self._query(session, Log, project=project).all() + def store_run(self, session, run_data, uid, project="", iter=0): project = project or config.default_project logger.debug( @@ -393,9 +396,12 @@ def delete_function(self, session: Session, project: str, name: str): self._delete(session, Function, project=project, name=name) def _delete_functions(self, session: Session, project: str): - for function in self._query(session, Function, project=project): + for function in self._list_project_functions(session, project): self.delete_function(session, project, function.name) + def _list_project_functions(self, session: Session, project: str): + return self._query(session, Function, project=project).all() + def _delete_resources_tags(self, session: Session, project: str): for tagged_class in _tagged: self._delete(session, tagged_class, project=project) @@ -696,20 +702,28 @@ def get_project( return self._transform_project_record_to_schema(session, project_record) - def delete_project(self, session: Session, name: str): - logger.debug("Deleting project from DB", name=name) - self.del_artifacts(session, project=name) - self._delete_logs(session, name) - self.del_runs(session, project=name) - self._delete_schedules(session, name) - self._delete_functions(session, name) - self._delete_feature_sets(session, name) - self._delete_feature_vectors(session, name) - - # resources deletion should remove their tags and labels as well, but doing another try in case there are - # orphan resources - self._delete_resources_tags(session, name) - self._delete_resources_labels(session, name) + def delete_project( + self, + session: Session, + name: str, + deletion_strategy: schemas.DeletionStrategy = schemas.DeletionStrategy.default(), + ): + logger.debug( + "Deleting project from DB", name=name, deletion_strategy=deletion_strategy + ) + if deletion_strategy == schemas.DeletionStrategy.restrict: + project_record = self._get_project_record( + session, name, raise_on_not_found=False + ) + if not project_record: + return + self._verify_project_has_no_related_resources(session, name) + elif deletion_strategy == schemas.DeletionStrategy.cascade: + self._delete_project_related_resources(session, name) + else: + raise mlrun.errors.MLRunInvalidArgumentError( + f"Unknown deletion strategy: {deletion_strategy}" + ) self._delete(session, Project, name=name) def list_projects( @@ -796,6 +810,55 @@ def _get_project_record( return project_record + def _verify_project_has_no_related_resources(self, session: Session, project: str): + artifacts = self._find_artifacts(session, project, "*") + self._verify_empty_list_of_project_related_resources( + project, artifacts, "artifacts" + ) + logs = self._list_logs(session, project) + self._verify_empty_list_of_project_related_resources(project, logs, "logs") + runs = self._find_runs(session, None, project, []).all() + self._verify_empty_list_of_project_related_resources(project, runs, "runs") + schedules = self.list_schedules(session, project=project) + self._verify_empty_list_of_project_related_resources( + project, schedules, "schedules" + ) + functions = self._list_project_functions(session, project) + self._verify_empty_list_of_project_related_resources( + project, functions, "functions" + ) + feature_sets = self.list_feature_sets(session, project).feature_sets + self._verify_empty_list_of_project_related_resources( + project, feature_sets, "feature_sets" + ) + feature_vectors = self.list_feature_vectors(session, project).feature_vectors + self._verify_empty_list_of_project_related_resources( + project, feature_vectors, "feature_vectors" + ) + + def _delete_project_related_resources(self, session: Session, name: str): + self.del_artifacts(session, project=name) + self._delete_logs(session, name) + self.del_runs(session, project=name) + self._delete_schedules(session, name) + self._delete_functions(session, name) + self._delete_feature_sets(session, name) + self._delete_feature_vectors(session, name) + + # resources deletion should remove their tags and labels as well, but doing another try in case there are + # orphan resources + self._delete_resources_tags(session, name) + self._delete_resources_labels(session, name) + + @staticmethod + def _verify_empty_list_of_project_related_resources( + project: str, resources: List, resource_name: str + ): + if resources: + raise mlrun.errors.MLRunPreconditionFailedError( + f"Project {project} can not be deleted since related resources found: {resource_name}" + ) + def _get_record_by_name_tag_and_uid( self, session, cls, project: str, name: str, tag: str = None, uid: str = None, ): diff --git a/mlrun/api/schemas/__init__.py b/mlrun/api/schemas/__init__.py index 5c8b0e2854c..95810942808 100644 --- a/mlrun/api/schemas/__init__.py +++ b/mlrun/api/schemas/__init__.py @@ -8,7 +8,7 @@ BackgroundTaskSpec, BackgroundTaskStatus, ) -from .constants import Format, PatchMode, HeaderNames +from .constants import Format, PatchMode, HeaderNames, DeletionStrategy from .feature_store import ( Feature, FeatureRecord, diff --git a/mlrun/api/schemas/constants.py b/mlrun/api/schemas/constants.py index 0b2da74aa94..32aa301ef2b 100644 --- a/mlrun/api/schemas/constants.py +++ b/mlrun/api/schemas/constants.py @@ -25,8 +25,28 @@ def to_mergedeep_strategy(self) -> mergedeep.Strategy: ) +class DeletionStrategy(str, Enum): + restrict = "restrict" + cascade = "cascade" + + @staticmethod + def default(): + return DeletionStrategy.restrict + + def to_nuclio_deletion_strategy(self) -> str: + if self.value == DeletionStrategy.restrict: + return "restricted" + elif self.value == DeletionStrategy.cascade: + return "cascading" + else: + raise mlrun.errors.MLRunInvalidArgumentError( + f"Unknown deletion strategy: {self.value}" + ) + + headers_prefix = "x-mlrun-" class HeaderNames: patch_mode = f"{headers_prefix}patch-mode" + deletion_strategy = f"{headers_prefix}deletion-strategy" diff --git a/mlrun/api/utils/clients/nuclio.py b/mlrun/api/utils/clients/nuclio.py index 995dfc16107..589d1fb7628 100644 --- a/mlrun/api/utils/clients/nuclio.py +++ b/mlrun/api/utils/clients/nuclio.py @@ -8,6 +8,7 @@ import mlrun.api.schemas import mlrun.api.utils.projects.remotes.member +import mlrun.errors import mlrun.utils.singleton from mlrun.utils import logger @@ -72,14 +73,33 @@ def patch_project( ] self._put_project_to_nuclio(response_body) - def delete_project(self, session: sqlalchemy.orm.Session, name: str): - logger.debug("Deleting project in Nuclio", name=name) + def delete_project( + self, + session: sqlalchemy.orm.Session, + name: str, + deletion_strategy: mlrun.api.schemas.DeletionStrategy = mlrun.api.schemas.DeletionStrategy.default(), + ): + logger.debug( + "Deleting project in Nuclio", name=name, deletion_strategy=deletion_strategy + ) body = self._generate_request_body( mlrun.api.schemas.Project( metadata=mlrun.api.schemas.ProjectMetadata(name=name) ) ) - self._send_request_to_api("DELETE", "projects", json=body) + headers = { + "x-nuclio-delete-project-strategy": deletion_strategy.to_nuclio_deletion_strategy(), + } + try: + self._send_request_to_api("DELETE", "projects", json=body, headers=headers) + except requests.HTTPError as exc: + if exc.response.status_code != http.HTTPStatus.NOT_FOUND.value: + raise + logger.debug( + "Project not found in Nuclio. Considering deletion as successful", + name=name, + deletion_strategy=deletion_strategy, + ) def get_project( self, session: sqlalchemy.orm.Session, name: str @@ -153,7 +173,7 @@ def _send_request_to_api(self, method, path, **kwargs): {"error": error, "error_stack_trace": error_stack_trace} ) logger.warning("Request to nuclio failed", **log_kwargs) - response.raise_for_status() + mlrun.errors.raise_for_status(response) return response @staticmethod diff --git a/mlrun/api/utils/projects/leader.py b/mlrun/api/utils/projects/leader.py index cc61c32568b..50f2cf249e3 100644 --- a/mlrun/api/utils/projects/leader.py +++ b/mlrun/api/utils/projects/leader.py @@ -83,8 +83,13 @@ def patch_project( self._run_on_all_followers("patch_project", session, name, project, patch_mode) return self.get_project(session, name) - def delete_project(self, session: sqlalchemy.orm.Session, name: str): - self._run_on_all_followers("delete_project", session, name) + def delete_project( + self, + session: sqlalchemy.orm.Session, + name: str, + deletion_strategy: mlrun.api.schemas.DeletionStrategy = mlrun.api.schemas.DeletionStrategy.default(), + ): + self._run_on_all_followers("delete_project", session, name, deletion_strategy) def get_project( self, session: sqlalchemy.orm.Session, name: str diff --git a/mlrun/api/utils/projects/member.py b/mlrun/api/utils/projects/member.py index d3f8a637cbd..19aa07f459f 100644 --- a/mlrun/api/utils/projects/member.py +++ b/mlrun/api/utils/projects/member.py @@ -47,7 +47,12 @@ def patch_project( pass @abc.abstractmethod - def delete_project(self, session: sqlalchemy.orm.Session, name: str): + def delete_project( + self, + session: sqlalchemy.orm.Session, + name: str, + deletion_strategy: mlrun.api.schemas.DeletionStrategy.default(), + ): pass @abc.abstractmethod diff --git a/mlrun/api/utils/projects/remotes/member.py b/mlrun/api/utils/projects/remotes/member.py index b2eee9209e2..a82b4cf9e3f 100644 --- a/mlrun/api/utils/projects/remotes/member.py +++ b/mlrun/api/utils/projects/remotes/member.py @@ -33,7 +33,12 @@ def patch_project( pass @abc.abstractmethod - def delete_project(self, session: sqlalchemy.orm.Session, name: str): + def delete_project( + self, + session: sqlalchemy.orm.Session, + name: str, + deletion_strategy: mlrun.api.schemas.DeletionStrategy = mlrun.api.schemas.DeletionStrategy.default(), + ): pass @abc.abstractmethod diff --git a/mlrun/api/utils/projects/remotes/nop.py b/mlrun/api/utils/projects/remotes/nop.py index 3e09f2c974c..89701d4d8da 100644 --- a/mlrun/api/utils/projects/remotes/nop.py +++ b/mlrun/api/utils/projects/remotes/nop.py @@ -40,7 +40,12 @@ def patch_project( mergedeep.merge(existing_project_dict, project, strategy=strategy) self._projects[name] = mlrun.api.schemas.Project(**existing_project_dict) - def delete_project(self, session: sqlalchemy.orm.Session, name: str): + def delete_project( + self, + session: sqlalchemy.orm.Session, + name: str, + deletion_strategy: mlrun.api.schemas.DeletionStrategy = mlrun.api.schemas.DeletionStrategy.default(), + ): if name in self._projects: del self._projects[name] diff --git a/mlrun/db/base.py b/mlrun/db/base.py index ba393fe3b7d..d1834cc6668 100644 --- a/mlrun/db/base.py +++ b/mlrun/db/base.py @@ -118,7 +118,11 @@ def list_functions(self, name=None, project="", tag="", labels=None): pass @abstractmethod - def delete_project(self, name: str): + def delete_project( + self, + name: str, + deletion_strategy: schemas.DeletionStrategy = schemas.DeletionStrategy.default(), + ): pass @abstractmethod diff --git a/mlrun/db/filedb.py b/mlrun/db/filedb.py index c881a97fc7a..e0e9dd189d7 100644 --- a/mlrun/db/filedb.py +++ b/mlrun/db/filedb.py @@ -432,7 +432,11 @@ def list_projects( def get_project(self, name: str) -> mlrun.api.schemas.Project: raise NotImplementedError() - def delete_project(self, name: str): + def delete_project( + self, + name: str, + deletion_strategy: mlrun.api.schemas.DeletionStrategy = mlrun.api.schemas.DeletionStrategy.default(), + ): raise NotImplementedError() def store_project( diff --git a/mlrun/db/httpdb.py b/mlrun/db/httpdb.py index 7a74af80149..7abcdb10a3a 100644 --- a/mlrun/db/httpdb.py +++ b/mlrun/db/httpdb.py @@ -1005,10 +1005,19 @@ def get_project(self, name: str) -> mlrun.projects.MlrunProject: response = self.api_call("GET", path, error_message) return mlrun.projects.MlrunProject.from_dict(response.json()) - def delete_project(self, name: str): + def delete_project( + self, + name: str, + deletion_strategy: Union[ + str, mlrun.api.schemas.DeletionStrategy + ] = mlrun.api.schemas.DeletionStrategy.default(), + ): path = f"projects/{name}" + if isinstance(deletion_strategy, schemas.DeletionStrategy): + deletion_strategy = deletion_strategy.value + headers = {schemas.HeaderNames.deletion_strategy: deletion_strategy} error_message = f"Failed deleting project {name}" - self.api_call("DELETE", path, error_message) + self.api_call("DELETE", path, error_message, headers=headers) def store_project( self, diff --git a/mlrun/db/sqldb.py b/mlrun/db/sqldb.py index 59f53a41b0b..0911162f844 100644 --- a/mlrun/db/sqldb.py +++ b/mlrun/db/sqldb.py @@ -205,7 +205,11 @@ def create_project( ) -> mlrun.api.schemas.Project: raise NotImplementedError() - def delete_project(self, name: str): + def delete_project( + self, + name: str, + deletion_strategy: mlrun.api.schemas.DeletionStrategy = mlrun.api.schemas.DeletionStrategy.default(), + ): raise NotImplementedError() def get_project( diff --git a/mlrun/errors.py b/mlrun/errors.py index d2875eb8a6e..d2bbec093e5 100644 --- a/mlrun/errors.py +++ b/mlrun/errors.py @@ -84,6 +84,10 @@ class MLRunConflictError(MLRunHTTPStatusError): error_status_code = HTTPStatus.CONFLICT.value +class MLRunPreconditionFailedError(MLRunHTTPStatusError): + error_status_code = HTTPStatus.PRECONDITION_FAILED.value + + class MLRunIncompatibleVersionError(MLRunHTTPStatusError): error_status_code = HTTPStatus.BAD_REQUEST.value @@ -94,4 +98,5 @@ class MLRunIncompatibleVersionError(MLRunHTTPStatusError): HTTPStatus.FORBIDDEN.value: MLRunAccessDeniedError, HTTPStatus.NOT_FOUND.value: MLRunNotFoundError, HTTPStatus.CONFLICT.value: MLRunConflictError, + HTTPStatus.PRECONDITION_FAILED.value: MLRunPreconditionFailedError, } diff --git a/tests/api/api/test_projects.py b/tests/api/api/test_projects.py index d116f42c9ed..4300c3a44eb 100644 --- a/tests/api/api/test_projects.py +++ b/tests/api/api/test_projects.py @@ -6,6 +6,7 @@ from sqlalchemy.orm import Session import mlrun.api.schemas +import mlrun.errors def test_projects_crud(db: Session, client: TestClient) -> None: @@ -112,10 +113,34 @@ def test_projects_crud(db: Session, client: TestClient) -> None: extra_exclude={"spec": {"description", "desired_state"}}, ) - # delete - response = client.delete(f"/api/projects/{name1}") + # add function to project 1 + function_name = "function-name" + function = {"metadata": {"name": function_name}} + response = client.post(f"/api/func/{name1}/{function_name}", json=function) + assert response.status_code == HTTPStatus.OK.value + + # delete - restrict strategy, will fail because function exists + response = client.delete( + f"/api/projects/{name1}", + headers={ + mlrun.api.schemas.HeaderNames.deletion_strategy: mlrun.api.schemas.DeletionStrategy.restrict + }, + ) + assert response.status_code == HTTPStatus.PRECONDITION_FAILED.value + + # delete - cascade strategy, will succeed and delete function + response = client.delete( + f"/api/projects/{name1}", + headers={ + mlrun.api.schemas.HeaderNames.deletion_strategy: mlrun.api.schemas.DeletionStrategy.cascade + }, + ) assert response.status_code == HTTPStatus.NO_CONTENT.value + # ensure function is gone + response = client.get(f"/api/func/{name1}/{function_name}") + assert response.status_code == HTTPStatus.NOT_FOUND.value + # list response = client.get( "/api/projects", params={"format": mlrun.api.schemas.Format.name_only} diff --git a/tests/api/db/test_projects.py b/tests/api/db/test_projects.py index f174c96bd59..f3c33d2a292 100644 --- a/tests/api/db/test_projects.py +++ b/tests/api/db/test_projects.py @@ -43,7 +43,16 @@ def test_delete_project_with_resources( db, db_session, project_to_keep ) _assert_resources_in_project(db, db_session, project_to_remove) - db.delete_project(db_session, project_to_remove) + # deletion strategy - restrict - should fail because there are resources + with pytest.raises(mlrun.errors.MLRunPreconditionFailedError): + db.delete_project( + db_session, project_to_remove, mlrun.api.schemas.DeletionStrategy.restrict + ) + # deletion strategy - cascade - should succeed and remove all related resources + db.delete_project( + db_session, project_to_remove, mlrun.api.schemas.DeletionStrategy.cascade + ) + project_to_keep_table_name_records_count_map_after_project_removal = _assert_resources_in_project( db, db_session, project_to_keep ) @@ -59,6 +68,11 @@ def test_delete_project_with_resources( == {} ) + # deletion strategy - restrict - should succeed cause no project + db.delete_project( + db_session, project_to_remove, mlrun.api.schemas.DeletionStrategy.restrict + ) + # running only on sqldb cause filedb is not really a thing anymore, will be removed soon @pytest.mark.parametrize( diff --git a/tests/api/utils/clients/test_nuclio.py b/tests/api/utils/clients/test_nuclio.py index 3336e491eb1..a755959a069 100644 --- a/tests/api/utils/clients/test_nuclio.py +++ b/tests/api/utils/clients/test_nuclio.py @@ -7,6 +7,7 @@ import mlrun.api.schemas import mlrun.api.utils.clients.nuclio import mlrun.config +import mlrun.errors @pytest.fixture() @@ -330,11 +331,28 @@ def verify_deletion(request, context): ) == {} ) + assert ( + request.headers["x-nuclio-delete-project-strategy"] + == mlrun.api.schemas.DeletionStrategy.default().to_nuclio_deletion_strategy() + ) context.status_code = http.HTTPStatus.NO_CONTENT.value requests_mock.delete(f"{api_url}/api/projects", json=verify_deletion) nuclio_client.delete_project(None, project_name) + # assert ignoring (and not exploding) on not found + requests_mock.delete( + f"{api_url}/api/projects", status_code=http.HTTPStatus.NOT_FOUND.value + ) + nuclio_client.delete_project(None, project_name) + + # assert correctly propagating 412 errors (will be returned when project has functions) + requests_mock.delete( + f"{api_url}/api/projects", status_code=http.HTTPStatus.PRECONDITION_FAILED.value + ) + with pytest.raises(mlrun.errors.MLRunPreconditionFailedError): + nuclio_client.delete_project(None, project_name) + def _generate_project_body( name=None,