diff --git a/mlrun/api/crud/projects.py b/mlrun/api/crud/projects.py index a4edf30da37..30010e63f31 100644 --- a/mlrun/api/crud/projects.py +++ b/mlrun/api/crud/projects.py @@ -112,7 +112,12 @@ def _verify_project_has_no_external_resources(self, project: str): # an MLRun resource (such as model-endpoints) was already verified in previous checks. Therefore, any internal # secret existing here is something that the user needs to be notified about, as MLRun didn't generate it. # Therefore, this check should remain at the end of the verification flow. - if mlrun.api.utils.singletons.k8s.get_k8s().get_project_secret_keys(project): + if ( + mlrun.mlconf.is_api_running_on_k8s() + and mlrun.api.utils.singletons.k8s.get_k8s().get_project_secret_keys( + project + ) + ): raise mlrun.errors.MLRunPreconditionFailedError( f"Project {project} can not be deleted since related resources found: project secrets" ) @@ -145,7 +150,8 @@ def delete_project_resources( mlrun.api.crud.ModelEndpoints().delete_model_endpoints_resources(name) # delete project secrets - passing None will delete all secrets - mlrun.api.utils.singletons.k8s.get_k8s().delete_project_secrets(name, None) + if mlrun.mlconf.is_api_running_on_k8s(): + mlrun.api.utils.singletons.k8s.get_k8s().delete_project_secrets(name, None) def get_project( self, session: sqlalchemy.orm.Session, name: str diff --git a/mlrun/projects/project.py b/mlrun/projects/project.py index 6b2fbd529d7..03b51c8f5f6 100644 --- a/mlrun/projects/project.py +++ b/mlrun/projects/project.py @@ -97,6 +97,7 @@ def new_project( description: str = None, subpath: str = None, save: bool = True, + overwrite: bool = False, ) -> "MlrunProject": """Create a new MLRun project, optionally load it from a yaml/zip/git template @@ -133,6 +134,8 @@ def new_project( :param description: text describing the project :param subpath: project subpath (relative to the context dir) :param save: whether to save the created project in the DB + :param overwrite: overwrite project using 'cascade' deletion strategy (deletes project resources) + if project with name exists :returns: project object """ @@ -169,7 +172,26 @@ def new_project( mlrun.mlconf.default_project = project.metadata.name pipeline_context.set(project) if save and mlrun.mlconf.dbpath: - project.save() + if overwrite: + logger.info(f"Deleting project {name} from MLRun DB due to overwrite") + _delete_project_from_db( + name, secrets, mlrun.api.schemas.DeletionStrategy.cascade + ) + + try: + project.save(store=False) + except mlrun.errors.MLRunConflictError as exc: + raise mlrun.errors.MLRunConflictError( + f"Project with name {name} already exists. " + "Use overwrite=True to overwrite the existing project." + ) from exc + logger.info( + f"Created and saved project {name}", + from_template=from_template, + overwrite=overwrite, + context=context, + save=save, + ) return project @@ -400,6 +422,11 @@ def _load_project_from_db(url, secrets, user_project=False): return db.get_project(project_name) +def _delete_project_from_db(project_name, secrets, deletion_strategy): + db = mlrun.db.get_run_db(secrets=secrets) + return db.delete_project(project_name, deletion_strategy=deletion_strategy) + + def _load_project_file(url, name="", secrets=None): try: obj = get_object(url, secrets) @@ -2031,14 +2058,25 @@ def clear_context(self): ): shutil.rmtree(self.spec.context) - def save(self, filepath=None): + def save(self, filepath=None, store=True): + """export project to yaml file and save project in database + + :store: if True, allow updating in case project already exists + """ self.export(filepath) - self.save_to_db() + self.save_to_db(store) return self - def save_to_db(self): + def save_to_db(self, store=True): + """save project to database + + :store: if True, allow updating in case project already exists + """ db = mlrun.db.get_run_db(secrets=self._secrets) - db.store_project(self.metadata.name, self.to_dict()) + if store: + return db.store_project(self.metadata.name, self.to_dict()) + + return db.create_project(self.to_dict()) def export(self, filepath=None, include_files: str = None): """save the project object into a yaml file or zip archive (default to project.yaml) diff --git a/tests/api/api/test_projects.py b/tests/api/api/test_projects.py index 748e6f4c50e..c23eaa79fd1 100644 --- a/tests/api/api/test_projects.py +++ b/tests/api/api/test_projects.py @@ -170,6 +170,7 @@ def test_delete_project_with_resources( # need to set this to False, otherwise impl will try to delete k8s resources, and will need many more # mocks to overcome this. k8s_secrets_mock.set_is_running_in_k8s_cluster(False) + mlrun.mlconf.namespace = "test-namespace" project_to_keep = "project-to-keep" project_to_remove = "project-to-remove" _create_resources_of_all_kinds(db, k8s_secrets_mock, project_to_keep) @@ -631,6 +632,7 @@ def test_delete_project_deletion_strategy_check_external_resource( project_member_mode: str, k8s_secrets_mock: tests.api.conftest.K8sSecretsMock, ) -> None: + mlrun.mlconf.namespace = "test-namespace" project = mlrun.api.schemas.Project( metadata=mlrun.api.schemas.ProjectMetadata(name="project-name"), spec=mlrun.api.schemas.ProjectSpec(), diff --git a/tests/integration/sdk_api/projects/test_project.py b/tests/integration/sdk_api/projects/test_project.py index 728f0d76e0d..b7550d2850f 100644 --- a/tests/integration/sdk_api/projects/test_project.py +++ b/tests/integration/sdk_api/projects/test_project.py @@ -26,16 +26,154 @@ class TestProject(tests.integration.sdk_api.base.TestMLRunIntegration): def test_create_project(self): project_name = "some-project" - project = mlrun.new_project(project_name) - project.save_to_db() + mlrun.new_project(project_name) projects = mlrun.get_run_db().list_projects() assert len(projects) == 1 assert projects[0].metadata.name == project_name + def test_create_project_failure_already_exists(self): + project_name = "some-project" + mlrun.new_project(project_name) + + with pytest.raises(mlrun.errors.MLRunConflictError) as exc: + mlrun.new_project(project_name) + assert ( + f"Project with name {project_name} already exists. Use overwrite=True to overwrite the existing project." + in str(exc.value) + ) + + def test_overwrite_project(self): + project_name = "some-project" + + # verify overwrite does not fail if project does not exist + project = mlrun.new_project(project_name, overwrite=True) + db = mlrun.get_run_db() + + # create several functions with several tags + labels = { + "name": "value", + "name2": "value2", + } + function = { + "test": "function", + "metadata": {"labels": labels}, + "spec": {"asd": "asdasd"}, + "status": {"bla": "blabla"}, + } + function_names = ["function_name_1", "function_name_2", "function_name_3"] + function_tags = ["some_tag", "some_tag2", "some_tag3"] + for function_name in function_names: + for function_tag in function_tags: + db.store_function( + function, + function_name, + project.metadata.name, + tag=function_tag, + versioned=True, + ) + + # create several artifacts + artifact = { + "test": "artifact", + "labels": labels, + "status": {"bla": "blabla"}, + } + artifact_keys = ["artifact_key_1", "artifact_key_2", "artifact_key_3"] + for artifact_key in artifact_keys: + db.store_artifact( + artifact_key, + artifact, + "some_uid", + tag="some_tag", + project=project.metadata.name, + ) + + projects = db.list_projects() + assert len(projects) == 1 + assert projects[0].metadata.name == project_name + + # verify artifacts and functions were created + project_artifacts = project.list_artifacts() + loaded_project_artifacts = projects[0].list_artifacts() + assert len(project_artifacts) == len(artifact_keys) + assert len(project_artifacts) == len(loaded_project_artifacts) + assert project_artifacts == loaded_project_artifacts + + project_functions = project.list_functions() + loaded_project_functions = projects[0].list_functions() + assert len(project_functions) == len(function_names) * len(function_tags) + assert len(project_functions) == len(loaded_project_functions) + + loaded_project_functions_dicts = [ + func.to_dict() for func in loaded_project_functions + ] + assert all( + [ + func.to_dict() in loaded_project_functions_dicts + for func in project_functions + ] + ) + + old_creation_time = projects[0].metadata.created + + mlrun.new_project(project_name, overwrite=True) + projects = db.list_projects() + assert len(projects) == 1 + assert projects[0].metadata.name == project_name + + # ensure cascade deletion strategy + assert projects[0].list_artifacts() == [] + assert projects[0].list_functions() is None + assert projects[0].metadata.created > old_creation_time + + def test_overwrite_empty_project(self): + project_name = "some-project" + + mlrun.new_project(project_name) + db = mlrun.get_run_db() + + projects = db.list_projects() + assert len(projects) == 1 + assert projects[0].metadata.name == project_name + + assert projects[0].list_artifacts() == [] + assert projects[0].list_functions() is None + old_creation_time = projects[0].metadata.created + + # overwrite empty project + mlrun.new_project(project_name, overwrite=True) + projects = db.list_projects() + assert len(projects) == 1 + assert projects[0].metadata.name == project_name + + assert projects[0].list_artifacts() == [] + assert projects[0].list_functions() is None + assert projects[0].metadata.created > old_creation_time + + def test_overwrite_project_failure(self): + project_name = "some-project" + + mlrun.new_project(project_name) + db = mlrun.get_run_db() + + projects = db.list_projects() + assert len(projects) == 1 + assert projects[0].metadata.name == project_name + old_creation_time = projects[0].metadata.created + + # overwrite with invalid from_template value + with pytest.raises(ValueError): + mlrun.new_project(project_name, from_template="bla", overwrite=True) + + # ensure project was not deleted + projects = db.list_projects() + assert len(projects) == 1 + assert projects[0].metadata.name == project_name + assert projects[0].metadata.created == old_creation_time + def test_load_project_from_db(self): project_name = "some-project" - project = mlrun.new_project(project_name) - project.save_to_db() + mlrun.new_project(project_name) mlrun.load_project(".", f"db://{project_name}") def test_load_project_with_save(self): diff --git a/tests/system/projects/test_project.py b/tests/system/projects/test_project.py index 48c5d90e21c..b0cecb39acf 100644 --- a/tests/system/projects/test_project.py +++ b/tests/system/projects/test_project.py @@ -58,10 +58,15 @@ def pipe_test(): @pytest.mark.enterprise class TestProject(TestMLRunSystem): project_name = "project-system-test-project" + custom_project_names_to_delete = [] def custom_setup(self): pass + def custom_teardown(self): + for name in self.custom_project_names_to_delete: + self._delete_test_project(name) + @property def assets_path(self): return ( @@ -69,8 +74,10 @@ def assets_path(self): / "assets" ) - def _create_project(self, project_name, with_repo=False): - proj = mlrun.new_project(project_name, str(self.assets_path)) + def _create_project(self, project_name, with_repo=False, overwrite=False): + proj = mlrun.new_project( + project_name, str(self.assets_path), overwrite=overwrite + ) proj.set_function( "prep_data.py", "prep-data", @@ -98,6 +105,7 @@ def _create_project(self, project_name, with_repo=False): def test_run(self): name = "pipe1" + self.custom_project_names_to_delete.append(name) # create project in context self._create_project(name) @@ -124,10 +132,9 @@ def test_run(self): assert len(functions) == 3 # prep-data, train, test assert functions[0].metadata.project == name - self._delete_test_project(name) - def test_run_artifact_path(self): name = "pipe1" + self.custom_project_names_to_delete.append(name) # create project in context self._create_project(name) @@ -146,11 +153,11 @@ def test_run_artifact_path(self): run_object = db.read_run(uid=graph_step["run_uid"], project=name) output_path = run_object["spec"]["output_path"] assert output_path == f"v3io:///projects/{name}/{run_id}" - self._delete_test_project(name) def test_run_git_load(self): # load project from git name = "pipe2" + self.custom_project_names_to_delete.append(name) project_dir = f"{projects_dir}/{name}" shutil.rmtree(project_dir, ignore_errors=True) @@ -164,10 +171,10 @@ def test_run_git_load(self): run = project2.run("main", artifact_path=f"v3io:///projects/{name}") run.wait_for_completion() assert run.state == mlrun.run.RunStatuses.succeeded, "pipeline failed" - self._delete_test_project(name) def test_run_git_build(self): name = "pipe3" + self.custom_project_names_to_delete.append(name) project_dir = f"{projects_dir}/{name}" shutil.rmtree(project_dir, ignore_errors=True) @@ -185,7 +192,6 @@ def test_run_git_build(self): ) run.wait_for_completion() assert run.state == mlrun.run.RunStatuses.succeeded, "pipeline failed" - self._delete_test_project(name) @staticmethod def _assert_cli_output(output: str, project_name: str): @@ -201,6 +207,7 @@ def _assert_cli_output(output: str, project_name: str): def test_run_cli(self): # load project from git name = "pipe4" + self.custom_project_names_to_delete.append(name) project_dir = f"{projects_dir}/{name}" shutil.rmtree(project_dir, ignore_errors=True) @@ -217,6 +224,7 @@ def test_run_cli(self): # load the project from local dir and change a workflow project2 = mlrun.load_project(project_dir) + self.custom_project_names_to_delete.append(project2.metadata.name) project2.spec.workflows = {} project2.set_workflow("kf", "./kflow.py") project2.save() @@ -236,12 +244,11 @@ def test_run_cli(self): ] out = exec_project(args) self._assert_cli_output(out, name) - self._delete_test_project(name) - self._delete_test_project(project2.metadata.name) def test_cli_with_remote(self): # load project from git name = "pipermtcli" + self.custom_project_names_to_delete.append(name) project_dir = f"{projects_dir}/{name}" shutil.rmtree(project_dir, ignore_errors=True) @@ -272,10 +279,10 @@ def test_cli_with_remote(self): ] out = exec_project(args) self._assert_cli_output(out, name) - self._delete_test_project(name) def test_inline_pipeline(self): name = "pipe5" + self.custom_project_names_to_delete.append(name) project_dir = f"{projects_dir}/{name}" shutil.rmtree(project_dir, ignore_errors=True) project = self._create_project(name, True) @@ -285,11 +292,11 @@ def test_inline_pipeline(self): ) run.wait_for_completion() assert run.state == mlrun.run.RunStatuses.succeeded, "pipeline failed" - self._delete_test_project(name) def test_get_or_create(self): # create project and save to DB name = "newproj73" + self.custom_project_names_to_delete.append(name) project_dir = f"{projects_dir}/{name}" shutil.rmtree(project_dir, ignore_errors=True) project = mlrun.get_or_create_project(name, project_dir) @@ -306,10 +313,71 @@ def test_get_or_create(self): # get project should read from context (project.yaml) project = mlrun.get_or_create_project(name, project_dir) assert project.spec.description == "mytest", "failed to get project" - self._delete_test_project(name) + + def test_new_project_overwrite(self): + # create project and save to DB + project_dir = f"{projects_dir}/{self.project_name}" + shutil.rmtree(project_dir, ignore_errors=True) + project = self._create_project(self.project_name, overwrite=True) + + db = mlrun.get_run_db() + project.sync_functions(save=True) + project.register_artifacts() + + # get project from db for creation time + project = db.get_project(name=self.project_name) + + assert len(project.list_functions()) == 5, "functions count mismatch" + assert len(project.list_artifacts()) == 1, "artifacts count mismatch" + old_creation_time = project.metadata.created + + project = mlrun.new_project( + self.project_name, str(self.assets_path), overwrite=True + ) + project.sync_functions(save=True) + project.register_artifacts() + project = db.get_project(name=self.project_name) + + assert ( + project.metadata.created > old_creation_time + ), "creation time is not after overwritten project's creation time" + + # ensure cascading delete + assert project.list_functions() is None, "project should not have functions" + assert len(project.list_artifacts()) == 0, "artifacts count mismatch" + + def test_overwrite_project_failure(self): + # create project and save to DB + project_dir = f"{projects_dir}/{self.project_name}" + shutil.rmtree(project_dir, ignore_errors=True) + project = self._create_project(self.project_name, overwrite=True) + + db = mlrun.get_run_db() + project.sync_functions(save=True) + project.register_artifacts() + + # get project from db for creation time + project = db.get_project(name=self.project_name) + + assert len(project.list_functions()) == 5, "functions count mismatch" + assert len(project.list_artifacts()) == 1, "artifacts count mismatch" + old_creation_time = project.metadata.created + + # overwrite with invalid from_template value + with pytest.raises(ValueError): + mlrun.new_project(self.project_name, from_template="bla", overwrite=True) + + # ensure project was not deleted + project = db.get_project(name=self.project_name) + assert len(project.list_functions()) == 5, "functions count mismatch" + assert len(project.list_artifacts()) == 1, "artifacts count mismatch" + assert ( + project.metadata.created == old_creation_time + ), "creation time was changed" def _test_new_pipeline(self, name, engine): project = self._create_project(name) + self.custom_project_names_to_delete.append(name) project.set_function( "gen_iris.py", "gen-iris", @@ -328,7 +396,6 @@ def _test_new_pipeline(self, name, engine): fn = project.get_function("gen-iris", ignore_cache=True) assert fn.status.state == "ready" assert fn.spec.image, "image path got cleared" - self._delete_test_project(name) def test_local_pipeline(self): self._test_new_pipeline("lclpipe", engine="local") @@ -388,6 +455,7 @@ def test_remote_pipeline_with_local_engine_from_github(self): def test_remote_from_archive(self): name = "pipe6" + self.custom_project_names_to_delete.append(name) project = self._create_project(name) archive_path = f"v3io:///projects/{project.name}/archive1.zip" project.export(archive_path) @@ -401,11 +469,11 @@ def test_remote_from_archive(self): ) assert run.state == mlrun.run.RunStatuses.succeeded, "pipeline failed" assert run.run_id, "workflow's run id failed to fetch" - self._delete_test_project() def test_local_cli(self): # load project from git name = "lclclipipe" + self.custom_project_names_to_delete.append(name) project = self._create_project(name) project.set_function( "gen_iris.py", @@ -432,11 +500,11 @@ def test_local_cli(self): out = exec_project(args) print("OUT:\n", out) assert out.find("pipeline run finished, state=Succeeded"), "pipeline failed" - self._delete_test_project(name) def test_build_and_run(self): # test that build creates a proper image and run will use the updated function (with the built image) name = "buildandrun" + self.custom_project_names_to_delete.append(name) project = mlrun.new_project(name, context=str(self.assets_path)) # test with user provided function object @@ -478,10 +546,9 @@ def test_build_and_run(self): assert fn.spec.image, "image path got cleared" assert run_result.output("score") - self._delete_test_project(name) - def test_set_secrets(self): name = "set-secrets" + self.custom_project_names_to_delete.append(name) project = mlrun.new_project(name, context=str(self.assets_path)) project.save() env_file = str(self.assets_path / "envfile") @@ -490,7 +557,3 @@ def test_set_secrets(self): project.set_secrets(file_path=env_file) secrets = db.list_project_secret_keys(name, provider="kubernetes") assert secrets.secret_keys == ["ENV_ARG1", "ENV_ARG2"] - - # Cleanup - self._run_db.delete_project_secrets(self.project_name, provider="kubernetes") - self._delete_test_project(name)