-
Notifications
You must be signed in to change notification settings - Fork 239
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Project] Retain producers of exported artifacts #5283
Changes from 7 commits
4b07101
5c74efb
17e168e
ae975b5
f2e2b4d
66056ec
3d5b3ed
69b4e62
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1375,14 +1375,7 @@ def register_artifacts(self): | |
artifact_path = mlrun.utils.helpers.template_artifact_path( | ||
self.spec.artifact_path or mlrun.mlconf.artifact_path, self.metadata.name | ||
) | ||
# TODO: To correctly maintain the list of artifacts from an exported project, | ||
# we need to maintain the different trees that generated them | ||
producer = ArtifactProducer( | ||
"project", | ||
self.metadata.name, | ||
self.metadata.name, | ||
tag=self._get_hexsha() or str(uuid.uuid4()), | ||
) | ||
project_tag = self._get_project_tag() | ||
for artifact_dict in self.spec.artifacts: | ||
if _is_imported_artifact(artifact_dict): | ||
import_from = artifact_dict["import_from"] | ||
|
@@ -1402,6 +1395,14 @@ def register_artifacts(self): | |
artifact.src_path = path.join( | ||
self.spec.get_code_path(), artifact.src_path | ||
) | ||
producer = self._resolve_artifact_producer(artifact, project_tag) | ||
if ( | ||
producer.name != self.metadata.name | ||
and self._resolve_existing_artifact( | ||
artifact, | ||
) | ||
): | ||
continue | ||
TomerShor marked this conversation as resolved.
Show resolved
Hide resolved
|
||
artifact_manager.log_artifact( | ||
producer, artifact, artifact_path=artifact_path | ||
) | ||
|
@@ -1498,12 +1499,20 @@ def log_artifact( | |
artifact_path = mlrun.utils.helpers.template_artifact_path( | ||
artifact_path, self.metadata.name | ||
) | ||
producer = ArtifactProducer( | ||
"project", | ||
self.metadata.name, | ||
self.metadata.name, | ||
tag=self._get_hexsha() or str(uuid.uuid4()), | ||
) | ||
producer = self._resolve_artifact_producer(item) | ||
if producer.name != self.metadata.name: | ||
alonmr marked this conversation as resolved.
Show resolved
Hide resolved
|
||
# the artifact producer is retained, log it only if it doesn't already exist | ||
if existing_artifact := self._resolve_existing_artifact( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what if the artifact changed and we want to update it? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's a good question actually. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good question indeed. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok, that makes sense. I suggest adding a log that says the artifact is already registered so we're skipping it. |
||
item, | ||
tag, | ||
): | ||
artifact_key = item if isinstance(item, str) else item.key | ||
logger.info( | ||
"Artifact already exists, skipping logging", | ||
key=artifact_key, | ||
tag=tag, | ||
) | ||
return existing_artifact | ||
item = am.log_artifact( | ||
producer, | ||
item, | ||
|
@@ -3333,7 +3342,12 @@ def get_artifact(self, key, tag=None, iter=None, tree=None): | |
artifact = db.read_artifact( | ||
key, tag, iter=iter, project=self.metadata.name, tree=tree | ||
) | ||
return dict_to_artifact(artifact) | ||
|
||
# in tests, if an artifact is not found, the db returns None | ||
# in real usage, the db should raise an exception | ||
if artifact: | ||
return dict_to_artifact(artifact) | ||
return None | ||
|
||
def list_artifacts( | ||
self, | ||
|
@@ -3761,6 +3775,83 @@ def _validate_file_path(self, file_path: str, param_name: str): | |
f"<project.spec.get_code_path()>/<{param_name}>)." | ||
) | ||
|
||
def _resolve_artifact_producer( | ||
self, | ||
artifact: typing.Union[str, Artifact], | ||
project_producer_tag: str = None, | ||
) -> typing.Optional[ArtifactProducer]: | ||
""" | ||
Resolve the artifact producer of the given artifact. | ||
If the artifact's producer is a run, the artifact is registered with the original producer. | ||
Otherwise, the artifact is registered with the current project as the producer. | ||
|
||
:param artifact: The artifact to resolve its producer. | ||
:param project_producer_tag: The tag to use for the project as the producer. If not provided, a tag will be | ||
generated for the project. | ||
:return: A tuple of the resolved producer and the resolved artifact. | ||
""" | ||
|
||
if not isinstance(artifact, str) and artifact.producer: | ||
# if the artifact was imported from a yaml file, the producer can be a dict | ||
if isinstance(artifact.spec.producer, ArtifactProducer): | ||
producer_dict = artifact.spec.producer.get_meta() | ||
else: | ||
producer_dict = artifact.spec.producer | ||
|
||
if producer_dict.get("kind", "") == "run": | ||
return ArtifactProducer( | ||
name=producer_dict.get("name", ""), | ||
kind=producer_dict.get("kind", ""), | ||
project=producer_dict.get("project", ""), | ||
tag=producer_dict.get("tag", ""), | ||
) | ||
|
||
# do not retain the artifact's producer, replace it with the project as the producer | ||
project_producer_tag = project_producer_tag or self._get_project_tag() | ||
return ArtifactProducer( | ||
kind="project", | ||
name=self.metadata.name, | ||
project=self.metadata.name, | ||
tag=project_producer_tag, | ||
) | ||
|
||
def _resolve_existing_artifact( | ||
self, | ||
item: typing.Union[str, Artifact], | ||
tag: str = None, | ||
) -> typing.Optional[Artifact]: | ||
""" | ||
Check if there is and existing artifact with the given item and tag. | ||
If there is, return the existing artifact. Otherwise, return None. | ||
|
||
:param item: The item (or key) to check if there is an existing artifact for. | ||
:param tag: The tag to check if there is an existing artifact for. | ||
:return: The existing artifact if there is one, otherwise None. | ||
""" | ||
try: | ||
if isinstance(item, str): | ||
existing_artifact = self.get_artifact(key=item, tag=tag) | ||
else: | ||
existing_artifact = self.get_artifact( | ||
key=item.key, | ||
tag=item.tag, | ||
iter=item.iter, | ||
tree=item.tree, | ||
) | ||
if existing_artifact is not None: | ||
return existing_artifact.from_dict(existing_artifact) | ||
theSaarco marked this conversation as resolved.
Show resolved
Hide resolved
|
||
except mlrun.errors.MLRunNotFoundError: | ||
logger.debug( | ||
TomerShor marked this conversation as resolved.
Show resolved
Hide resolved
|
||
"No existing artifact was found", | ||
key=item if isinstance(item, str) else item.key, | ||
tag=tag if isinstance(item, str) else item.tag, | ||
tree=None if isinstance(item, str) else item.tree, | ||
) | ||
return None | ||
|
||
def _get_project_tag(self): | ||
return self._get_hexsha() or str(uuid.uuid4()) | ||
|
||
|
||
def _set_as_current_default_project(project: MlrunProject): | ||
mlrun.mlconf.default_project = project.metadata.name | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why was this change needed? This means the
producer
will now be in the result ofbase_dict()
for artifacts - is this intended?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When setting an artifact on the project it is using the
base_dict()
, so theproducer
property was not exported because it was a part of the_extra_fields
.This is a crucial part to maintain the producers when loading a project - to actually have it on the exported spec.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So this means that this fix will not work for any project exported prior to this PR, right?
Also, need to make sure there are no BC issues there (I don't think there are, but needs to be verified).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It obviously won't fix projects that were exported before this fix, so I'm not sure what BC issues we should check here..
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's not obvious that projects exported before the fix won't be fixed - it's the situation because the export doesn't contain the information needed for the fix.
As for BC, just to make sure that now that artifact export/import contains the producer field in the base export fields, that artifacts that were exported before won't fail importing due to it (again, I'm pretty sure they won't fail, just something to pay attention to).