Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Artifacts] Parse producer URI when retaining producer #5717

Merged
merged 3 commits into from
Jun 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions mlrun/artifacts/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import pathlib
import re
import typing
from os.path import exists, isdir
from urllib.parse import urlparse

import mlrun.common.schemas.artifact
import mlrun.config
import mlrun.utils.regex
from mlrun.utils.helpers import (
get_local_file_schema,
template_artifact_path,
Expand Down Expand Up @@ -76,6 +78,25 @@ def get_meta(self) -> dict:
def uid(self):
return None

@staticmethod
def parse_uri(uri: str) -> tuple[str, str, str]:
    """Split an artifact producer's uri into its components

    :param uri: artifact producer's uri in the format <project>/<uid>[-<iteration>]
    :returns:   tuple of project, uid, iteration - any component that is absent from the
                uri is returned as an empty string, and a uri that does not match the
                pattern at all yields three empty strings
    """
    matched = re.match(mlrun.utils.regex.artifact_producer_uri_pattern, uri)
    if matched is None:
        return "", "", ""

    # optional groups that did not participate in the match come back as None,
    # normalize them to empty strings
    project, uid, iteration = (
        matched.group(part) or "" for part in ("project", "uid", "iteration")
    )
    return project, uid, iteration


def dict_to_artifact(struct: dict) -> Artifact:
kind = struct.get("kind", "")
Expand Down
10 changes: 7 additions & 3 deletions mlrun/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import mlrun
import mlrun.common.constants as mlrun_constants
import mlrun.common.schemas.notification
import mlrun.utils.regex

from .utils import (
dict_to_json,
Expand Down Expand Up @@ -1649,9 +1650,12 @@ def create_uri(project: str, uid: str, iteration: Union[int, str], tag: str = ""

@staticmethod
def parse_uri(uri: str) -> tuple[str, str, str, str]:
uri_pattern = (
r"^(?P<project>.*)@(?P<uid>.*)\#(?P<iteration>.*?)(:(?P<tag>.*))?$"
)
"""Parse the run's uri

:param uri: run uri in the format of <project>@<uid>#<iteration>[:tag]
:return: project, uid, iteration, tag
"""
uri_pattern = mlrun.utils.regex.run_uri_pattern
match = re.match(uri_pattern, uri)
if not match:
raise ValueError(
Expand Down
13 changes: 11 additions & 2 deletions mlrun/projects/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -4140,12 +4140,21 @@ def _resolve_artifact_producer(
else:
producer_dict = artifact.spec.producer

producer_tag = producer_dict.get("tag", None)
producer_project = producer_dict.get("project", None)
if not producer_tag or not producer_project:
# try resolving the producer tag from the uri
producer_uri = artifact.spec.producer.get("uri", "")
producer_project, producer_tag, _ = ArtifactProducer.parse_uri(
producer_uri
)

if producer_dict.get("kind", "") == "run":
return ArtifactProducer(
name=producer_dict.get("name", ""),
kind=producer_dict.get("kind", ""),
project=producer_dict.get("project", ""),
tag=producer_dict.get("tag", ""),
project=producer_project,
tag=producer_tag,
), True

# do not retain the artifact's producer, replace it with the project as the producer
Expand Down
2 changes: 1 addition & 1 deletion mlrun/utils/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -659,7 +659,7 @@ def parse_artifact_uri(uri, default_project=""):
[3] = tag
[4] = tree
"""
uri_pattern = r"^((?P<project>.*)/)?(?P<key>.*?)(\#(?P<iteration>.*?))?(:(?P<tag>.*?))?(@(?P<tree>.*))?$"
uri_pattern = mlrun.utils.regex.artifact_uri_pattern
match = re.match(uri_pattern, uri)
if not match:
raise ValueError(
Expand Down
9 changes: 9 additions & 0 deletions mlrun/utils/regex.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,3 +92,12 @@
# must be alphanumeric or _
# max 256 length
v3io_stream_consumer_group = [r"^(?!_)[a-zA-Z0-9_]{1,256}$"]

# URI patterns
# run uri format: <project>@<uid>#<iteration>[:<tag>]
run_uri_pattern = r"^(?P<project>.*)@(?P<uid>.*)\#(?P<iteration>.*?)(:(?P<tag>.*))?$"

# artifact uri format: [<project>/]<key>[#<iteration>][:<tag>][@<tree>]
artifact_uri_pattern = r"^((?P<project>.*)/)?(?P<key>.*?)(\#(?P<iteration>.*?))?(:(?P<tag>.*?))?(@(?P<tree>.*))?$"

# artifact producer uri format: [<project>/]<uid>[-<iteration>]
# NOTE: <uid> is matched lazily, so a uid that itself contains "-" is split at its
# first hyphen and everything after that hyphen is captured as <iteration>
artifact_producer_uri_pattern = (
r"^((?P<project>.*)/)?(?P<uid>.*?)(\-(?P<iteration>.*?))?$"
)
44 changes: 44 additions & 0 deletions tests/artifacts/test_artifacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import uuid
from contextlib import nullcontext as does_not_raise

import deepdiff
import pandas as pd
import pytest
import yaml
Expand Down Expand Up @@ -624,3 +625,46 @@ def test_producer_in_exported_artifact():
with open(artifact_path) as file:
exported_artifact = yaml.load(file, Loader=yaml.FullLoader)
assert "producer" not in exported_artifact["spec"]


@pytest.mark.parametrize(
    "uri,expected_parsed_result",
    [
        # Full URI
        (
            "my-project/1234-1",
            ("my-project", "1234", "1"),
        ),
        # No iteration
        (
            "my-project/1234",
            ("my-project", "1234", ""),
        ),
        # No project
        (
            "1234-1",
            ("", "1234", "1"),
        ),
        # No UID
        (
            "my-project/-1",
            ("my-project", "", "1"),
        ),
        # just iteration
        (
            "-1",
            ("", "", "1"),
        ),
        # Nothing
        (
            "",
            ("", "", ""),
        ),
    ],
)
def test_artifact_producer_parse_uri(uri, expected_parsed_result):
    """Verify that ArtifactProducer.parse_uri returns (project, uid, iteration) for each uri shape"""
    parsed_result = mlrun.artifacts.ArtifactProducer.parse_uri(uri)
    # The position of each element is part of the contract (project, uid, iteration),
    # so compare the tuples directly - an order-insensitive comparison would also
    # accept a result whose components were swapped
    assert parsed_result == expected_parsed_result
19 changes: 15 additions & 4 deletions tests/projects/test_project.py
Original file line number Diff line number Diff line change
Expand Up @@ -993,24 +993,35 @@ def test_import_artifact_retain_producer(rundb_mock):
# create an artifact with a 'run' producer
artifact = mlrun.artifacts.Artifact(key="x", body="123", is_inline=True)
run_name = "my-run"
run_tag = "some-tag"
run_tag = "sometag123"

# we set the producer as dict so the export will work
artifact.producer = mlrun.artifacts.ArtifactProducer(
kind="run",
project=project_1.name,
name=run_name,
tag=run_tag,
).get_meta()

# imitate the artifact being produced by a run with uri and without a tag
artifact.producer["uri"] = f"{project_1.name}/{run_tag}"
artifact.producer["project"] = project_1.name

# the uri is parsed when importing the artifact, so we set the expected producer
# also, the project is removed from the producer when importing
expected_producer = {
"kind": "run",
"name": run_name,
"tag": run_tag,
}

# export the artifact
artifact_path = f"{base_path}/my-artifact.yaml"
artifact.export(artifact_path)

# import the artifact to another project
new_key = "y"
imported_artifact = project_2.import_artifact(artifact_path, new_key)
assert imported_artifact.producer == artifact.producer
assert imported_artifact.producer == expected_producer

# set the artifact on the first project
project_1.set_artifact(artifact.key, artifact)
Expand All @@ -1023,7 +1034,7 @@ def test_import_artifact_retain_producer(rundb_mock):

# make sure the artifact was registered with the new key
loaded_artifact = project_3.get_artifact(new_key)
assert loaded_artifact.producer == artifact.producer
assert loaded_artifact.producer == expected_producer


def test_replace_exported_artifact_producer(rundb_mock):
Expand Down
Loading