Skip to content

Commit

Permalink
[Artifacts] Parse producer URI when retaining producer (#5717)
Browse files Browse the repository at this point in the history
  • Loading branch information
TomerShor committed Jun 5, 2024
1 parent aa3d12d commit 8c0a932
Show file tree
Hide file tree
Showing 7 changed files with 108 additions and 10 deletions.
21 changes: 21 additions & 0 deletions mlrun/artifacts/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import pathlib
import re
import typing
from os.path import exists, isdir
from urllib.parse import urlparse

import mlrun.common.schemas.artifact
import mlrun.config
import mlrun.utils.regex
from mlrun.utils.helpers import (
get_local_file_schema,
template_artifact_path,
Expand Down Expand Up @@ -76,6 +78,25 @@ def get_meta(self) -> dict:
def uid(self):
return None

@staticmethod
def parse_uri(uri: str) -> tuple[str, str, str]:
    """Split an artifact producer's uri into its components.

    :param uri: artifact producer's uri in the format <project>/<uid>[-<iteration>]
    :returns: tuple of (project, uid, iteration); a component that is missing from
              the uri is returned as an empty string, and all three are empty
              strings when the uri does not match the expected format
    """
    parsed = re.match(mlrun.utils.regex.artifact_producer_uri_pattern, uri)
    if parsed is None:
        return "", "", ""

    # optional groups that did not take part in the match are None - normalize to ""
    project, uid, iteration = (
        parsed.group(name) or "" for name in ("project", "uid", "iteration")
    )
    return project, uid, iteration


def dict_to_artifact(struct: dict) -> Artifact:
kind = struct.get("kind", "")
Expand Down
10 changes: 7 additions & 3 deletions mlrun/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import mlrun
import mlrun.common.constants as mlrun_constants
import mlrun.common.schemas.notification
import mlrun.utils.regex

from .utils import (
dict_to_json,
Expand Down Expand Up @@ -1649,9 +1650,12 @@ def create_uri(project: str, uid: str, iteration: Union[int, str], tag: str = ""

@staticmethod
def parse_uri(uri: str) -> tuple[str, str, str, str]:
uri_pattern = (
r"^(?P<project>.*)@(?P<uid>.*)\#(?P<iteration>.*?)(:(?P<tag>.*))?$"
)
"""Parse the run's uri
:param uri: run uri in the format of <project>@<uid>#<iteration>[:tag]
:return: project, uid, iteration, tag
"""
uri_pattern = mlrun.utils.regex.run_uri_pattern
match = re.match(uri_pattern, uri)
if not match:
raise ValueError(
Expand Down
13 changes: 11 additions & 2 deletions mlrun/projects/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -4209,12 +4209,21 @@ def _resolve_artifact_producer(
else:
producer_dict = artifact.spec.producer

producer_tag = producer_dict.get("tag", None)
producer_project = producer_dict.get("project", None)
if not producer_tag or not producer_project:
# try resolving the producer tag from the uri
producer_uri = artifact.spec.producer.get("uri", "")
producer_project, producer_tag, _ = ArtifactProducer.parse_uri(
producer_uri
)

if producer_dict.get("kind", "") == "run":
return ArtifactProducer(
name=producer_dict.get("name", ""),
kind=producer_dict.get("kind", ""),
project=producer_dict.get("project", ""),
tag=producer_dict.get("tag", ""),
project=producer_project,
tag=producer_tag,
), True

# do not retain the artifact's producer, replace it with the project as the producer
Expand Down
2 changes: 1 addition & 1 deletion mlrun/utils/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -659,7 +659,7 @@ def parse_artifact_uri(uri, default_project=""):
[3] = tag
[4] = tree
"""
uri_pattern = r"^((?P<project>.*)/)?(?P<key>.*?)(\#(?P<iteration>.*?))?(:(?P<tag>.*?))?(@(?P<tree>.*))?$"
uri_pattern = mlrun.utils.regex.artifact_uri_pattern
match = re.match(uri_pattern, uri)
if not match:
raise ValueError(
Expand Down
9 changes: 9 additions & 0 deletions mlrun/utils/regex.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,3 +92,12 @@
# must be alphanumeric or _
# max 256 length
v3io_stream_consumer_group = [r"^(?!_)[a-zA-Z0-9_]{1,256}$"]

# URI patterns
# Each pattern exposes its components as named groups; optional components that
# are absent from the matched string are reported as None by the match object.

# run uri: <project>@<uid>#<iteration>[:<tag>]
# project and uid are matched greedily, so they extend up to the last "@" / "#"
# that still allows the rest of the pattern to match.
run_uri_pattern = r"^(?P<project>.*)@(?P<uid>.*)\#(?P<iteration>.*?)(:(?P<tag>.*))?$"

# artifact uri: [<project>/]<key>[#<iteration>][:<tag>][@<tree>]
# every component other than the key is optional.
artifact_uri_pattern = r"^((?P<project>.*)/)?(?P<key>.*?)(\#(?P<iteration>.*?))?(:(?P<tag>.*?))?(@(?P<tree>.*))?$"

# artifact producer uri: [<project>/]<uid>[-<iteration>]
# the project is greedy (everything up to the last "/"), while the uid is lazy,
# so the first "-" after the project separator starts the iteration part.
artifact_producer_uri_pattern = (
r"^((?P<project>.*)/)?(?P<uid>.*?)(\-(?P<iteration>.*?))?$"
)
44 changes: 44 additions & 0 deletions tests/artifacts/test_artifacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import uuid
from contextlib import nullcontext as does_not_raise

import deepdiff
import pandas as pd
import pytest
import yaml
Expand Down Expand Up @@ -624,3 +625,46 @@ def test_producer_in_exported_artifact():
with open(artifact_path) as file:
exported_artifact = yaml.load(file, Loader=yaml.FullLoader)
assert "producer" not in exported_artifact["spec"]


@pytest.mark.parametrize(
    "uri,expected_parsed_result",
    [
        # Full URI
        (
            "my-project/1234-1",
            ("my-project", "1234", "1"),
        ),
        # No iteration
        (
            "my-project/1234",
            ("my-project", "1234", ""),
        ),
        # No project
        (
            "1234-1",
            ("", "1234", "1"),
        ),
        # No UID
        (
            "my-project/-1",
            ("my-project", "", "1"),
        ),
        # just iteration
        (
            "-1",
            ("", "", "1"),
        ),
        # Nothing
        (
            "",
            ("", "", ""),
        ),
    ],
)
def test_artifact_producer_parse_uri(uri, expected_parsed_result):
    """Verify that ArtifactProducer.parse_uri splits a producer uri correctly.

    :param uri: producer uri in the format <project>/<uid>[-<iteration>]
    :param expected_parsed_result: the expected (project, uid, iteration) tuple
    """
    parsed_result = mlrun.artifacts.ArtifactProducer.parse_uri(uri)
    # The result is a positional (project, uid, iteration) tuple, so the comparison
    # must be order-sensitive: with ignore_order=True a result whose components are
    # swapped (e.g. uid and iteration exchanged) would still be accepted.
    assert deepdiff.DeepDiff(parsed_result, expected_parsed_result) == {}
19 changes: 15 additions & 4 deletions tests/projects/test_project.py
Original file line number Diff line number Diff line change
Expand Up @@ -993,24 +993,35 @@ def test_import_artifact_retain_producer(rundb_mock):
# create an artifact with a 'run' producer
artifact = mlrun.artifacts.Artifact(key="x", body="123", is_inline=True)
run_name = "my-run"
run_tag = "some-tag"
run_tag = "sometag123"

# we set the producer as dict so the export will work
artifact.producer = mlrun.artifacts.ArtifactProducer(
kind="run",
project=project_1.name,
name=run_name,
tag=run_tag,
).get_meta()

# imitate the artifact being produced by a run with uri and without a tag
artifact.producer["uri"] = f"{project_1.name}/{run_tag}"
artifact.producer["project"] = project_1.name

# the uri is parsed when importing the artifact, so we set the expected producer
# also, the project is removed from the producer when importing
expected_producer = {
"kind": "run",
"name": run_name,
"tag": run_tag,
}

# export the artifact
artifact_path = f"{base_path}/my-artifact.yaml"
artifact.export(artifact_path)

# import the artifact to another project
new_key = "y"
imported_artifact = project_2.import_artifact(artifact_path, new_key)
assert imported_artifact.producer == artifact.producer
assert imported_artifact.producer == expected_producer

# set the artifact on the first project
project_1.set_artifact(artifact.key, artifact)
Expand All @@ -1023,7 +1034,7 @@ def test_import_artifact_retain_producer(rundb_mock):

# make sure the artifact was registered with the new key
loaded_artifact = project_3.get_artifact(new_key)
assert loaded_artifact.producer == artifact.producer
assert loaded_artifact.producer == expected_producer


def test_replace_exported_artifact_producer(rundb_mock):
Expand Down

0 comments on commit 8c0a932

Please sign in to comment.