Skip to content

Commit

Permalink
Normalize paths in packet metadata. (#46)
Browse files Browse the repository at this point in the history
The packet metadata keeps a record of what files are used and/or
produced by the report. When such files are directories, we want to
normalize the path separator. This will ensure that a packet produced on
Windows can be used on POSIX platforms and vice-versa.

Additionally, R (and therefore orderly2) uses forward slashes on all
platforms, including Windows. Using forward slashes in outpack-py means
packets are compatible and consistent across the two tools.

It's not obvious whether the functions that return copied paths to the
caller (eg. `orderly.shared_resource`) should return native paths or
POSIX paths. For now I've kept these as native paths.
  • Loading branch information
plietar committed May 3, 2024
1 parent c42fca9 commit b98c1f8
Show file tree
Hide file tree
Showing 5 changed files with 70 additions and 25 deletions.
13 changes: 9 additions & 4 deletions src/orderly/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,11 +68,15 @@ def resource(files):
ctx = get_active_context()
src = ctx.path if ctx.is_active else None
files_expanded = util.expand_dirs(files, workdir=src)

if ctx.is_active:
files_expanded_posix = util.as_posix_path(files_expanded)

# TODO: If strict mode, copy expanded files into the working dir
for f in files_expanded:
for f in files_expanded_posix:
ctx.packet.mark_file_immutable(f)
ctx.orderly.resources += files_expanded
ctx.orderly.resources += files_expanded_posix

return files_expanded


Expand Down Expand Up @@ -107,9 +111,10 @@ def shared_resource(
result = _copy_shared_resources(ctx.root.path, ctx.path, files)

if ctx.is_active:
for f in result.keys():
result_posix = util.as_posix_path(result)
for f in result_posix.keys():
ctx.packet.mark_file_immutable(f)
ctx.orderly.shared_resources.update(result)
ctx.orderly.shared_resources.update(result_posix)

return result

Expand Down
4 changes: 2 additions & 2 deletions src/outpack/packet.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from outpack.schema import outpack_schema_version, validate
from outpack.search import as_query, search_unique
from outpack.tools import git_info
from outpack.util import all_normal_files
from outpack.util import all_normal_files, as_posix_path


# TODO: most of these fields should be private.
Expand Down Expand Up @@ -91,7 +91,7 @@ def end(self, *, succesful=True):
self.time["end"] = time.time()
hash_algorithm = self.root.config.core.hash_algorithm
self.files = [
PacketFile.from_file(self.path, f, hash_algorithm)
PacketFile.from_file(self.path, as_posix_path(f), hash_algorithm)
for f in all_normal_files(self.path)
]
_check_immutable_files(self.files, self.immutable)
Expand Down
31 changes: 29 additions & 2 deletions src/outpack/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
import time
from contextlib import contextmanager
from itertools import filterfalse, tee
from pathlib import Path
from typing import Dict, List, Optional, Union
from pathlib import Path, PurePath
from typing import Dict, List, Optional, Union, overload


def find_file_descend(filename, path):
Expand Down Expand Up @@ -190,3 +190,30 @@ def openable_temporary_file(*, mode: str = "w+b", dir: Optional[str] = None):
os.unlink(f.name)
except OSError:
pass


@overload
def as_posix_path(paths: str) -> str: ...


@overload
def as_posix_path(paths: List[str]) -> List[str]: ...


@overload
def as_posix_path(paths: Dict[str, str]) -> Dict[str, str]: ...


def as_posix_path(paths):
"""
Convert a native path into a posix path.
This is used when exporting paths into packet metadata, ensuring the
produced packets are portable across platforms.
"""
if isinstance(paths, dict):
return {as_posix_path(k): as_posix_path(v) for (k, v) in paths.items()}
elif isinstance(paths, list):
return [as_posix_path(v) for v in paths]
else:
return PurePath(paths).as_posix()
27 changes: 10 additions & 17 deletions tests/orderly/test_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def test_can_use_implicit_resource_directory(tmp_path):
meta = root.index.metadata(id)
assert {f.path for f in meta.files} == {
"report.py",
os.path.join("data", "numbers.txt"),
"data/numbers.txt",
}
assert meta.custom["orderly"]["role"] == [
{"path": "report.py", "role": "orderly"},
Expand All @@ -169,11 +169,11 @@ def test_can_use_explicit_resource_directory(tmp_path):
meta = root.index.metadata(id)
assert {f.path for f in meta.files} == {
"report.py",
os.path.join("data", "numbers.txt"),
"data/numbers.txt",
}
assert meta.custom["orderly"]["role"] == [
{"path": "report.py", "role": "orderly"},
{"path": os.path.join("data", "numbers.txt"), "role": "resource"},
{"path": "data/numbers.txt", "role": "resource"},
]


Expand Down Expand Up @@ -328,12 +328,6 @@ def test_can_use_shared_resources(tmp_path):


def test_can_use_shared_resources_directory(tmp_path):
# TODO(mrc-5241): This test uses os.path.join to form nested paths, which
# on windows will use a backslash as the path separator. This matches the
# implementation, but we probably want to revisit this at some point, and
# normalize all paths in the metadata to use posix-style forward slashes.
from os.path import join

root = helpers.create_temporary_root(tmp_path)
helpers.copy_examples("shared_dir", root)
helpers.copy_shared_resources("data", root)
Expand All @@ -344,29 +338,29 @@ def test_can_use_shared_resources_directory(tmp_path):
assert {el.path for el in meta.files} == {
"shared_dir.py",
"result.txt",
join("shared_data", "numbers.txt"),
join("shared_data", "weights.txt"),
"shared_data/numbers.txt",
"shared_data/weights.txt",
}
assert meta.custom == {
"orderly": {
"role": unordered(
[
{"path": "shared_dir.py", "role": "orderly"},
{
"path": join("shared_data", "weights.txt"),
"path": "shared_data/weights.txt",
"role": "shared",
},
{
"path": join("shared_data", "numbers.txt"),
"path": "shared_data/numbers.txt",
"role": "shared",
},
]
),
"artefacts": [],
"description": {"display": None, "long": None, "custom": None},
"shared": {
join("shared_data", "numbers.txt"): join("data", "numbers.txt"),
join("shared_data", "weights.txt"): join("data", "weights.txt"),
"shared_data/numbers.txt": "data/numbers.txt",
"shared_data/weights.txt": "data/weights.txt",
},
}
}
Expand Down Expand Up @@ -478,9 +472,8 @@ def test_packet_files_excludes_pycache(tmp_path):
assert {f.path for f in meta.files} == {
"report.py",
"foo.txt",
os.path.join("data", "bar.txt"),
"data/bar.txt",
}

packet = tmp_path / "archive" / "report" / id
assert (packet / "data").exists()
assert not (packet / "__pycache__").exists()
Expand Down
20 changes: 20 additions & 0 deletions tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from outpack.util import (
all_normal_files,
as_posix_path,
assert_file_exists,
assert_relative_path,
expand_dirs,
Expand Down Expand Up @@ -213,3 +214,22 @@ def test_openable_temporary_file():

assert f1.read() == "Hello"
assert not os.path.exists(f1.name)


def test_as_posix_path():
from os.path import join

input = join("foo", "bar", "baz")
assert as_posix_path(input) == "foo/bar/baz"

input = [join("hello", "world"), join("foo", "bar", "baz")]
assert as_posix_path(input) == ["hello/world", "foo/bar/baz"]

input = {
join("here", "aaa"): join("there", "bbb"),
join("foo", "bar"): join("baz", "qux"),
}
assert as_posix_path(input) == {
"here/aaa": "there/bbb",
"foo/bar": "baz/qux",
}

0 comments on commit b98c1f8

Please sign in to comment.