Skip to content

Commit

Permalink
Adding shelephant update --sync-search (#214)
Browse files Browse the repository at this point in the history
  • Loading branch information
tdegeus committed Feb 29, 2024
1 parent a60c18f commit 890075c
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 2 deletions.
32 changes: 31 additions & 1 deletion shelephant/dataset.py
Expand Up @@ -7,6 +7,7 @@
import textwrap
from copy import deepcopy

import click
import numpy as np
import prettytable
import tqdm
Expand Down Expand Up @@ -1282,6 +1283,11 @@ class MyFmt(
parser = argparse.ArgumentParser(formatter_class=MyFmt, description=desc)

parser.add_argument("--version", action="version", version=version)
parser.add_argument(
"--sync-search",
action="store_true",
help="Set the same search settings for all locations (except 'here').",
)
parser.add_argument(
"--base-link",
action="store_true",
Expand Down Expand Up @@ -1320,7 +1326,31 @@ def update(args: list[str]):
paths = [os.path.relpath(path, base) for path in args.path]
paths = np.unique(paths) if len(paths) > 0 else None
lock = None if not (sdir / "lock.txt").exists() else (sdir / "lock.txt").read_text().strip()
if args.force:

if args.sync_search:
names = yaml.read(sdir / "storage.yaml")
if "here" in names:
names.remove("here")
search = []
for name in names:
data = yaml.read(sdir / "storage" / f"{name}.yaml")
search += data.get("search", [])
# todo: merge search settings
search = sorted(list({yaml.dumps(i) for i in search}))
search = [yaml.loads(i) for i in search]

if not args.force:
print("Common search settings:")
print(yaml.dumps(search))
if not click.confirm("Apply to all locations?"):
raise OSError("Cancelled")

for name in names:
data = yaml.read(sdir / "storage" / f"{name}.yaml")
data["search"] = search
yaml.overwrite(sdir / "storage" / f"{name}.yaml", data)

if args.force and not args.sync_search:
assert paths is not None, "--force can only be used with path(s)"

if args.name is None:
Expand Down
20 changes: 20 additions & 0 deletions shelephant/yaml.py
Expand Up @@ -54,6 +54,26 @@ def read_item(filename: str | pathlib.Path, key: str | list[str] = []) -> list |
raise OSError(f'"{"/".join(key)}" not in "{filename}"')


def dumps(data: list | dict) -> str:
    """
    Serialise data to a YAML-formatted string.

    :param data: The list or dictionary to serialise.
    :return: The YAML text representing ``data``.
    """
    text = yaml.dump(data)
    return text


def loads(data: str) -> list | dict:
    """
    Parse a YAML-formatted string.

    :param data: The YAML text to parse.
    :return: The parsed content of ``data``.
    """
    # NOTE(review): FullLoader is more permissive than the safe loader;
    # confirm that callers only pass trusted text.
    parsed = yaml.load(data, Loader=yaml.FullLoader)
    return parsed


def dump(
filename: str | pathlib.Path, data: list | dict, force: bool = False, width: int = float("inf")
):
Expand Down
58 changes: 57 additions & 1 deletion tests/test_dataset.py
Expand Up @@ -553,7 +553,63 @@ def test_basic_manual(self):
".shelephant/storage/source2.yaml",
{"root": "../../../source2", "search": [{"rglob": "*.txt"}]},
)
f = ".shelephant/storage.yaml"
shelephant.dataset.update(["--base-link", "source1", "-q"])
shelephant.dataset.update(["--base-link", "source2", "-q"])

with cwd(dataset), contextlib.redirect_stdout(io.StringIO()) as sio:
shelephant.dataset.status(["--table", "PLAIN_COLUMNS"])

expect = [
"a.txt source1 == ==",
"b.txt source1 == ==",
"c.txt source1 == x",
"d.txt source1 == x",
"e.txt source2 x ==",
"f.txt source2 x ==",
]
ret = _plain(sio.getvalue())[1:]
self.assertEqual(ret, expect)

with cwd(dataset):
for f in ["a.txt", "b.txt", "c.txt", "d.txt"]:
self.assertEqual(pathlib.Path(f).readlink().parent.name, "source1")
for f in ["e.txt", "f.txt"]:
self.assertEqual(pathlib.Path(f).readlink().parent.name, "source2")

def test_basic_manual_sync_search(self):
with tempdir():
dataset = pathlib.Path("dataset")
source1 = pathlib.Path("source1")
source2 = pathlib.Path("source2")

dataset.mkdir()
source1.mkdir()
source2.mkdir()

with cwd(source1):
files = ["a.txt", "b.txt", "c.txt", "d.txt"]
create_dummy_files(files)

with cwd(source2):
create_dummy_files(["a.txt", "b.txt"])
create_dummy_files(["e.txt", "f.txt"], slice(6, None, None))

with cwd(dataset):
search = [{"rglob": "*.txt"}]
shelephant.dataset.init([])
shelephant.yaml.dump(
".shelephant/storage/source1.yaml",
{"root": "../../../source1", "search": search},
)
shelephant.yaml.dump(
".shelephant/storage/source2.yaml",
{"root": "../../../source2"},
)
shelephant.yaml.overwrite(
".shelephant/storage.yaml", ["here", "source1", "source2"]
)
shelephant.dataset.update(["--sync-search", "--force"])
assert shelephant.yaml.read(".shelephant/storage/source2.yaml")["search"] == search
shelephant.dataset.update(["--base-link", "source1", "-q"])
shelephant.dataset.update(["--base-link", "source2", "-q"])

Expand Down

0 comments on commit 890075c

Please sign in to comment.