Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 12 additions & 16 deletions dvc/dvcfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,7 @@
from dvc.stage.loader import SingleStageLoader, StageLoader
from dvc.utils import relpath
from dvc.utils.collections import apply_diff
from dvc.utils.stage import (
dump_stage_file,
parse_stage,
parse_stage_for_update,
)
from dvc.utils.yaml import dump_yaml, parse_yaml, parse_yaml_for_update

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -101,7 +97,7 @@ def _load(self):

with self.repo.tree.open(self.path) as fd:
stage_text = fd.read()
d = parse_stage(stage_text, self.path)
d = parse_yaml(stage_text, self.path)
self.validate(d, self.relpath)
return d, stage_text

Expand Down Expand Up @@ -149,7 +145,7 @@ def dump(self, stage, **kwargs):
logger.debug(
"Saving information to '{file}'.".format(file=relpath(self.path))
)
dump_stage_file(self.path, serialize.to_single_stage_file(stage))
dump_yaml(self.path, serialize.to_single_stage_file(stage))
self.repo.scm.track_file(self.relpath)

def remove_with_prompt(self, force=False):
Expand Down Expand Up @@ -198,7 +194,7 @@ def _dump_pipeline_file(self, stage):
data = {}
if self.exists():
with open(self.path) as fd:
data = parse_stage_for_update(fd.read(), self.path)
data = parse_yaml_for_update(fd.read(), self.path)
else:
logger.info("Creating '%s'", self.relpath)
open(self.path, "w+").close()
Expand All @@ -214,7 +210,7 @@ def _dump_pipeline_file(self, stage):
logger.info(
"Adding stage '%s' to '%s'", stage.name, self.relpath,
)
dump_stage_file(self.path, data)
dump_yaml(self.path, data)
self.repo.scm.track_file(self.relpath)

@property
Expand Down Expand Up @@ -243,15 +239,15 @@ def remove_stage(self, stage):
return

with open(self.path, "r") as f:
d = parse_stage_for_update(f.read(), self.path)
d = parse_yaml_for_update(f.read(), self.path)

self.validate(d, self.path)
if stage.name not in d.get("stages", {}):
return

logger.debug("Removing '%s' from '%s'", stage.name, self.path)
del d["stages"][stage.name]
dump_stage_file(self.path, d)
dump_yaml(self.path, d)


class Lockfile(FileMixin):
Expand All @@ -261,7 +257,7 @@ def load(self):
if not self.exists():
return {}
with self.repo.tree.open(self.path) as fd:
data = parse_stage(fd.read(), self.path)
data = parse_yaml(fd.read(), self.path)
try:
self.validate(data, fname=self.relpath)
except StageFileFormatError:
Expand All @@ -279,14 +275,14 @@ def dump(self, stage, **kwargs):
open(self.path, "w+").close()
else:
with self.repo.tree.open(self.path, "r") as fd:
data = parse_stage_for_update(fd.read(), self.path)
data = parse_yaml_for_update(fd.read(), self.path)
modified = data.get(stage.name, {}) != stage_data.get(
stage.name, {}
)
if modified:
logger.info("Updating lock file '%s'", self.relpath)
data.update(stage_data)
dump_stage_file(self.path, data)
dump_yaml(self.path, data)
if modified:
self.repo.scm.track_file(self.relpath)

Expand All @@ -295,7 +291,7 @@ def remove_stage(self, stage):
return

with open(self.path) as f:
d = parse_stage_for_update(f.read(), self.path)
d = parse_yaml_for_update(f.read(), self.path)
self.validate(d, self.path)

if stage.name not in d:
Expand All @@ -304,7 +300,7 @@ def remove_stage(self, stage):
logger.debug("Removing '%s' from '%s'", stage.name, self.path)
del d[stage.name]

dump_stage_file(self.path, d)
dump_yaml(self.path, d)


class Dvcfile:
Expand Down
5 changes: 2 additions & 3 deletions dvc/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,12 +178,11 @@ def __init__(self):
)


class StageFileCorruptedError(DvcException):
class YAMLFileCorruptedError(DvcException):
def __init__(self, path):
path = relpath(path)
super().__init__(
"unable to read DVC-file: {} "
"YAML file structure is corrupted".format(path)
f"unable to read: '{path}', YAML file structure is corrupted"
)


Expand Down
4 changes: 2 additions & 2 deletions dvc/serialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from dvc.stage.params import StageParams
from dvc.stage.utils import resolve_wdir
from dvc.utils.collections import apply_diff
from dvc.utils.stage import parse_stage_for_update
from dvc.utils.yaml import parse_yaml_for_update

if TYPE_CHECKING:
from dvc.stage import PipelineStage, Stage
Expand Down Expand Up @@ -165,7 +165,7 @@ def to_single_stage_file(stage: "Stage"):
# - apply changes to a returned structure
# - serialize it
if stage._stage_text is not None:
saved_state = parse_stage_for_update(stage._stage_text, stage.path)
saved_state = parse_yaml_for_update(stage._stage_text, stage.path)
# Stage doesn't work with meta in any way, so .dumpd() doesn't
# have it. We simply copy it over.
if "meta" in saved_state:
Expand Down
4 changes: 2 additions & 2 deletions dvc/stage/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from dvc.stage.loader import StageLoader
from dvc.utils import dict_sha256, relpath
from dvc.utils.fs import makedirs
from dvc.utils.stage import dump_stage_file
from dvc.utils.yaml import dump_yaml

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -154,7 +154,7 @@ def save(self, stage):
path = self._get_cache_path(cache_key, cache_value)
dpath = os.path.dirname(path)
makedirs(dpath, exist_ok=True)
dump_stage_file(path, cache)
dump_yaml(path, cache)

def is_cached(self, stage):
return bool(self._load(stage))
Expand Down
20 changes: 10 additions & 10 deletions dvc/utils/stage.py → dvc/utils/yaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,43 +4,43 @@
from ruamel.yaml import YAML
from ruamel.yaml.error import YAMLError

from dvc.exceptions import StageFileCorruptedError
from dvc.exceptions import YAMLFileCorruptedError

try:
from yaml import CSafeLoader as SafeLoader
except ImportError:
from yaml import SafeLoader


def load_stage_file(path):
def load_yaml(path):
with open(path, encoding="utf-8") as fd:
return parse_stage(fd.read(), path)
return parse_yaml(fd.read(), path)


def parse_stage(text, path):
def parse_yaml(text, path):
try:
return yaml.load(text, Loader=SafeLoader) or {}
except yaml.error.YAMLError as exc:
raise StageFileCorruptedError(path) from exc
raise YAMLFileCorruptedError(path) from exc


def parse_stage_for_update(text, path):
def parse_yaml_for_update(text, path):
"""Parses text into Python structure.

Unlike `parse_stage()` this returns ordered dicts, values have special
Unlike `parse_yaml()` this returns ordered dicts, values have special
attributes to store comments and line breaks. This allows us to preserve
all of those upon dump.

This one is, however, several times slower than simple `parse_stage()`.
This one is, however, several times slower than simple `parse_yaml()`.
"""
try:
yaml = YAML()
return yaml.load(text) or {}
except YAMLError as exc:
raise StageFileCorruptedError(path) from exc
raise YAMLFileCorruptedError(path) from exc


def dump_stage_file(path, data):
def dump_yaml(path, data):
with open(path, "w", encoding="utf-8") as fd:
yaml = YAML()
yaml.default_flow_style = False
Expand Down
17 changes: 8 additions & 9 deletions tests/func/test_add.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
OutputDuplicationError,
OverlappingOutputPathsError,
RecursiveAddingWhileUsingFilename,
StageFileCorruptedError,
YAMLFileCorruptedError,
)
from dvc.main import main
from dvc.output.base import OutputAlreadyTrackedError, OutputIsStageFileError
Expand All @@ -27,7 +27,7 @@
from dvc.system import System
from dvc.utils import LARGE_DIR_SIZE, file_md5, relpath
from dvc.utils.fs import path_isin
from dvc.utils.stage import load_stage_file
from dvc.utils.yaml import load_yaml
from tests.basic_env import TestDvc
from tests.utils import get_gitignore_content

Expand All @@ -46,7 +46,7 @@ def test_add(tmp_dir, dvc):
assert stage.outs[0].info["md5"] == md5
assert stage.md5 is None

assert load_stage_file("foo.dvc") == {
assert load_yaml("foo.dvc") == {
"outs": [{"md5": "acbd18db4cc2f85cedef654fccc4a4d8", "path": "foo"}],
}

Expand Down Expand Up @@ -220,14 +220,14 @@ def test(self):
ret = main(["add", foo])
self.assertEqual(ret, 0)

d = load_stage_file("foo.dvc")
d = load_yaml("foo.dvc")
self.assertEqual(d["outs"][0]["path"], foo)

bar = os.path.join(cwd, self.BAR)
ret = main(["add", bar])
self.assertEqual(ret, 0)

d = load_stage_file("bar.dvc")
d = load_yaml("bar.dvc")
self.assertEqual(d["outs"][0]["path"], self.BAR)


Expand Down Expand Up @@ -371,7 +371,7 @@ def _test(self):
stage_file = self.data_file_name + DVC_FILE_SUFFIX
self.assertTrue(os.path.exists(stage_file))

d = load_stage_file(stage_file)
d = load_yaml(stage_file)
relative_data_path = posixpath.join(
self.link_name, self.data_file_name
)
Expand Down Expand Up @@ -436,8 +436,7 @@ def test(self):
assert 1 == ret

expected_error = (
"unable to read DVC-file: {} "
"YAML file structure is corrupted".format(foo_stage)
f"unable to read: '{foo_stage}', YAML file structure is corrupted"
)

assert expected_error in self._caplog.text
Expand Down Expand Up @@ -478,7 +477,7 @@ def test_failed_add_cleanup(tmp_dir, scm, dvc):
dvc.add("foo")
tmp_dir.gen("foo.dvc", "- broken\nyaml")

with pytest.raises(StageFileCorruptedError):
with pytest.raises(YAMLFileCorruptedError):
dvc.add("bar")

assert not os.path.exists("bar.dvc")
Expand Down
8 changes: 4 additions & 4 deletions tests/func/test_checkout.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from dvc.system import System
from dvc.utils import relpath
from dvc.utils.fs import walk_files
from dvc.utils.stage import dump_stage_file, load_stage_file
from dvc.utils.yaml import dump_yaml, load_yaml
from tests.basic_env import TestDvc, TestDvcGit
from tests.func.test_repro import TestRepro
from tests.remotes import S3
Expand Down Expand Up @@ -221,7 +221,7 @@ def test(self):
self.assertEqual(0, ret)

stage_path = self.DATA_DIR + DVC_FILE_SUFFIX
stage = load_stage_file(stage_path)
stage = load_yaml(stage_path)
staged_files = self.outs_info(stage)

# move instead of remove, to lock inode assigned to stage_files[0].path
Expand Down Expand Up @@ -304,10 +304,10 @@ def test(self):

class TestCheckoutMissingMd5InStageFile(TestRepro):
def test(self):
d = load_stage_file(self.file1_stage)
d = load_yaml(self.file1_stage)
del d[Stage.PARAM_OUTS][0][LocalRemote.PARAM_CHECKSUM]
del d[Stage.PARAM_DEPS][0][LocalRemote.PARAM_CHECKSUM]
dump_stage_file(self.file1_stage, d)
dump_yaml(self.file1_stage, d)

with pytest.raises(CheckoutError):
self.dvc.checkout(force=True)
Expand Down
8 changes: 4 additions & 4 deletions tests/func/test_commit.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from dvc.dvcfile import PIPELINE_FILE
from dvc.stage.exceptions import StageCommitError
from dvc.utils.stage import dump_stage_file, load_stage_file
from dvc.utils.yaml import dump_yaml, load_yaml


def test_commit_recursive(tmp_dir, dvc):
Expand Down Expand Up @@ -63,15 +63,15 @@ def test_commit_changed_md5(tmp_dir, dvc):
tmp_dir.gen({"file": "file content"})
(stage,) = dvc.add("file", no_commit=True)

stage_file_content = load_stage_file(stage.path)
stage_file_content = load_yaml(stage.path)
stage_file_content["md5"] = "1111111111"
dump_stage_file(stage.path, stage_file_content)
dump_yaml(stage.path, stage_file_content)

with pytest.raises(StageCommitError):
dvc.commit(stage.path)

dvc.commit(stage.path, force=True)
assert "md5" not in load_stage_file(stage.path)
assert "md5" not in load_yaml(stage.path)


def test_commit_no_exec(tmp_dir, dvc):
Expand Down
6 changes: 3 additions & 3 deletions tests/func/test_data_cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from dvc.stage.exceptions import StageNotFound
from dvc.utils import file_md5
from dvc.utils.fs import remove
from dvc.utils.stage import dump_stage_file, load_stage_file
from dvc.utils.yaml import dump_yaml, load_yaml
from tests.basic_env import TestDvc
from tests.remotes import (
GCP,
Expand Down Expand Up @@ -523,9 +523,9 @@ def _test(self):
self.main(["push"])

stage_file_path = stage.relpath
content = load_stage_file(stage_file_path)
content = load_yaml(stage_file_path)
del content["outs"][0]["md5"]
dump_stage_file(stage_file_path, content)
dump_yaml(stage_file_path, content)

with self._caplog.at_level(logging.WARNING, logger="dvc"):
self._caplog.clear()
Expand Down
6 changes: 3 additions & 3 deletions tests/func/test_dvcfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
StageFileFormatError,
)
from dvc.stage.loader import StageNotFound
from dvc.utils.stage import dump_stage_file
from dvc.utils.yaml import dump_yaml


def test_run_load_one_for_multistage(tmp_dir, dvc):
Expand Down Expand Up @@ -244,14 +244,14 @@ def test_remove_stage_on_lockfile_format_error(tmp_dir, dvc, run_copy):
lock_data = lock_file.load()
lock_data["gibberish"] = True
data["gibberish"] = True
dump_stage_file(lock_file.relpath, lock_data)
dump_yaml(lock_file.relpath, lock_data)
with pytest.raises(StageFileFormatError):
dvc_file.remove_stage(stage)

lock_file.remove()
dvc_file.dump(stage)

dump_stage_file(dvc_file.relpath, data)
dump_yaml(dvc_file.relpath, data)
with pytest.raises(StageFileFormatError):
dvc_file.remove_stage(stage)

Expand Down
Loading