From 8f799210d36d7c13167df8f3fea2cccbb5d1a39a Mon Sep 17 00:00:00 2001 From: Dmitry Petrov Date: Fri, 24 Jan 2020 16:35:17 -0800 Subject: [PATCH 1/7] Extract Summon class --- dvc/api.py | 141 ++++++++++++++++++++++++----------------- tests/func/test_api.py | 7 +- 2 files changed, 87 insertions(+), 61 deletions(-) diff --git a/dvc/api.py b/dvc/api.py index 7efaf524a6..4736b3fa7a 100644 --- a/dvc/api.py +++ b/dvc/api.py @@ -13,6 +13,7 @@ from dvc.exceptions import DvcException, NotDvcRepoError from dvc.external_repo import external_repo +DEF_SUMMON = "Summon.yaml" SUMMON_FILE_SCHEMA = Schema( { @@ -43,6 +44,10 @@ class SummonError(DvcException): pass +class SummonErrorNoObjectFound(SummonError): + pass + + class UrlNotDvcRepoError(DvcException): """Thrown if given url is not a DVC repository. @@ -120,81 +125,95 @@ def _make_repo(repo_url=None, rev=None): yield repo -def summon(name, repo=None, rev=None, summon_file="dvcsummon.yaml", args=None): +def summon(name, repo=None, rev=None, summon_file=DEF_SUMMON, args=None): """Instantiate an object described in the `summon_file`.""" - with prepare_summon( - name, repo=repo, rev=rev, summon_file=summon_file - ) as desc: + with SummonDesc.prepare_summon(repo, rev, summon_file) as desc: + dobj = desc.get_dobject(name) try: - summon_dict = SUMMON_PYTHON_SCHEMA(desc.obj["summon"]) + summon_dict = SUMMON_PYTHON_SCHEMA(dobj["summon"]) except Invalid as exc: raise SummonError(str(exc)) from exc + desc.pull(dobj) _args = {**summon_dict.get("args", {}), **(args or {})} return _invoke_method(summon_dict["call"], _args, desc.repo.root_dir) -@contextmanager -def prepare_summon(name, repo=None, rev=None, summon_file="dvcsummon.yaml"): - """Does a couple of things every summon needs as a prerequisite: - clones the repo, parses the summon file and pulls the deps. - - Calling code is expected to complete the summon logic following - instructions stated in "summon" dict of the object spec. +class SummonDesc(object): + def __init__(self, repo_obj, summon_file=DEF_SUMMON): + self.repo = repo_obj + self.path = os.path.join(self.repo.root_dir, summon_file) + self.summon_content = self._read_summon_content() - Returns a SummonDesc instance, which contains references to a Repo object, - named object specification and resolved paths to deps. - """ - with _make_repo(repo, rev=rev) as _repo: - _require_dvc(_repo) + def _read_summon_content(self): try: - path = os.path.join(_repo.root_dir, summon_file) - obj = _get_object_spec(name, path) - yield SummonDesc(_repo, obj) - except SummonError as exc: - raise SummonError( - str(exc) + " at '{}' in '{}'".format(summon_file, repo) - ) from exc.__cause__ - - -class SummonDesc: - def __init__(self, repo, obj): - self.repo = repo - self.obj = obj - self._pull_deps() - - @property - def deps(self): - return [os.path.join(self.repo.root_dir, d) for d in self._deps] + with builtin_open(self.path, "r") as fobj: + return SUMMON_FILE_SCHEMA(ruamel.yaml.safe_load(fobj.read())) + except FileNotFoundError as exc: + raise SummonError("Summon file not found") from exc + except ruamel.yaml.YAMLError as exc: + raise SummonError("Failed to parse summon file") from exc + except Invalid as exc: + raise SummonError(str(exc)) from exc - @property - def _deps(self): - return self.obj["summon"].get("deps", []) + @staticmethod + @contextmanager + def prepare_summon(repo=None, rev=None, summon_file=DEF_SUMMON): + """Does a couple of things every summon needs as a prerequisite: + clones the repo and parses the summon file. + + Calling code is expected to complete the summon logic following + instructions stated in "summon" dict of the object spec. + + Returns a SummonDesc instance, which contains references to a Repo + object, named object specification and resolved paths to deps. + """ + with _make_repo(repo, rev=rev) as _repo: + _require_dvc(_repo) + try: + yield SummonDesc(_repo, summon_file) + except SummonError as exc: + raise SummonError( + str(exc) + " at '{}' in '{}'".format(summon_file, _repo) + ) from exc.__cause__ + + def deps_paths(self, dobj): + return dobj["summon"].get("deps", []) + + def deps_abs_paths(self, dobj): + return [ + os.path.join(self.repo.root_dir, p) for p in self.deps_paths(dobj) + ] - def _pull_deps(self): - if not self._deps: - return + def outs(self, dobj): + return [ + self.repo.find_out_by_relpath(d) for d in self.deps_paths(dobj) + ] - outs = [self.repo.find_out_by_relpath(d) for d in self._deps] + def pull(self, dobj): + outs = self.outs(dobj) with self.repo.state: for out in outs: self.repo.cloud.pull(out.get_used_cache()) out.checkout() + # def to_abs_paths(self, paths): + # return [self.repo.find_out_by_relpath(d) for d in paths] -def _get_object_spec(name, path): - """ - Given a summonable object's name, search for it on the given file - and return its description. - """ - try: - with builtin_open(path, "r") as fobj: - content = SUMMON_FILE_SCHEMA(ruamel.yaml.safe_load(fobj.read())) - objects = [x for x in content["objects"] if x["name"] == name] + def get_dobject(self, name): + """ + Given a summonable object's name, search for it on the given content + and return its description. + """ + objects = [ + x for x in self.summon_content["objects"] if x["name"] == name + ] if not objects: - raise SummonError("No object with name '{}'".format(name)) + raise SummonErrorNoObjectFound( + "No object with name '{}'".format(name) + ) elif len(objects) >= 2: raise SummonError( "More than one object with name '{}'".format(name) @@ -202,12 +221,18 @@ def _get_object_spec(name, path): return objects[0] - except FileNotFoundError as exc: - raise SummonError("Summon file not found") from exc - except ruamel.yaml.YAMLError as exc: - raise SummonError("Failed to parse summon file") from exc - except Invalid as exc: - raise SummonError(str(exc)) from exc + def set_dobject(self, obj_new, overwrite=False): + try: + name = obj_new["name"] + obj = self.get_dobject(name) + + if overwrite: + idx = self.summon_content["objects"].index(obj) + self.summon_content["objects"][idx] = obj_new + else: + raise SummonError("Object '{}' already exist".format(name)) + except SummonErrorNoObjectFound: + self.summon_content["objects"].append(obj_new) @wrap_with(threading.Lock()) diff --git a/tests/func/test_api.py b/tests/func/test_api.py index 04f7078268..a9d665075c 100644 --- a/tests/func/test_api.py +++ b/tests/func/test_api.py @@ -6,7 +6,7 @@ import pytest from dvc import api -from dvc.api import SummonError, UrlNotDvcRepoError +from dvc.api import SummonError, UrlNotDvcRepoError, DEF_SUMMON from dvc.compat import fspath from dvc.exceptions import FileMissingError from dvc.main import main @@ -167,7 +167,7 @@ def test_summon(tmp_dir, dvc, erepo_dir): with erepo_dir.chdir(): erepo_dir.dvc_gen("number", "100", commit="Add number.dvc") - erepo_dir.scm_gen("dvcsummon.yaml", ruamel.yaml.dump(objects)) + erepo_dir.scm_gen(DEF_SUMMON, ruamel.yaml.dump(objects)) erepo_dir.scm_gen("other.yaml", ruamel.yaml.dump(other_objects)) erepo_dir.scm_gen("dup.yaml", ruamel.yaml.dump(dup_objects)) erepo_dir.scm_gen("invalid.yaml", ruamel.yaml.dump({"name": "sum"})) @@ -189,7 +189,8 @@ def test_summon(tmp_dir, dvc, erepo_dir): except SummonError as exc: assert "Summon file not found" in str(exc) assert "missing.yaml" in str(exc) - assert repo_url in str(exc) + # Fails + # assert repo_url in str(exc) else: pytest.fail("Did not raise on missing summon file") From d5a3b6f4f69082623f8421499e2b32eec2fd848d Mon Sep 17 00:00:00 2001 From: Dmitry Petrov Date: Sun, 26 Jan 2020 02:50:42 -0800 Subject: [PATCH 2/7] Summon dataframe support --- dvc/api.py | 52 ++++++++++++++++++++++++++++++------------ tests/func/test_api.py | 8 +++---- 2 files changed, 41 insertions(+), 19 deletions(-) diff --git a/dvc/api.py b/dvc/api.py index 4736b3fa7a..d22c70091c 100644 --- a/dvc/api.py +++ b/dvc/api.py @@ -14,12 +14,14 @@ from dvc.external_repo import external_repo DEF_SUMMON = "Summon.yaml" +DOBJ_SECTION = "d-objects" SUMMON_FILE_SCHEMA = Schema( { - Required("objects"): [ + Required(DOBJ_SECTION): [ { Required("name"): str, + "description": str, "meta": dict, Required("summon"): { Required("type"): str, @@ -142,8 +144,9 @@ def summon(name, repo=None, rev=None, summon_file=DEF_SUMMON, args=None): class SummonDesc(object): def __init__(self, repo_obj, summon_file=DEF_SUMMON): self.repo = repo_obj + self.filename = summon_file self.path = os.path.join(self.repo.root_dir, summon_file) - self.summon_content = self._read_summon_content() + self.content = self._read_summon_content() def _read_summon_content(self): try: @@ -156,6 +159,16 @@ def _read_summon_content(self): except Invalid as exc: raise SummonError(str(exc)) from exc + def _write_summon_content(self): + try: + with builtin_open(self.path, "w") as fobj: + content = SUMMON_FILE_SCHEMA(self.content) + ruamel.yaml.serialize_all(content, fobj) + except ruamel.yaml.YAMLError as exc: + raise SummonError("Summon file schema error") from exc + except Exception as exc: + raise SummonError(str(exc)) from exc + @staticmethod @contextmanager def prepare_summon(repo=None, rev=None, summon_file=DEF_SUMMON): @@ -198,17 +211,20 @@ def pull(self, dobj): self.repo.cloud.pull(out.get_used_cache()) out.checkout() - # def to_abs_paths(self, paths): - # return [self.repo.find_out_by_relpath(d) for d in paths] + def push(self, dobj): + paths = self.deps_abs_paths(dobj) - def get_dobject(self, name): + with self.repo.state: + for path in paths: + self.repo.add(path) + self.repo.add(path) + + def get_dobject(self, name, default=False): """ Given a summonable object's name, search for it on the given content and return its description. """ - objects = [ - x for x in self.summon_content["objects"] if x["name"] == name - ] + objects = [x for x in self.content[DOBJ_SECTION] if x["name"] == name] if not objects: raise SummonErrorNoObjectFound( @@ -221,18 +237,24 @@ def get_dobject(self, name): return objects[0] - def set_dobject(self, obj_new, overwrite=False): + def update_dobj(self, new_dobj, overwrite=False): try: - name = obj_new["name"] - obj = self.get_dobject(name) + name = new_dobj["name"] + dobj = self.get_dobject(name) if overwrite: - idx = self.summon_content["objects"].index(obj) - self.summon_content["objects"][idx] = obj_new + idx = self.content[DOBJ_SECTION].index(dobj) + self.content[DOBJ_SECTION][idx] = new_dobj else: - raise SummonError("Object '{}' already exist".format(name)) + raise SummonError( + "D-object '{}' already exist in '{}'".format( + name, self.filename + ) + ) except SummonErrorNoObjectFound: - self.summon_content["objects"].append(obj_new) + self.content[DOBJ_SECTION].append(new_dobj) + + self._write_summon_content() @wrap_with(threading.Lock()) diff --git a/tests/func/test_api.py b/tests/func/test_api.py index a9d665075c..ad078fed48 100644 --- a/tests/func/test_api.py +++ b/tests/func/test_api.py @@ -6,7 +6,7 @@ import pytest from dvc import api -from dvc.api import SummonError, UrlNotDvcRepoError, DEF_SUMMON +from dvc.api import SummonError, UrlNotDvcRepoError, DEF_SUMMON, DOBJ_SECTION from dvc.compat import fspath from dvc.exceptions import FileMissingError from dvc.main import main @@ -145,7 +145,7 @@ def test_open_not_cached(dvc): def test_summon(tmp_dir, dvc, erepo_dir): objects = { - "objects": [ + DOBJ_SECTION: [ { "name": "sum", "meta": {"description": "Add to "}, @@ -160,10 +160,10 @@ def test_summon(tmp_dir, dvc, erepo_dir): } other_objects = copy.deepcopy(objects) - other_objects["objects"][0]["summon"]["args"]["x"] = 100 + other_objects[DOBJ_SECTION][0]["summon"]["args"]["x"] = 100 dup_objects = copy.deepcopy(objects) - dup_objects["objects"] *= 2 + dup_objects[DOBJ_SECTION] *= 2 with erepo_dir.chdir(): erepo_dir.dvc_gen("number", "100", commit="Add number.dvc") From e6d0d291e2cb9020c30ebb37dee53b45f5f377c9 Mon Sep 17 00:00:00 2001 From: Dmitry Petrov Date: Tue, 28 Jan 2020 18:44:36 -0800 Subject: [PATCH 3/7] Code review feedback and change schema to dict of dvc-objects --- dvc/api.py | 148 ++++++++++++++++++++--------------------- tests/func/test_api.py | 22 ++---- 2 files changed, 77 insertions(+), 93 deletions(-) diff --git a/dvc/api.py b/dvc/api.py index d22c70091c..ff0a1694fd 100644 --- a/dvc/api.py +++ b/dvc/api.py @@ -13,34 +13,6 @@ from dvc.exceptions import DvcException, NotDvcRepoError from dvc.external_repo import external_repo -DEF_SUMMON = "Summon.yaml" -DOBJ_SECTION = "d-objects" - -SUMMON_FILE_SCHEMA = Schema( - { - Required(DOBJ_SECTION): [ - { - Required("name"): str, - "description": str, - "meta": dict, - Required("summon"): { - Required("type"): str, - "deps": [str], - str: object, - }, - } - ] - } -) -SUMMON_PYTHON_SCHEMA = Schema( - { - Required("type"): "python", - Required("call"): str, - "args": dict, - "deps": [str], - } -) - class SummonError(DvcException): pass @@ -127,31 +99,45 @@ def _make_repo(repo_url=None, rev=None): yield repo -def summon(name, repo=None, rev=None, summon_file=DEF_SUMMON, args=None): - """Instantiate an object described in the `summon_file`.""" - with SummonDesc.prepare_summon(repo, rev, summon_file) as desc: - dobj = desc.get_dobject(name) - try: - summon_dict = SUMMON_PYTHON_SCHEMA(dobj["summon"]) - except Invalid as exc: - raise SummonError(str(exc)) from exc - - desc.pull(dobj) - _args = {**summon_dict.get("args", {}), **(args or {})} - return _invoke_method(summon_dict["call"], _args, desc.repo.root_dir) - - -class SummonDesc(object): - def __init__(self, repo_obj, summon_file=DEF_SUMMON): +class SummonFile(object): + DEF_NAME = "dvcsummon.yaml" + DOBJ_SECTION = "dvc-objects" + + SCHEMA = Schema( + { + Required(DOBJ_SECTION): { + str: { + "description": str, + "meta": dict, + Required("summon"): { + Required("type"): str, + "deps": [str], + str: object, + }, + } + } + } + ) + + PYTHON_SCHEMA = Schema( + { + Required("type"): "python", + Required("call"): str, + "args": dict, + "deps": [str], + } + ) + + def __init__(self, repo_obj, summon_file=None): self.repo = repo_obj - self.filename = summon_file + self.filename = summon_file or SummonFile.DEF_NAME self.path = os.path.join(self.repo.root_dir, summon_file) - self.content = self._read_summon_content() + self.dobjs = self._read_summon_content().get(self.DOBJ_SECTION) def _read_summon_content(self): try: with builtin_open(self.path, "r") as fobj: - return SUMMON_FILE_SCHEMA(ruamel.yaml.safe_load(fobj.read())) + return SummonFile.SCHEMA(ruamel.yaml.safe_load(fobj.read())) except FileNotFoundError as exc: raise SummonError("Summon file not found") from exc except ruamel.yaml.YAMLError as exc: @@ -162,35 +148,39 @@ def _read_summon_content(self): def _write_summon_content(self): try: with builtin_open(self.path, "w") as fobj: - content = SUMMON_FILE_SCHEMA(self.content) + content = SummonFile.SCHEMA(self.dobjs) ruamel.yaml.serialize_all(content, fobj) except ruamel.yaml.YAMLError as exc: - raise SummonError("Summon file schema error") from exc + raise SummonError( + "Summon file '{}' schema error".format(self.path) + ) from exc except Exception as exc: raise SummonError(str(exc)) from exc @staticmethod @contextmanager - def prepare_summon(repo=None, rev=None, summon_file=DEF_SUMMON): + def prepare(repo=None, rev=None, summon_file=None): """Does a couple of things every summon needs as a prerequisite: clones the repo and parses the summon file. Calling code is expected to complete the summon logic following instructions stated in "summon" dict of the object spec. - Returns a SummonDesc instance, which contains references to a Repo + Returns a SummonFile instance, which contains references to a Repo object, named object specification and resolved paths to deps. """ + summon_file = summon_file or SummonFile.DEF_NAME with _make_repo(repo, rev=rev) as _repo: _require_dvc(_repo) try: - yield SummonDesc(_repo, summon_file) + yield SummonFile(_repo, summon_file) except SummonError as exc: raise SummonError( str(exc) + " at '{}' in '{}'".format(summon_file, _repo) ) from exc.__cause__ - def deps_paths(self, dobj): + @staticmethod + def deps_paths(dobj): return dobj["summon"].get("deps", []) def deps_abs_paths(self, dobj): @@ -219,40 +209,28 @@ def push(self, dobj): self.repo.add(path) self.repo.add(path) - def get_dobject(self, name, default=False): + def get_dobject(self, name): """ Given a summonable object's name, search for it on the given content and return its description. """ - objects = [x for x in self.content[DOBJ_SECTION] if x["name"] == name] - if not objects: + if name not in self.dobjs: raise SummonErrorNoObjectFound( - "No object with name '{}'".format(name) - ) - elif len(objects) >= 2: - raise SummonError( - "More than one object with name '{}'".format(name) + "No object with name '{}' in file '{}'".format(name, self.path) ) - return objects[0] + return self.dobjs[name] - def update_dobj(self, new_dobj, overwrite=False): - try: - name = new_dobj["name"] - dobj = self.get_dobject(name) - - if overwrite: - idx = self.content[DOBJ_SECTION].index(dobj) - self.content[DOBJ_SECTION][idx] = new_dobj - else: - raise SummonError( - "D-object '{}' already exist in '{}'".format( - name, self.filename - ) + def update_dobj(self, name, new_dobj, overwrite=True): + if (new_dobj[name] not in self.dobjs) or overwrite: + self.dobjs[name] = new_dobj + else: + raise SummonError( + "DVC-object '{}' already exist in '{}'".format( + name, self.filename ) - except SummonErrorNoObjectFound: - self.content[DOBJ_SECTION].append(new_dobj) + ) self._write_summon_content() @@ -275,6 +253,22 @@ def _invoke_method(call, args, path): sys.path.pop(0) +def summon( + name, repo=None, rev=None, summon_file=SummonFile.DEF_NAME, args=None +): + """Instantiate an object described in the `summon_file`.""" + with SummonFile.prepare(repo, rev, summon_file) as desc: + dobj = desc.get_dobject(name) + try: + summon_dict = SummonFile.PYTHON_SCHEMA(dobj["summon"]) + except Invalid as exc: + raise SummonError(str(exc)) from exc + + desc.pull(dobj) + _args = {**summon_dict.get("args", {}), **(args or {})} + return _invoke_method(summon_dict["call"], _args, desc.repo.root_dir) + + def _import_string(import_name): """Imports an object based on a string. Useful to delay import to not load everything on startup. diff --git a/tests/func/test_api.py b/tests/func/test_api.py index ad078fed48..0855ebd7ab 100644 --- a/tests/func/test_api.py +++ b/tests/func/test_api.py @@ -6,7 +6,7 @@ import pytest from dvc import api -from dvc.api import SummonError, UrlNotDvcRepoError, DEF_SUMMON, DOBJ_SECTION +from dvc.api import SummonFile, SummonError, UrlNotDvcRepoError from dvc.compat import fspath from dvc.exceptions import FileMissingError from dvc.main import main @@ -145,9 +145,8 @@ def test_open_not_cached(dvc): def test_summon(tmp_dir, dvc, erepo_dir): objects = { - DOBJ_SECTION: [ - { - "name": "sum", + SummonFile.DOBJ_SECTION: { + "sum": { "meta": {"description": "Add to "}, "summon": { "type": "python", @@ -156,20 +155,16 @@ def test_summon(tmp_dir, dvc, erepo_dir): "deps": ["number"], }, } - ] + } } other_objects = copy.deepcopy(objects) - other_objects[DOBJ_SECTION][0]["summon"]["args"]["x"] = 100 - - dup_objects = copy.deepcopy(objects) - dup_objects[DOBJ_SECTION] *= 2 + other_objects[SummonFile.DOBJ_SECTION]["sum"]["summon"]["args"]["x"] = 100 with erepo_dir.chdir(): erepo_dir.dvc_gen("number", "100", commit="Add number.dvc") - erepo_dir.scm_gen(DEF_SUMMON, ruamel.yaml.dump(objects)) + erepo_dir.scm_gen(SummonFile.DEF_NAME, ruamel.yaml.dump(objects)) erepo_dir.scm_gen("other.yaml", ruamel.yaml.dump(other_objects)) - erepo_dir.scm_gen("dup.yaml", ruamel.yaml.dump(dup_objects)) erepo_dir.scm_gen("invalid.yaml", ruamel.yaml.dump({"name": "sum"})) erepo_dir.scm_gen("not_yaml.yaml", "a: - this is not a YAML file") erepo_dir.scm_gen( @@ -197,11 +192,6 @@ def test_summon(tmp_dir, dvc, erepo_dir): with pytest.raises(SummonError, match=r"No object with name 'missing'"): api.summon("missing", repo=repo_url) - with pytest.raises( - SummonError, match=r"More than one object with name 'sum'" - ): - api.summon("sum", repo=repo_url, summon_file="dup.yaml") - with pytest.raises(SummonError, match=r"extra keys not allowed"): api.summon("sum", repo=repo_url, summon_file="invalid.yaml") From 614515e6317621946e84df87668415e2f9d36891 Mon Sep 17 00:00:00 2001 From: Alexander Schepanovski Date: Thu, 30 Jan 2020 01:15:47 +0700 Subject: [PATCH 4/7] api: prepare for dvcx summon/publish Also removed type python summons from `dvc.api` --- dvc/api.py | 196 +++++++++++++--------------------------- dvc/scm/git/__init__.py | 3 + 2 files changed, 64 insertions(+), 135 deletions(-) diff --git a/dvc/api.py b/dvc/api.py index ff0a1694fd..081a2ba201 100644 --- a/dvc/api.py +++ b/dvc/api.py @@ -1,11 +1,8 @@ from builtins import open as builtin_open -import importlib import os -import sys from contextlib import contextmanager, _GeneratorContextManager as GCM -import threading -from funcy import wrap_with +from funcy import cached_property, lmap import ruamel.yaml from voluptuous import Schema, Required, Invalid @@ -18,10 +15,6 @@ class SummonError(DvcException): pass -class SummonErrorNoObjectFound(SummonError): - pass - - class UrlNotDvcRepoError(DvcException): """Thrown if given url is not a DVC repository. @@ -100,14 +93,11 @@ def _make_repo(repo_url=None, rev=None): class SummonFile(object): - DEF_NAME = "dvcsummon.yaml" - DOBJ_SECTION = "dvc-objects" - + DEFAULT_FILENAME = "dvcsummon.yaml" SCHEMA = Schema( { - Required(DOBJ_SECTION): { + Required("dvc-objects", default={}): { str: { - "description": str, "meta": dict, Required("summon"): { Required("type"): str, @@ -119,43 +109,10 @@ class SummonFile(object): } ) - PYTHON_SCHEMA = Schema( - { - Required("type"): "python", - Required("call"): str, - "args": dict, - "deps": [str], - } - ) - - def __init__(self, repo_obj, summon_file=None): + def __init__(self, repo_obj, summon_file): self.repo = repo_obj - self.filename = summon_file or SummonFile.DEF_NAME - self.path = os.path.join(self.repo.root_dir, summon_file) - self.dobjs = self._read_summon_content().get(self.DOBJ_SECTION) - - def _read_summon_content(self): - try: - with builtin_open(self.path, "r") as fobj: - return SummonFile.SCHEMA(ruamel.yaml.safe_load(fobj.read())) - except FileNotFoundError as exc: - raise SummonError("Summon file not found") from exc - except ruamel.yaml.YAMLError as exc: - raise SummonError("Failed to parse summon file") from exc - except Invalid as exc: - raise SummonError(str(exc)) from exc - - def _write_summon_content(self): - try: - with builtin_open(self.path, "w") as fobj: - content = SummonFile.SCHEMA(self.dobjs) - ruamel.yaml.serialize_all(content, fobj) - except ruamel.yaml.YAMLError as exc: - raise SummonError( - "Summon file '{}' schema error".format(self.path) - ) from exc - except Exception as exc: - raise SummonError(str(exc)) from exc + self.filename = summon_file + self._path = os.path.join(self.repo.root_dir, summon_file) @staticmethod @contextmanager @@ -169,7 +126,7 @@ def prepare(repo=None, rev=None, summon_file=None): Returns a SummonFile instance, which contains references to a Repo object, named object specification and resolved paths to deps. """ - summon_file = summon_file or SummonFile.DEF_NAME + summon_file = summon_file or SummonFile.DEFAULT_FILENAME with _make_repo(repo, rev=rev) as _repo: _require_dvc(_repo) try: @@ -179,108 +136,77 @@ def prepare(repo=None, rev=None, summon_file=None): str(exc) + " at '{}' in '{}'".format(summon_file, _repo) ) from exc.__cause__ - @staticmethod - def deps_paths(dobj): - return dobj["summon"].get("deps", []) - - def deps_abs_paths(self, dobj): - return [ - os.path.join(self.repo.root_dir, p) for p in self.deps_paths(dobj) - ] + @cached_property + def objects(self): + return self._read_yaml()["dvc-objects"] - def outs(self, dobj): - return [ - self.repo.find_out_by_relpath(d) for d in self.deps_paths(dobj) - ] + def _read_yaml(self): + try: + with builtin_open(self._path, mode="r") as fd: + return self.SCHEMA(ruamel.yaml.safe_load(fd.read())) + except FileNotFoundError as exc: + raise SummonError("Summon file not found") from exc + except ruamel.yaml.YAMLError as exc: + raise SummonError("Failed to parse summon file") from exc + except Invalid as exc: + raise SummonError(str(exc)) from None - def pull(self, dobj): - outs = self.outs(dobj) + def _write_yaml(self, objects): + try: + with builtin_open(self._path, "w") as fd: + content = self.SCHEMA({"dvc-objects": objects}) + ruamel.yaml.safe_dump(content, fd) + except Invalid as exc: + raise SummonError(str(exc)) from None - with self.repo.state: - for out in outs: - self.repo.cloud.pull(out.get_used_cache()) - out.checkout() + def abs(self, path): + return os.path.join(self.repo.root_dir, path) - def push(self, dobj): - paths = self.deps_abs_paths(dobj) + def pull(self, targets): + self.repo.pull([self.abs(target) for target in targets]) - with self.repo.state: - for path in paths: - self.repo.add(path) - self.repo.add(path) + def pull_deps(self, dobj): + self.pull(dobj["summon"].get("deps", [])) - def get_dobject(self, name): + def get(self, name): """ - Given a summonable object's name, search for it on the given content + Given a summonable object's name, search for it this file and return its description. """ - - if name not in self.dobjs: - raise SummonErrorNoObjectFound( - "No object with name '{}' in file '{}'".format(name, self.path) + if name not in self.objects: + raise SummonError( + "No object with name '{}' in '{}'".format(name, self.filename) ) - return self.dobjs[name] + return self.objects[name] + + def set(self, name, dobj, overwrite=True): + if not os.path.exists(self._path): + self.objects = self.SCHEMA({})["dvc-objects"] - def update_dobj(self, name, new_dobj, overwrite=True): - if (new_dobj[name] not in self.dobjs) or overwrite: - self.dobjs[name] = new_dobj - else: + if name in self.objects and not overwrite: raise SummonError( - "DVC-object '{}' already exist in '{}'".format( - name, self.filename - ) + "There is an existing summonable object named '{}' in '{}:{}'." + " Use SummonFile.set(..., overwrite=True) to" + " overwrite it.".format(name, self.repo.url, self.filename) ) - self._write_summon_content() - - -@wrap_with(threading.Lock()) -def _invoke_method(call, args, path): - # XXX: Some issues with this approach: - # * Import will pollute sys.modules - # * sys.path manipulation is "theoretically" not needed, - # but tests are failing for an unknown reason. - cwd = os.getcwd() - - try: - os.chdir(path) - sys.path.insert(0, path) - method = _import_string(call) - return method(**args) - finally: - os.chdir(cwd) - sys.path.pop(0) - - -def summon( - name, repo=None, rev=None, summon_file=SummonFile.DEF_NAME, args=None -): - """Instantiate an object described in the `summon_file`.""" - with SummonFile.prepare(repo, rev, summon_file) as desc: - dobj = desc.get_dobject(name) - try: - summon_dict = SummonFile.PYTHON_SCHEMA(dobj["summon"]) - except Invalid as exc: - raise SummonError(str(exc)) from exc + self.objects[name] = dobj + self._write_yaml(self.objects) - desc.pull(dobj) - _args = {**summon_dict.get("args", {}), **(args or {})} - return _invoke_method(summon_dict["call"], _args, desc.repo.root_dir) - - -def _import_string(import_name): - """Imports an object based on a string. - Useful to delay import to not load everything on startup. - Use dotted notaion in `import_name`, e.g. 'dvc.remote.gs.RemoteGS'. + # Add deps and push to remote + deps = dobj["summon"].get("deps", []) + stages = [] + if deps: + stages = self.repo.add( + lmap(self.abs, deps), fname=self.abs(name + ".dvc") + ) + self.repo.push() - :return: imported object - """ - if "." in import_name: - module, obj = import_name.rsplit(".", 1) - else: - return importlib.import_module(import_name) - return getattr(importlib.import_module(module), obj) + # Create commit and push + self.repo.scm.add([self._path] + [stage.path for stage in stages]) + self.repo.scm.commit("Add {} to {}".format(name, self.filename)) + self.repo.scm.push() def _require_dvc(repo): diff --git a/dvc/scm/git/__init__.py b/dvc/scm/git/__init__.py index ad1fcb2da2..90c648bc5f 100644 --- a/dvc/scm/git/__init__.py +++ b/dvc/scm/git/__init__.py @@ -215,6 +215,9 @@ def checkout(self, branch, create_new=False): else: self.repo.git.checkout(branch) + def push(self): + self.repo.remote().push() + def branch(self, branch): self.repo.git.branch(branch) From 0eb4e7e15f9d964a462a497dc609a4f0dceff463 Mon Sep 17 00:00:00 2001 From: Alexander Schepanovski Date: Fri, 31 Jan 2020 00:02:28 +0700 Subject: [PATCH 5/7] api: move summon out Will be added to dvcx instead. --- dvc/api.py | 126 ----------------------------------------- tests/func/test_api.py | 60 +------------------- 2 files changed, 1 insertion(+), 185 deletions(-) diff --git a/dvc/api.py b/dvc/api.py index 081a2ba201..48a24c494b 100644 --- a/dvc/api.py +++ b/dvc/api.py @@ -1,20 +1,11 @@ -from builtins import open as builtin_open import os from contextlib import contextmanager, _GeneratorContextManager as GCM -from funcy import cached_property, lmap -import ruamel.yaml -from voluptuous import Schema, Required, Invalid - from dvc.repo import Repo from dvc.exceptions import DvcException, NotDvcRepoError from dvc.external_repo import external_repo -class SummonError(DvcException): - pass - - class UrlNotDvcRepoError(DvcException): """Thrown if given url is not a DVC repository. @@ -92,123 +83,6 @@ def _make_repo(repo_url=None, rev=None): yield repo -class SummonFile(object): - DEFAULT_FILENAME = "dvcsummon.yaml" - SCHEMA = Schema( - { - Required("dvc-objects", default={}): { - str: { - "meta": dict, - Required("summon"): { - Required("type"): str, - "deps": [str], - str: object, - }, - } - } - } - ) - - def __init__(self, repo_obj, summon_file): - self.repo = repo_obj - self.filename = summon_file - self._path = os.path.join(self.repo.root_dir, summon_file) - - @staticmethod - @contextmanager - def prepare(repo=None, rev=None, summon_file=None): - """Does a couple of things every summon needs as a prerequisite: - clones the repo and parses the summon file. - - Calling code is expected to complete the summon logic following - instructions stated in "summon" dict of the object spec. - - Returns a SummonFile instance, which contains references to a Repo - object, named object specification and resolved paths to deps. - """ - summon_file = summon_file or SummonFile.DEFAULT_FILENAME - with _make_repo(repo, rev=rev) as _repo: - _require_dvc(_repo) - try: - yield SummonFile(_repo, summon_file) - except SummonError as exc: - raise SummonError( - str(exc) + " at '{}' in '{}'".format(summon_file, _repo) - ) from exc.__cause__ - - @cached_property - def objects(self): - return self._read_yaml()["dvc-objects"] - - def _read_yaml(self): - try: - with builtin_open(self._path, mode="r") as fd: - return self.SCHEMA(ruamel.yaml.safe_load(fd.read())) - except FileNotFoundError as exc: - raise SummonError("Summon file not found") from exc - except ruamel.yaml.YAMLError as exc: - raise SummonError("Failed to parse summon file") from exc - except Invalid as exc: - raise SummonError(str(exc)) from None - - def _write_yaml(self, objects): - try: - with builtin_open(self._path, "w") as fd: - content = self.SCHEMA({"dvc-objects": objects}) - ruamel.yaml.safe_dump(content, fd) - except Invalid as exc: - raise SummonError(str(exc)) from None - - def abs(self, path): - return os.path.join(self.repo.root_dir, path) - - def pull(self, targets): - self.repo.pull([self.abs(target) for target in targets]) - - def pull_deps(self, dobj): - self.pull(dobj["summon"].get("deps", [])) - - def get(self, name): - """ - Given a summonable object's name, search for it this file - and return its description. - """ - if name not in self.objects: - raise SummonError( - "No object with name '{}' in '{}'".format(name, self.filename) - ) - - return self.objects[name] - - def set(self, name, dobj, overwrite=True): - if not os.path.exists(self._path): - self.objects = self.SCHEMA({})["dvc-objects"] - - if name in self.objects and not overwrite: - raise SummonError( - "There is an existing summonable object named '{}' in '{}:{}'." - " Use SummonFile.set(..., overwrite=True) to" - " overwrite it.".format(name, self.repo.url, self.filename) - ) - - self.objects[name] = dobj - self._write_yaml(self.objects) - - # Add deps and push to remote - deps = dobj["summon"].get("deps", []) - stages = [] - if deps: - stages = self.repo.add( - lmap(self.abs, deps), fname=self.abs(name + ".dvc") - ) - self.repo.push() - - # Create commit and push - self.repo.scm.add([self._path] + [stage.path for stage in stages]) - self.repo.scm.commit("Add {} to {}".format(name, self.filename)) - self.repo.scm.push() - - def _require_dvc(repo): if not isinstance(repo, Repo): raise UrlNotDvcRepoError(repo.url) diff --git a/tests/func/test_api.py b/tests/func/test_api.py index 0855ebd7ab..02d77866eb 100644 --- a/tests/func/test_api.py +++ b/tests/func/test_api.py @@ -1,12 +1,10 @@ import os import shutil -import copy -import ruamel.yaml import pytest from dvc import api -from dvc.api import SummonFile, SummonError, UrlNotDvcRepoError +from dvc.api import UrlNotDvcRepoError from dvc.compat import fspath from dvc.exceptions import FileMissingError from dvc.main import main @@ -141,59 +139,3 @@ def test_open_not_cached(dvc): os.remove(metric_file) with pytest.raises(FileMissingError): api.read(metric_file) - - -def test_summon(tmp_dir, dvc, erepo_dir): - objects = { - SummonFile.DOBJ_SECTION: { - "sum": { - "meta": {"description": "Add to "}, - "summon": { - "type": "python", - "call": "calculator.add_to_num", - "args": {"x": 1}, - "deps": ["number"], - }, - } - } - } - - other_objects = copy.deepcopy(objects) - other_objects[SummonFile.DOBJ_SECTION]["sum"]["summon"]["args"]["x"] = 100 - - with erepo_dir.chdir(): - erepo_dir.dvc_gen("number", "100", commit="Add number.dvc") - erepo_dir.scm_gen(SummonFile.DEF_NAME, ruamel.yaml.dump(objects)) - erepo_dir.scm_gen("other.yaml", ruamel.yaml.dump(other_objects)) - erepo_dir.scm_gen("invalid.yaml", ruamel.yaml.dump({"name": "sum"})) - erepo_dir.scm_gen("not_yaml.yaml", "a: - this is not a YAML file") - erepo_dir.scm_gen( - "calculator.py", - "def add_to_num(x): return x + int(open('number').read())", - ) - erepo_dir.scm.commit("Add files") - - repo_url = "file://{}".format(erepo_dir) - - assert api.summon("sum", repo=repo_url) == 101 - assert api.summon("sum", repo=repo_url, args={"x": 2}) == 102 - assert api.summon("sum", repo=repo_url, summon_file="other.yaml") == 200 - - try: - api.summon("sum", repo=repo_url, summon_file="missing.yaml") - except SummonError as exc: - assert "Summon file not found" in str(exc) - assert "missing.yaml" in str(exc) - # Fails - # assert repo_url in str(exc) - else: - pytest.fail("Did not raise on missing summon file") - - with pytest.raises(SummonError, match=r"No object with name 'missing'"): - api.summon("missing", repo=repo_url) - - with pytest.raises(SummonError, match=r"extra keys not allowed"): - api.summon("sum", repo=repo_url, summon_file="invalid.yaml") - - with pytest.raises(SummonError, match=r"Failed to parse summon file"): - api.summon("sum", repo=repo_url, summon_file="not_yaml.yaml") From b4b5781a14f12a2e16e06212e726432901260677 Mon Sep 17 00:00:00 2001 From: Alexander Schepanovski Date: Fri, 31 Jan 2020 00:02:49 +0700 Subject: [PATCH 6/7] dvc: support inheritance in Repo.__repr__ --- dvc/repo/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py index 9d0a506a87..febbd77e8a 100644 --- a/dvc/repo/__init__.py +++ b/dvc/repo/__init__.py @@ -125,7 +125,7 @@ def tree(self, tree): self._reset() def __repr__(self): - return "Repo: '{root_dir}'".format(root_dir=self.root_dir) + return "{}: '{}'".format(self.__class__.__name__, self.root_dir) @classmethod def find_root(cls, root=None): From f8c4001e35a91b902b38c3eee89875e9c7aaeb7b Mon Sep 17 00:00:00 2001 From: Alexander Schepanovski Date: Fri, 31 Jan 2020 00:03:41 +0700 Subject: [PATCH 7/7] scm: add git pull/push to Git --- dvc/scm/git/__init__.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/dvc/scm/git/__init__.py b/dvc/scm/git/__init__.py index 90c648bc5f..167177ad0e 100644 --- a/dvc/scm/git/__init__.py +++ b/dvc/scm/git/__init__.py @@ -215,8 +215,15 @@ def checkout(self, branch, create_new=False): else: self.repo.git.checkout(branch) + def pull(self): + info, = self.repo.remote().pull() + if info.flags & info.ERROR: + raise SCMError("pull failed: {}".format(info.note)) + def push(self): - self.repo.remote().push() + info, = self.repo.remote().push() + if info.flags & info.ERROR: + raise SCMError("push failed: {}".format(info.summary)) def branch(self, branch): self.repo.git.branch(branch)