Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
afc8886
get: handle non-DVC repositories
fabiosantoscode Jan 9, 2020
d092854
style: improve code flow and move comments
fabiosantoscode Jan 10, 2020
09bdc73
doc: change get documentation so that it doesn't imply the target mus…
fabiosantoscode Jan 10, 2020
8e49228
get: recoup cache optimal location
fabiosantoscode Jan 10, 2020
3e386d9
Update dvc/repo/get.py
fabiosantoscode Jan 10, 2020
bfe0688
Revert "get: recoup cache optimal location"
fabiosantoscode Jan 10, 2020
d862115
remove unused exception class
fabiosantoscode Jan 10, 2020
0f7dad7
change command doc string
fabiosantoscode Jan 10, 2020
cabbb32
Update dvc/command/get.py
fabiosantoscode Jan 10, 2020
36bd848
Update dvc/command/get.py
fabiosantoscode Jan 10, 2020
c3314b5
Update dvc/command/get.py
fabiosantoscode Jan 10, 2020
724283f
Update dvc/command/get.py
fabiosantoscode Jan 10, 2020
f5cd424
import: update documentation
fabiosantoscode Jan 10, 2020
8db43b9
get: leverage external_repo() context manager for DVC repositories
fabiosantoscode Jan 10, 2020
9140e26
Update dvc/command/imp.py
fabiosantoscode Jan 10, 2020
e0c533f
Update dvc/command/get.py
fabiosantoscode Jan 10, 2020
97d0925
Update dvc/command/imp.py
fabiosantoscode Jan 10, 2020
5d0fc1a
Update dvc/command/imp.py
fabiosantoscode Jan 10, 2020
ff27132
Update dvc/command/imp.py
fabiosantoscode Jan 10, 2020
260ab15
Update dvc/command/get.py
fabiosantoscode Jan 10, 2020
f1e198b
Update dvc/command/imp.py
fabiosantoscode Jan 10, 2020
802d8ab
Update dvc/command/get.py
fabiosantoscode Jan 10, 2020
97b3c4a
Restyled by black
restyled-commits Jan 10, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 9 additions & 8 deletions dvc/command/get.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@ def run(self):


def add_parser(subparsers, parent_parser):
GET_HELP = "Download/copy files or directories from DVC repository."
GET_HELP = (
"Download a file or directory from any DVC project or Git repository."
)
get_parser = subparsers.add_parser(
"get",
parents=[parent_parser],
Expand All @@ -40,18 +42,17 @@ def add_parser(subparsers, parent_parser):
formatter_class=argparse.RawDescriptionHelpFormatter,
)
get_parser.add_argument(
"url", help="URL of Git repository with DVC project to download from."
"url",
help="Location of DVC project or Git repository to download from",
)
get_parser.add_argument(
"path", help="Path to a file or directory within a DVC repository."
"path",
help="Path to a file or directory within the project or repository",
)
get_parser.add_argument(
"-o",
"--out",
nargs="?",
help="Destination path to copy/download files to.",
"-o", "--out", nargs="?", help="Destination path to download files to"
)
get_parser.add_argument(
"--rev", nargs="?", help="DVC repository git revision."
"--rev", nargs="?", help="Git revision (e.g. branch, tag, SHA)"
)
get_parser.set_defaults(func=CmdGet)
13 changes: 8 additions & 5 deletions dvc/command/imp.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ def run(self):

def add_parser(subparsers, parent_parser):
IMPORT_HELP = (
"Download data from DVC repository and take it under DVC control."
"Download a file or directory from any DVC project or Git repository and take it under "
"DVC control."
)

import_parser = subparsers.add_parser(
Expand All @@ -41,15 +42,17 @@ def add_parser(subparsers, parent_parser):
formatter_class=argparse.RawTextHelpFormatter,
)
import_parser.add_argument(
"url", help="URL of Git repository with DVC project to download from."
"url",
help="Location of DVC project or Git repository to download from",
)
import_parser.add_argument(
"path", help="Path to data within DVC repository."
"path",
help="Path to a file or directory within the project or repository",
)
import_parser.add_argument(
"-o", "--out", nargs="?", help="Destination path to put data to."
"-o", "--out", nargs="?", help="Destination path to download files to"
)
import_parser.add_argument(
"--rev", nargs="?", help="DVC repository git revision."
"--rev", nargs="?", help="Git revision (e.g. branch, tag, SHA)"
)
import_parser.set_defaults(func=CmdImport)
5 changes: 0 additions & 5 deletions dvc/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,11 +240,6 @@ def __init__(self, ignore_dirname):
)


class UrlNotDvcRepoError(DvcException):
def __init__(self, url):
super().__init__("URL '{}' is not a dvc repository.".format(url))


class GitHookAlreadyExistsError(DvcException):
def __init__(self, hook_name):
super().__init__(
Expand Down
58 changes: 34 additions & 24 deletions dvc/repo/get.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,9 @@
DvcException,
NotDvcRepoError,
OutputNotFoundError,
UrlNotDvcRepoError,
PathMissingError,
)
from dvc.external_repo import external_repo
from dvc.external_repo import external_repo, cached_clone
from dvc.path_info import PathInfo
from dvc.stage import Stage
from dvc.utils import resolve_output
Expand Down Expand Up @@ -42,39 +41,50 @@ def get(url, path, out=None, rev=None):
# and won't work with reflink/hardlink.
dpath = os.path.dirname(os.path.abspath(out))
tmp_dir = os.path.join(dpath, "." + str(shortuuid.uuid()))
raw_git_dir = None
try:
with external_repo(cache_dir=tmp_dir, url=url, rev=rev) as repo:
# Try any links possible to avoid data duplication.
#
# Not using symlink, because we need to remove the cache after we
# are done, and to make that work we would have to copy the data
# over anyway before removing the cache, so we might as well copy
# it right away.
#
# Also, we can't use a theoretical "move" link type here, because
# the same cache file might be used several times in a directory.
repo.cache.local.cache_types = ["reflink", "hardlink", "copy"]

try:
output = repo.find_out_by_relpath(path)
except OutputNotFoundError:
output = None

if output and output.use_cache:
_get_cached(repo, output, out)
else:
try:
with external_repo(cache_dir=tmp_dir, url=url, rev=rev) as repo:
# Try any links possible to avoid data duplication.
#
# Not using symlink, because we need to remove the cache after we
# are done, and to make that work we would have to copy the data
# over anyway before removing the cache, so we might as well copy
# it right away.
#
# Also, we can't use a theoretical "move" link type here, because
# the same cache file might be used several times in a directory.
repo.cache.local.cache_types = ["reflink", "hardlink", "copy"]

try:
output = repo.find_out_by_relpath(path)
except OutputNotFoundError:
output = None

if output and output.use_cache:
_get_cached(repo, output, out)
return

# Either an uncached output with an absolute path, or a user error

if os.path.isabs(path):
raise FileNotFoundError

fs_copy(os.path.join(repo.root_dir, path), out)
return

except NotDvcRepoError:
# Not a DVC repository, continue below and copy from git
pass

raw_git_dir = cached_clone(url, rev=rev)
fs_copy(os.path.join(raw_git_dir, path), out)
except (OutputNotFoundError, FileNotFoundError):
raise PathMissingError(path, url)
except NotDvcRepoError:
raise UrlNotDvcRepoError(url)
finally:
remove(tmp_dir)
if raw_git_dir:
remove(raw_git_dir)


def _get_cached(repo, output, out):
Expand Down
10 changes: 3 additions & 7 deletions tests/func/test_get.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

from dvc.cache import Cache
from dvc.config import Config
from dvc.exceptions import UrlNotDvcRepoError
from dvc.repo.get import GetDVCFileError, PathMissingError
from dvc.repo import Repo
from dvc.system import System
Expand Down Expand Up @@ -87,9 +86,10 @@ def test_get_repo_rev(tmp_dir, erepo_dir):
def test_get_from_non_dvc_repo(tmp_dir, erepo_dir):
erepo_dir.scm.repo.index.remove([erepo_dir.dvc.dvc_dir], r=True)
erepo_dir.scm.commit("remove dvc")
erepo_dir.scm_gen({"some_file": "contents"}, commit="create file")

with pytest.raises(UrlNotDvcRepoError):
Repo.get(fspath(erepo_dir), "some_file.zip")
Repo.get(fspath(erepo_dir), "some_file", "file_imported")
assert (tmp_dir / "file_imported").read_text() == "contents"


def test_get_a_dvc_file(tmp_dir, erepo_dir):
Expand Down Expand Up @@ -164,10 +164,6 @@ def test_get_from_non_dvc_master(tmp_dir, erepo_dir, caplog):
erepo_dir.dvc.scm.repo.index.remove([".dvc"], r=True)
erepo_dir.dvc.scm.commit("remove .dvc")

# sanity check
with pytest.raises(UrlNotDvcRepoError):
Repo.get(fspath(erepo_dir), "some_file")

caplog.clear()
dst = "file_imported"
with caplog.at_level(logging.INFO, logger="dvc"):
Expand Down