Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 9 additions & 8 deletions dvc/command/get.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@ def run(self):


def add_parser(subparsers, parent_parser):
GET_HELP = "Download/copy files or directories from DVC repository."
GET_HELP = (
"Download a file or directory from any DVC project or Git repository."
)
get_parser = subparsers.add_parser(
"get",
parents=[parent_parser],
Expand All @@ -40,18 +42,17 @@ def add_parser(subparsers, parent_parser):
formatter_class=argparse.RawDescriptionHelpFormatter,
)
get_parser.add_argument(
"url", help="URL of Git repository with DVC project to download from."
"url",
help="Location of DVC project or Git repository to download from",
)
get_parser.add_argument(
"path", help="Path to a file or directory within a DVC repository."
"path",
help="Path to a file or directory within the project or repository",
)
get_parser.add_argument(
"-o",
"--out",
nargs="?",
help="Destination path to copy/download files to.",
"-o", "--out", nargs="?", help="Destination path to download files to"
)
get_parser.add_argument(
"--rev", nargs="?", help="DVC repository git revision."
"--rev", nargs="?", help="Git revision (e.g. branch, tag, SHA)"
)
get_parser.set_defaults(func=CmdGet)
13 changes: 8 additions & 5 deletions dvc/command/imp.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ def run(self):

def add_parser(subparsers, parent_parser):
IMPORT_HELP = (
"Download data from DVC repository and take it under DVC control."
"Download a file or directory from any DVC project or Git repository"
"and take it under DVC control."
)

import_parser = subparsers.add_parser(
Expand All @@ -41,15 +42,17 @@ def add_parser(subparsers, parent_parser):
formatter_class=argparse.RawTextHelpFormatter,
)
import_parser.add_argument(
"url", help="URL of Git repository with DVC project to download from."
"url",
help="Location of DVC project or Git repository to download from",
)
import_parser.add_argument(
"path", help="Path to data within DVC repository."
"path",
help="Path to a file or directory within the project or repository",
)
import_parser.add_argument(
"-o", "--out", nargs="?", help="Destination path to put data to."
"-o", "--out", nargs="?", help="Destination path to download files to"
)
import_parser.add_argument(
"--rev", nargs="?", help="DVC repository git revision."
"--rev", nargs="?", help="Git revision (e.g. branch, tag, SHA)"
)
import_parser.set_defaults(func=CmdImport)
5 changes: 0 additions & 5 deletions dvc/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,11 +240,6 @@ def __init__(self, ignore_dirname):
)


class UrlNotDvcRepoError(DvcException):
def __init__(self, url):
super().__init__("URL '{}' is not a dvc repository.".format(url))


class GitHookAlreadyExistsError(DvcException):
def __init__(self, hook_name):
super().__init__(
Expand Down
63 changes: 36 additions & 27 deletions dvc/repo/get.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,9 @@
DvcException,
NotDvcRepoError,
OutputNotFoundError,
UrlNotDvcRepoError,
PathMissingError,
)
from dvc.external_repo import external_repo
from dvc.external_repo import external_repo, cached_clone
from dvc.path_info import PathInfo
from dvc.stage import Stage
from dvc.utils import resolve_output
Expand Down Expand Up @@ -42,37 +41,47 @@ def get(url, path, out=None, rev=None):
# and won't work with reflink/hardlink.
dpath = os.path.dirname(os.path.abspath(out))
tmp_dir = os.path.join(dpath, "." + str(shortuuid.uuid()))
repo_dir = None
try:
with external_repo(cache_dir=tmp_dir, url=url, rev=rev) as repo:
# Try any links possible to avoid data duplication.
#
# Not using symlink, because we need to remove cache after we are
# done, and to make that work we would have to copy data over
# anyway before removing the cache, so we might just copy it
# right away.
#
# Also, we can't use theoretical "move" link type here, because
# the same cache file might be used a few times in a directory.
repo.cache.local.cache_types = ["reflink", "hardlink", "copy"]

try:
output = repo.find_out_by_relpath(path)
except OutputNotFoundError:
output = None

if output and output.use_cache:
_get_cached(repo, output, out)
else:
try:
with external_repo(cache_dir=tmp_dir, url=url, rev=rev) as repo:
# Try any links possible to avoid data duplication.
#
# Not using symlink, because we need to remove cache after we
# are done, and to make that work we would have to copy data
# over anyway before removing the cache, so we might just copy
# it right away.
#
# Also, we can't use theoretical "move" link type here, because
# the same cache file might be used a few times in a directory.
repo.cache.local.cache_types = ["reflink", "hardlink", "copy"]

try:
output = repo.find_out_by_relpath(path)
except OutputNotFoundError:
output = None

if output and output.use_cache:
_get_cached(repo, output, out)
return

# Either an uncached out with absolute path or a user error
if os.path.isabs(path):
raise FileNotFoundError

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

minor: remove this extra line?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

fs_copy(os.path.join(repo.root_dir, path), out)
repo_dir = repo.root_dir

except NotDvcRepoError:
# Not a DVC repository, continue below and copy from git
pass

if os.path.isabs(path):
raise FileNotFoundError

if not repo_dir:
repo_dir = cached_clone(url, rev=rev)

fs_copy(os.path.join(repo_dir, path), out)
except (OutputNotFoundError, FileNotFoundError):
raise PathMissingError(path, url)
except NotDvcRepoError:
raise UrlNotDvcRepoError(url)
finally:
remove(tmp_dir)

Expand Down
18 changes: 11 additions & 7 deletions tests/func/test_get.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

from dvc.cache import Cache
from dvc.config import Config
from dvc.exceptions import UrlNotDvcRepoError
from dvc.repo.get import GetDVCFileError, PathMissingError
from dvc.repo import Repo
from dvc.system import System
Expand Down Expand Up @@ -87,9 +86,10 @@ def test_get_repo_rev(tmp_dir, erepo_dir):
def test_get_from_non_dvc_repo(tmp_dir, erepo_dir):
erepo_dir.scm.repo.index.remove([erepo_dir.dvc.dvc_dir], r=True)
erepo_dir.scm.commit("remove dvc")
erepo_dir.scm_gen({"some_file": "contents"}, commit="create file")

with pytest.raises(UrlNotDvcRepoError):
Repo.get(fspath(erepo_dir), "some_file.zip")
Repo.get(fspath(erepo_dir), "some_file", "file_imported")
assert (tmp_dir / "file_imported").read_text() == "contents"


def test_get_a_dvc_file(tmp_dir, erepo_dir):
Expand Down Expand Up @@ -136,6 +136,14 @@ def test_absolute_file_outside_repo(tmp_dir, erepo_dir):
Repo.get(fspath(erepo_dir), "/root/")


def test_absolute_file_outside_git_repo(tmp_dir, erepo_dir):
erepo_dir.scm.repo.index.remove([erepo_dir.dvc.dvc_dir], r=True)
erepo_dir.scm.commit("remove dvc")

with pytest.raises(PathMissingError):
Repo.get(fspath(erepo_dir), "/root/")


def test_unknown_path(tmp_dir, erepo_dir):
with pytest.raises(PathMissingError):
Repo.get(fspath(erepo_dir), "a_non_existing_file")
Expand Down Expand Up @@ -164,10 +172,6 @@ def test_get_from_non_dvc_master(tmp_dir, erepo_dir, caplog):
erepo_dir.dvc.scm.repo.index.remove([".dvc"], r=True)
erepo_dir.dvc.scm.commit("remove .dvc")

# sanity check
with pytest.raises(UrlNotDvcRepoError):
Repo.get(fspath(erepo_dir), "some_file")

caplog.clear()
dst = "file_imported"
with caplog.at_level(logging.INFO, logger="dvc"):
Expand Down