Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 20 additions & 15 deletions dvc/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,35 +2,40 @@
from contextlib import _GeneratorContextManager as GCM
from contextlib import contextmanager

from dvc.exceptions import DvcException, NotDvcRepoError
from funcy import reraise

from dvc.exceptions import (
NotDvcRepoError,
OutputNotFoundError,
PathMissingError,
)
from dvc.external_repo import external_repo
from dvc.path_info import PathInfo
from dvc.repo import Repo


class UrlNotDvcRepoError(DvcException):
    """Raised when the supplied URL does not point to a DVC repository."""

    def __init__(self, url):
        # Build the user-facing message first, then hand it to the base error.
        message = f"'{url}' is not a DVC repository."
        super().__init__(message)


def get_url(path, repo=None, rev=None, remote=None):
"""
Returns the URL to the storage location of a data file or directory tracked
in a DVC repo. For Git repos, HEAD is used unless a rev argument is
supplied. The default remote is tried unless a remote argument is supplied.

Raises UrlNotDvcRepoError if repo is not a DVC project.
Raises OutputNotFoundError if the file is not a dvc-tracked file.

NOTE: This function does not check for the actual existence of the file or
directory in the remote storage.
"""
with _make_repo(repo, rev=rev) as _repo:
if not isinstance(_repo, Repo):
raise UrlNotDvcRepoError(_repo.url) # pylint: disable=no-member
out = _repo.find_out_by_relpath(path)
remote_obj = _repo.cloud.get_remote(remote)
return str(remote_obj.tree.hash_to_path_info(out.hash_info.value))
path_info = PathInfo(_repo.root_dir) / path
with reraise(FileNotFoundError, PathMissingError(path, repo)):
metadata = _repo.repo_tree.metadata(path_info)

if not metadata.is_dvc:
raise OutputNotFoundError(path, repo)
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I looked into this: we used to throw OutputNotFoundError before as well, so this does not change anything.

Regarding UrlNotDvcRepoError, it is no longer accurate to throw it, since a Git repo can have a subrepo inside of it.


cloud = metadata.repo.cloud
hash_info = _repo.repo_tree.get_hash(path_info)
return cloud.get_url_for(remote, checksum=hash_info.value)


def open( # noqa, pylint: disable=redefined-builtin
Expand Down Expand Up @@ -97,7 +102,7 @@ def _make_repo(repo_url=None, rev=None):
repo_url = repo_url or os.getcwd()
if rev is None and os.path.exists(repo_url):
try:
yield Repo(repo_url)
yield Repo(repo_url, subrepos=True)
return
except NotDvcRepoError:
pass # fallthrough to external_repo
Expand Down
4 changes: 4 additions & 0 deletions dvc/data_cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,3 +114,7 @@ def status(
show_checksums=show_checksums,
log_missing=log_missing,
)

def get_url_for(self, remote, checksum):
    """Return, as a string, where ``checksum`` is stored on ``remote``.

    ``remote`` is resolved through ``self.get_remote``; the resulting
    remote's tree maps ``checksum`` to a path info, which is then
    converted with ``str``.
    """
    remote_obj = self.get_remote(remote)
    location = remote_obj.tree.hash_to_path_info(checksum)
    return str(location)
40 changes: 12 additions & 28 deletions dvc/dependency/repo.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,5 @@
import os

from voluptuous import Required

from dvc.exceptions import OutputNotFoundError
from dvc.path_info import PathInfo

from .local import LocalDependency


Expand Down Expand Up @@ -42,30 +37,18 @@ def repo_pair(self):
def __str__(self):
return "{} ({})".format(self.def_path, self.def_repo[self.PARAM_URL])

def _make_repo(self, *, locked=True):
def _make_repo(self, *, locked=True, **kwargs):
from dvc.external_repo import external_repo

d = self.def_repo
rev = (d.get("rev_lock") if locked else None) or d.get("rev")
return external_repo(d["url"], rev=rev)
return external_repo(d["url"], rev=rev, **kwargs)

def _get_hash(self, locked=True):
from dvc.tree.repo import RepoTree

with self._make_repo(locked=locked) as repo:
try:
return repo.find_out_by_relpath(self.def_path).hash_info
except OutputNotFoundError:
path = PathInfo(os.path.join(repo.root_dir, self.def_path))

# we want stream but not fetch, so DVC out directories are
# walked, but dir contents is not fetched
tree = RepoTree(repo, stream=True)

# We are polluting our repo cache with some dir listing here
if tree.isdir(path):
return self.repo.cache.local.tree.get_hash(path, tree=tree)
return tree.get_file_hash(path)
# we want stream but not fetch, so DVC out directories are
# walked, but dir contents is not fetched
with self._make_repo(locked=locked, fetch=False, stream=True) as repo:
return repo.get_checksum(self.def_path)

def workspace_status(self):
current = self._get_hash(locked=True)
Expand All @@ -86,14 +69,15 @@ def dumpd(self):
return {self.PARAM_PATH: self.def_path, self.PARAM_REPO: self.def_repo}

def download(self, to):
with self._make_repo() as repo:
cache = self.repo.cache.local

with self._make_repo(cache_dir=cache.cache_dir) as repo:
if self.def_repo.get(self.PARAM_REV_LOCK) is None:
self.def_repo[self.PARAM_REV_LOCK] = repo.get_rev()

cache = self.repo.cache.local
with repo.use_cache(cache):
_, _, cache_infos = repo.fetch_external([self.def_path])
cache.checkout(to.path_info, cache_infos[0])
_, _, cache_infos = repo.fetch_external([self.def_path])

cache.checkout(to.path_info, cache_infos[0])

def update(self, rev=None):
if rev:
Expand Down
Loading