Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 25 additions & 45 deletions dvc/cache/__init__.py
Original file line number Diff line number Diff line change
@@ -1,45 +1,7 @@
"""Manages cache of a DVC repo."""
from collections import defaultdict

from funcy import cached_property


def _make_remote_property(name):
    """Build a cached property that exposes the cache configured as `name`.

    The config file can map a cache to a named remote, which is what
    external outputs rely on (they need an external cache location).

    Given a config file such as:

        ['remote "dvc-storage"']
        url = ssh://localhost/tmp
        ask_password = true

        [cache]
        ssh = dvc-storage

    the owning object sees:

        self.config == {'ssh': 'dvc-storage'}
        self.ssh  # a CloudCache wrapping the tree of "dvc-storage"

    When the config has no (or an empty) entry for `name`, the property
    evaluates to None.
    """

    def getter(self):
        # Imported lazily, inside the getter, exactly as the original did.
        from ..tree import get_cloud_tree
        from .base import CloudCache

        remote_name = self.config.get(name)
        if remote_name:
            return CloudCache(get_cloud_tree(self.repo, name=remote_name))
        return None

    # Rename the getter before wrapping it, so the property is created
    # from a function that already carries the scheme name.
    getter.__name__ = name
    prop = cached_property(getter)
    return prop
from ..scheme import Schemes


class Cache:
Expand All @@ -50,9 +12,11 @@ class Cache:
"""

CACHE_DIR = "cache"
CLOUD_SCHEMES = [Schemes.S3, Schemes.GS, Schemes.SSH, Schemes.HDFS]

def __init__(self, repo):
from ..tree import get_cloud_tree
from .base import CloudCache
from .local import LocalCache

self.repo = repo
Expand All @@ -74,13 +38,29 @@ def __init__(self, repo):
settings[str(opt)] = config.get(opt)

tree = get_cloud_tree(repo, **settings)
self.local = LocalCache(tree)

s3 = _make_remote_property("s3")
gs = _make_remote_property("gs")
ssh = _make_remote_property("ssh")
hdfs = _make_remote_property("hdfs")
azure = _make_remote_property("azure")
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Azure was never there 🤦‍♂️

self._cache = {}
self._cache[Schemes.LOCAL] = LocalCache(tree)
Copy link
Contributor Author

@efiop efiop Nov 10, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oops, missed self.local = None above. Will fix.

EDIT: in #4867


for scheme in self.CLOUD_SCHEMES:

remote = self.config.get(scheme)
if remote:
tree = get_cloud_tree(self.repo, name=remote)
cache = CloudCache(tree)
else:
cache = None

self._cache[scheme] = cache

def __getattr__(self, name):
try:
return self._cache[name]
except KeyError as exc:
raise AttributeError from exc

def by_scheme(self):
yield from self._cache.items()


class NamedCacheItem:
Expand Down
19 changes: 3 additions & 16 deletions dvc/repo/gc.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,22 +77,9 @@ def gc(
)

# treat caches as remotes for garbage collection
_do_gc("local", self.cache.local, used, jobs)

if self.cache.s3:
_do_gc("s3", self.cache.s3, used, jobs)

if self.cache.gs:
_do_gc("gs", self.cache.gs, used, jobs)

if self.cache.ssh:
_do_gc("ssh", self.cache.ssh, used, jobs)

if self.cache.hdfs:
_do_gc("hdfs", self.cache.hdfs, used, jobs)

if self.cache.azure:
_do_gc("azure", self.cache.azure, used, jobs)
for scheme, cache in self.cache.by_scheme():
if cache:
_do_gc(scheme, cache, used, jobs)

if cloud:
_do_gc("remote", self.cloud.get_remote(remote, "gc -c"), used, jobs)