-
Notifications
You must be signed in to change notification settings - Fork 1.3k
dvc: use protected mode by default #3472
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -83,19 +83,24 @@ class RemoteBASE(object): | |
| DEFAULT_NO_TRAVERSE = True | ||
| DEFAULT_VERIFY = False | ||
|
|
||
| CACHE_MODE = None | ||
|
||
| SHARED_MODE_MAP = {None: (None, None), "group": (None, None)} | ||
|
|
||
| state = StateNoop() | ||
|
|
||
| def __init__(self, repo, config): | ||
| self.repo = repo | ||
|
|
||
| self._check_requires(config) | ||
|
|
||
| shared = config.get("shared") | ||
| self._file_mode, self._dir_mode = self.SHARED_MODE_MAP[shared] | ||
|
|
||
| self.checksum_jobs = ( | ||
| config.get("checksum_jobs") | ||
| or (self.repo and self.repo.config["core"].get("checksum_jobs")) | ||
| or self.CHECKSUM_JOBS | ||
| ) | ||
| self.protected = False | ||
| self.no_traverse = config.get("no_traverse", self.DEFAULT_NO_TRAVERSE) | ||
| self.verify = config.get("verify", self.DEFAULT_VERIFY) | ||
| self._dir_info = {} | ||
|
|
@@ -221,7 +226,7 @@ def get_dir_checksum(self, path_info): | |
| new_info = self.cache.checksum_to_path_info(checksum) | ||
| if self.cache.changed_cache_file(checksum): | ||
| self.cache.makedirs(new_info.parent) | ||
| self.cache.move(tmp_info, new_info) | ||
| self.cache.move(tmp_info, new_info, mode=self.CACHE_MODE) | ||
|
|
||
| self.state.save(path_info, checksum) | ||
| self.state.save(new_info, checksum) | ||
|
|
@@ -409,30 +414,20 @@ def _do_link(self, from_info, to_info, link_method): | |
|
|
||
| link_method(from_info, to_info) | ||
|
|
||
| if self.protected: | ||
| self.protect(to_info) | ||
|
|
||
| logger.debug( | ||
| "Created %s'%s': %s -> %s", | ||
| "protected " if self.protected else "", | ||
| self.cache_types[0], | ||
| from_info, | ||
| to_info, | ||
| "Created '%s': %s -> %s", self.cache_types[0], from_info, to_info, | ||
| ) | ||
|
|
||
| def _save_file(self, path_info, checksum, save_link=True): | ||
| assert checksum | ||
|
|
||
| cache_info = self.checksum_to_path_info(checksum) | ||
| if self.changed_cache(checksum): | ||
| self.move(path_info, cache_info) | ||
| self.move(path_info, cache_info, mode=self.CACHE_MODE) | ||
| self.link(cache_info, path_info) | ||
| elif self.iscopy(path_info) and self._cache_is_copy(path_info): | ||
| # Default relink procedure involves unneeded copy | ||
| if self.protected: | ||
| self.protect(path_info) | ||
| else: | ||
| self.unprotect(path_info) | ||
| self.unprotect(path_info) | ||
| else: | ||
| self.remove(path_info) | ||
| self.link(cache_info, path_info) | ||
|
|
@@ -656,7 +651,8 @@ def open(self, path_info, mode="r", encoding=None): | |
| def remove(self, path_info): | ||
| raise RemoteActionNotImplemented("remove", self.scheme) | ||
|
|
||
| def move(self, from_info, to_info): | ||
| def move(self, from_info, to_info, mode=None): | ||
| assert mode is None | ||
| self.copy(from_info, to_info) | ||
| self.remove(from_info) | ||
|
|
||
|
|
@@ -718,6 +714,9 @@ def gc(self, named_cache): | |
| removed = True | ||
| return removed | ||
|
|
||
| def is_protected(self, path_info): | ||
| return False | ||
|
|
||
| def changed_cache_file(self, checksum): | ||
| """Compare the given checksum with the (corresponding) actual one. | ||
|
|
||
|
|
@@ -730,7 +729,14 @@ def changed_cache_file(self, checksum): | |
|
|
||
| - Remove the file from cache if it doesn't match the actual checksum | ||
| """ | ||
|
|
||
| cache_info = self.checksum_to_path_info(checksum) | ||
| if self.is_protected(cache_info): | ||
| logger.debug( | ||
| "Assuming '%s' is unchanged since it is read-only", cache_info | ||
| ) | ||
| return False | ||
|
|
||
| actual = self.get_checksum(cache_info) | ||
|
|
||
| logger.debug( | ||
|
|
@@ -744,6 +750,9 @@ def changed_cache_file(self, checksum): | |
| return True | ||
|
|
||
| if actual.split(".")[0] == checksum.split(".")[0]: | ||
| # making cache file read-only so we don't need to check it | ||
| # next time | ||
| self.protect(cache_info) | ||
| return False | ||
|
|
||
| if self.exists(cache_info): | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -32,19 +32,11 @@ class RemoteLOCAL(RemoteBASE): | |
|
|
||
| DEFAULT_CACHE_TYPES = ["reflink", "copy"] | ||
|
|
||
| CACHE_MODE = 0o444 | ||
efiop marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| SHARED_MODE_MAP = {None: (0o644, 0o755), "group": (0o664, 0o775)} | ||
|
|
||
| def __init__(self, repo, config): | ||
| super().__init__(repo, config) | ||
| self.protected = config.get("protected", False) | ||
|
|
||
| shared = config.get("shared") | ||
| self._file_mode, self._dir_mode = self.SHARED_MODE_MAP[shared] | ||
|
|
||
| if self.protected: | ||
|
||
| # cache files are set to be read-only for everyone | ||
| self._file_mode = stat.S_IREAD | stat.S_IRGRP | stat.S_IROTH | ||
|
|
||
| self.cache_dir = config.get("url") | ||
| self._dir_info = {} | ||
|
|
||
|
|
@@ -142,23 +134,25 @@ def remove(self, path_info): | |
| if self.exists(path_info): | ||
| remove(path_info.fspath) | ||
|
|
||
| def move(self, from_info, to_info): | ||
| def move(self, from_info, to_info, mode=None): | ||
| if from_info.scheme != "local" or to_info.scheme != "local": | ||
| raise NotImplementedError | ||
|
|
||
| self.makedirs(to_info.parent) | ||
|
|
||
| if self.isfile(from_info): | ||
| mode = self._file_mode | ||
| else: | ||
| mode = self._dir_mode | ||
| if mode is None: | ||
| if self.isfile(from_info): | ||
| mode = self._file_mode | ||
| else: | ||
| mode = self._dir_mode | ||
efiop marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| move(from_info, to_info, mode=mode) | ||
|
|
||
| def copy(self, from_info, to_info): | ||
| tmp_info = to_info.parent / tmp_fname(to_info.name) | ||
| try: | ||
| System.copy(from_info, tmp_info) | ||
| os.chmod(fspath_py35(tmp_info), self._file_mode) | ||
| os.rename(fspath_py35(tmp_info), fspath_py35(to_info)) | ||
| except Exception: | ||
| self.remove(tmp_info) | ||
|
|
@@ -202,9 +196,13 @@ def hardlink(self, from_info, to_info): | |
| def is_hardlink(path_info): | ||
| return System.is_hardlink(path_info) | ||
|
|
||
| @staticmethod | ||
| def reflink(from_info, to_info): | ||
| System.reflink(from_info, to_info) | ||
| def reflink(self, from_info, to_info): | ||
| tmp_info = to_info.parent / tmp_fname(to_info.name) | ||
| System.reflink(from_info, tmp_info) | ||
|
||
| # NOTE: reflink has its own separate inode, so you can set permissions | ||
| # that are different from the source. | ||
| os.chmod(fspath_py35(tmp_info), self._file_mode) | ||
| os.rename(fspath_py35(tmp_info), fspath_py35(to_info)) | ||
|
|
||
| def cache_exists(self, checksums, jobs=None, name=None): | ||
| return [ | ||
|
|
@@ -402,8 +400,7 @@ def _log_missing_caches(checksum_info_dict): | |
| ) | ||
| logger.warning(msg) | ||
|
|
||
| @staticmethod | ||
| def _unprotect_file(path): | ||
| def _unprotect_file(self, path): | ||
| if System.is_symlink(path) or System.is_hardlink(path): | ||
| logger.debug("Unprotecting '{}'".format(path)) | ||
| tmp = os.path.join(os.path.dirname(path), "." + uuid()) | ||
|
|
@@ -423,13 +420,13 @@ def _unprotect_file(path): | |
| "a symlink or a hardlink.".format(path) | ||
| ) | ||
|
|
||
| os.chmod(path, os.stat(path).st_mode | stat.S_IWRITE) | ||
| os.chmod(path, self._file_mode) | ||
|
|
||
| def _unprotect_dir(self, path): | ||
| assert is_working_tree(self.repo.tree) | ||
|
|
||
| for fname in self.repo.tree.walk_files(path): | ||
| RemoteLOCAL._unprotect_file(fname) | ||
| self._unprotect_file(fname) | ||
|
|
||
| def unprotect(self, path_info): | ||
| path = path_info.fspath | ||
|
|
@@ -441,12 +438,11 @@ def unprotect(self, path_info): | |
| if os.path.isdir(path): | ||
| self._unprotect_dir(path) | ||
| else: | ||
| RemoteLOCAL._unprotect_file(path) | ||
| self._unprotect_file(path) | ||
|
|
||
| @staticmethod | ||
| def protect(path_info): | ||
| def protect(self, path_info): | ||
| path = fspath_py35(path_info) | ||
| mode = stat.S_IREAD | stat.S_IRGRP | stat.S_IROTH | ||
| mode = self.CACHE_MODE | ||
|
|
||
| try: | ||
| os.chmod(path, mode) | ||
|
|
@@ -519,3 +515,11 @@ def _get_unpacked_dir_names(self, checksums): | |
| if self.is_dir_checksum(c): | ||
| unpacked.add(c + self.UNPACKED_DIR_SUFFIX) | ||
| return unpacked | ||
|
|
||
| def is_protected(self, path_info): | ||
| if not self.exists(path_info): | ||
| return False | ||
|
|
||
| mode = os.stat(fspath_py35(path_info)).st_mode | ||
|
|
||
| return stat.S_IMODE(mode) == self.CACHE_MODE | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So this one is effectively defunct from now on. Just keeping it in the SCHEMA to not break on older configs.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This does not respect explicit `protected = False`, does it?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@skshetry It doesn't, as there is no clear use case for it.