From 109dd483891b8470f0d87963211042e5ee3190ed Mon Sep 17 00:00:00 2001 From: jeanluc Date: Wed, 10 Apr 2024 17:56:24 +0200 Subject: [PATCH] Don't unnecessarily download remote sources to cache --- changelog/66342.fixed.md | 1 + salt/states/file.py | 61 ++++++++++++++++++- .../functional/states/file/test_managed.py | 2 +- 3 files changed, 61 insertions(+), 3 deletions(-) create mode 100644 changelog/66342.fixed.md diff --git a/changelog/66342.fixed.md b/changelog/66342.fixed.md new file mode 100644 index 000000000000..da57b2926d05 --- /dev/null +++ b/changelog/66342.fixed.md @@ -0,0 +1 @@ +Made `file.managed` skip download of a remote source if the managed file already exists with the correct hash diff --git a/salt/states/file.py b/salt/states/file.py index eae3fbca7986..5b2b1531bca3 100644 --- a/salt/states/file.py +++ b/salt/states/file.py @@ -2501,8 +2501,12 @@ def managed( Set to ``False`` to discard the cached copy of the source file once the state completes. This can be useful for larger files to keep them from taking up space in minion cache. However, keep in mind that discarding - the source file will result in the state needing to re-download the - source file if the state is run again. + the source file might result in the state needing to re-download the + source file if the state is run again. If the source is not a local or + ``salt://`` one, the source hash is known, ``skip_verify`` is not true + and the managed file exists with the correct hash and is not templated, + this is not the case (i.e. remote downloads are avoided if the local hash + matches the expected one). .. versionadded:: 2017.7.3 @@ -3220,6 +3224,59 @@ def managed( if defaults and not isinstance(defaults, dict): return _error(ret, "Defaults must be formed as a dict") + # If we're pulling from a remote source untemplated and we have a source hash, + # check early if the local file exists with the correct hash and skip + # managing contents if so. This avoids a lot of overhead. + if ( + contents is None + and not template + and source + and not skip_verify + and os.path.exists(name) + and replace + ): + try: + # If the source is a list, find the first existing file. + # We're doing this after basic checks to not slow down + # runs where it does not matter. + source, source_hash = __salt__["file.source_list"]( + source, source_hash, __env__ + ) + source_sum = None + if ( + source + and source_hash + and urllib.parse.urlparse(source).scheme + not in ( + "salt", + "file", + ) + and not os.path.isabs(source) + ): + source_sum = __salt__["file.get_source_sum"]( + name, + source, + source_hash, + source_hash_name, + __env__, + verify_ssl=verify_ssl, + source_hash_sig=source_hash_sig, + signed_by_any=signed_by_any, + signed_by_all=signed_by_all, + keyring=keyring, + gnupghome=gnupghome, + ) + hsum = __salt__["file.get_hash"](name, source_sum["hash_type"]) + except (CommandExecutionError, OSError) as err: + log.error( + "Failed checking existing file's hash against specified source_hash: %s", + err, + exc_info_on_loglevel=logging.DEBUG, + ) + else: + if source_sum and source_sum["hsum"] == hsum: + replace = False + if not replace and os.path.exists(name): ret_perms = {} # Check and set the permissions if necessary diff --git a/tests/pytests/functional/states/file/test_managed.py b/tests/pytests/functional/states/file/test_managed.py index 4a12ef163a19..04c5b9d57b90 100644 --- a/tests/pytests/functional/states/file/test_managed.py +++ b/tests/pytests/functional/states/file/test_managed.py @@ -1057,7 +1057,7 @@ def test_file_managed_remote_source_does_not_refetch_existing_file_with_correct_ Issue #64373 """ name = tmp_path / "scene33" - name.write_text(grail_scene33_file.read_text()) + name.write_bytes(grail_scene33_file.read_bytes()) ret = file.managed( str(name), source="http://127.0.0.1:1337/does/not/exist",