Update the binary index before attempting direct fetches #32137

Merged · 3 commits · Oct 19, 2022
4 changes: 4 additions & 0 deletions etc/spack/defaults/config.yaml
@@ -201,3 +201,7 @@ config:
   # building and installing packages. This gives information about Spack's
   # current progress as well as the current and total number of packages.
   terminal_title: false
+
+  # Number of seconds a buildcache's index.json is cached locally before probing
+  # for updates, within a single Spack invocation. Defaults to 10 minutes.
+  binary_index_ttl: 600
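For context, update() below reads this setting with spack.config.get("config:binary_index_ttl", 600), and a value of 0 disables the cooldown entirely. A minimal sketch of the gate, assuming it runs inside a Spack checkout where spack.config is importable:

import time

import spack.config

def cooldown_active(last_fetch_time):
    # Mirrors the check added in binary_distribution.py below: skip a
    # re-fetch when the previous fetch happened less than `ttl` seconds ago.
    ttl = spack.config.get("config:binary_index_ttl", 600)
    return ttl > 0 and time.time() - last_fetch_time < ttl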
53 changes: 38 additions & 15 deletions lib/spack/spack/binary_distribution.py
@@ -12,6 +12,7 @@
 import sys
 import tarfile
 import tempfile
+import time
 import traceback
 import warnings
 from contextlib import closing
@@ -105,6 +106,9 @@ def __init__(self, cache_root):
         # cache (_mirrors_for_spec)
         self._specs_already_associated = set()
 
+        # mapping from mirror urls to the time.time() of the last index fetch.
+        self._last_fetch_times = {}
+
Member:
And as far as I can see: this does not help across two spack install calls :(

Contributor Author:
There was concern about sequences of operations like:

$ spack install ...
$ spack buildcache update-index ...
$ spack install ...

If the fetch timers spanned spack install commands, the second spack install might (surprisingly) not see the effect of the spack buildcache update-index. This is the same reason update() defaults to with_cooldown=False.

Note that index.json.hash is fetched before index.json (or at least, it's supposed to be). So if index.json didn't change between calls, it won't be fetched a second time.
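A sketch of that hash-first probe: index.json.hash is tiny, so checking it avoids re-downloading an unchanged index.json. The build_cache/ paths reflect a typical buildcache mirror layout and are illustrative, not a guaranteed API:

import urllib.request

def fetch_index_if_changed(mirror_url, cached_hash):
    # Probe the small index.json.hash first; only download index.json
    # when the remote hash differs from what we have cached locally.
    with urllib.request.urlopen(mirror_url + "/build_cache/index.json.hash") as f:
        remote_hash = f.read().decode("utf-8").strip()
    if remote_hash == cached_hash:
        return None  # index unchanged since the last fetch
    with urllib.request.urlopen(mirror_url + "/build_cache/index.json") as f:
        return f.read().decode("utf-8")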

haampie (Member) commented on Dec 4, 2022:
We otherwise invalidate the cache by mtime; I was thinking it could make sense to not even make a request if (now - mtime) is small enough.
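A sketch of that mtime-based alternative; unlike the in-memory timer, it would also hold across separate spack invocations (the cache-file path is whatever the local index cache uses):

import os
import time

def fresh_enough(cached_index_path, ttl):
    # Skip the network request entirely when the locally cached index
    # file was written less than `ttl` seconds ago.
    try:
        mtime = os.path.getmtime(cached_index_path)
    except OSError:
        return False  # nothing cached yet; a fetch is required
    return time.time() - mtime < ttl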


         # _mirrors_for_spec is a dictionary mapping DAG hashes to lists of
         # entries indicating mirrors where that concrete spec can be found.
         # Each entry is a dictionary consisting of:
@@ -137,6 +141,7 @@ def clear(self):
         self._index_file_cache = None
         self._local_index_cache = None
         self._specs_already_associated = set()
+        self._last_fetch_times = {}
         self._mirrors_for_spec = {}
 
     def _write_local_index_cache(self):
@@ -242,7 +247,6 @@ def find_built_spec(self, spec, mirrors_to_check=None):
                 }
             ]
         """
-        self.regenerate_spec_cache()
         return self.find_by_hash(spec.dag_hash(), mirrors_to_check=mirrors_to_check)
 
     def find_by_hash(self, find_hash, mirrors_to_check=None):
@@ -253,6 +257,9 @@ def find_by_hash(self, find_hash, mirrors_to_check=None):
             mirrors_to_check: Optional mapping containing mirrors to check. If
                 None, just assumes all configured mirrors.
         """
+        if find_hash not in self._mirrors_for_spec:
+            # Not found in the cached index, pull the latest from the server.
+            self.update(with_cooldown=True)
Member:
This bothers me too about this PR. Please do not make read operations do write operations.

Contributor Author:
Caches as a general rule do a write operation on cache miss. That's what's happening here. This should be normal and I was very surprised to find it not doing something like this before.

haampie (Member) commented on Dec 6, 2022:
What you wrote is more wrong than the previous implementation was wrong. The point is to do a local existence check to order the mirrors. That should never trigger a remote fetch on failure.
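The pattern the author invokes, reduced to a standalone sketch: a read-through cache, where a lookup miss triggers exactly the kind of refill the reviewers object to here.

class ReadThroughCache:
    def __init__(self, fetch_all):
        self._fetch_all = fetch_all  # callable returning fresh {key: value} data
        self._data = {}

    def get(self, key):
        if key not in self._data:
            # Cache miss: refresh from the remote source, then retry.
            self._data.update(self._fetch_all())
        return self._data.get(key)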

         if find_hash not in self._mirrors_for_spec:
             return None
         results = self._mirrors_for_spec[find_hash]
@@ -283,7 +290,7 @@ def update_spec(self, spec, found_list):
"spec": new_entry["spec"],
}

def update(self):
def update(self, with_cooldown=False):
"""Make sure local cache of buildcache index files is up to date.
If the same mirrors are configured as the last time this was called
and none of the remote buildcache indices have changed, calling this
@@ -325,24 +332,37 @@ def update(self):

         fetch_errors = []
         all_methods_failed = True
+        ttl = spack.config.get("config:binary_index_ttl", 600)
+        now = time.time()
 
         for cached_mirror_url in self._local_index_cache:
             cache_entry = self._local_index_cache[cached_mirror_url]
             cached_index_hash = cache_entry["index_hash"]
             cached_index_path = cache_entry["index_path"]
             if cached_mirror_url in configured_mirror_urls:
-                # May need to fetch the index and update the local caches
-                try:
-                    needs_regen = self._fetch_and_cache_index(
-                        cached_mirror_url, expect_hash=cached_index_hash
-                    )
+                # Only do a fetch if the last fetch was longer than TTL ago
+                if (
+                    with_cooldown
+                    and ttl > 0
+                    and cached_mirror_url in self._last_fetch_times
+                    and now - self._last_fetch_times[cached_mirror_url] < ttl
+                ):
+                    # The fetch worked last time, so don't error
                     all_methods_failed = False
-                except FetchCacheError as fetch_error:
-                    needs_regen = False
-                    fetch_errors.extend(fetch_error.errors)
-                # The need to regenerate implies a need to clear as well.
-                spec_cache_clear_needed |= needs_regen
-                spec_cache_regenerate_needed |= needs_regen
+                else:
+                    # May need to fetch the index and update the local caches
+                    try:
+                        needs_regen = self._fetch_and_cache_index(
+                            cached_mirror_url, expect_hash=cached_index_hash
+                        )
+                        self._last_fetch_times[cached_mirror_url] = now
+                        all_methods_failed = False
+                    except FetchCacheError as fetch_error:
+                        needs_regen = False
+                        fetch_errors.extend(fetch_error.errors)
+                    # The need to regenerate implies a need to clear as well.
+                    spec_cache_clear_needed |= needs_regen
+                    spec_cache_regenerate_needed |= needs_regen
             else:
                 # No longer have this mirror, cached index should be removed
                 items_to_remove.append(
@@ -351,6 +371,8 @@
"cache_key": os.path.join(self._index_cache_root, cached_index_path),
}
)
if cached_mirror_url in self._last_fetch_times:
del self._last_fetch_times[cached_mirror_url]
spec_cache_clear_needed = True
spec_cache_regenerate_needed = True

@@ -369,6 +391,7 @@
             # Need to fetch the index and update the local caches
             try:
                 needs_regen = self._fetch_and_cache_index(mirror_url)
+                self._last_fetch_times[mirror_url] = now
                 all_methods_failed = False
             except FetchCacheError as fetch_error:
                 fetch_errors.extend(fetch_error.errors)
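Condensed into a standalone sketch, the per-mirror cooldown added above behaves like this (dummy fetch callable; not Spack's actual class):

import time

class IndexCache:
    def __init__(self, ttl=600):
        self.ttl = ttl
        self._last_fetch_times = {}

    def update(self, mirror_urls, fetch, with_cooldown=False):
        now = time.time()
        for url in mirror_urls:
            last = self._last_fetch_times.get(url)
            if with_cooldown and self.ttl > 0 and last is not None and now - last < self.ttl:
                continue  # fetched recently enough; trust the cached index
            fetch(url)  # may raise; the timestamp is only recorded on success
            self._last_fetch_times[url] = now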
@@ -1878,8 +1901,8 @@ def get_mirrors_for_spec(spec=None, mirrors_to_check=None, index_only=False):

     results = binary_index.find_built_spec(spec, mirrors_to_check=mirrors_to_check)
 
-    # Maybe we just didn't have the latest information from the mirror, so
-    # try to fetch directly, unless we are only considering the indices.
+    # The index may be out-of-date. If we aren't only considering indices, try
+    # to fetch directly since we know where the file should be.
     if not results and not index_only:
         results = try_direct_fetch(spec, mirrors=mirrors_to_check)
     # We found a spec by the direct fetch approach, we might as well
1 change: 1 addition & 0 deletions lib/spack/spack/schema/config.py
@@ -73,6 +73,7 @@
"binary_index_root": {"type": "string"},
"url_fetch_method": {"type": "string", "enum": ["urllib", "curl"]},
"additional_external_search_paths": {"type": "array", "items": {"type": "string"}},
"binary_index_ttl": {"type": "integer", "minimum": 0},
},
"deprecatedProperties": {
"properties": ["module_roots"],
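Spack validates config against these schemas with the jsonschema library, so the "minimum": 0 bound rejects negative TTLs. A standalone illustration using just the fragment above:

import jsonschema

schema = {
    "type": "object",
    "properties": {"binary_index_ttl": {"type": "integer", "minimum": 0}},
}

jsonschema.validate({"binary_index_ttl": 600}, schema)  # accepted
try:
    jsonschema.validate({"binary_index_ttl": -1}, schema)
except jsonschema.ValidationError:
    print("negative TTLs are rejected by the 'minimum: 0' bound")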
9 changes: 5 additions & 4 deletions lib/spack/spack/test/bindist.py
@@ -453,10 +453,11 @@ def test_generate_index_missing(monkeypatch, tmpdir, mutable_config):
     # Update index
     buildcache_cmd("update-index", "-d", mirror_dir.strpath)
 
-    # Check dependency not in buildcache
-    cache_list = buildcache_cmd("list", "--allarch")
-    assert "libdwarf" in cache_list
-    assert "libelf" not in cache_list
+    with spack.config.override("config:binary_index_ttl", 0):
+        # Check dependency not in buildcache
+        cache_list = buildcache_cmd("list", "--allarch")
+        assert "libdwarf" in cache_list
+        assert "libelf" not in cache_list


 def test_generate_indices_key_error(monkeypatch, capfd):