locks: allow locks to work under high contention
This fixes a bug found by Harshitha Menon.  This solution is a bit
different from what we discussed.  The `lock = None` line shouldn't be a
release, based on my testing, but should be `return (lock_type, None)`
to inform the caller that it couldn't get the lock type requested,
without disturbing the existing lock object in the database.  There were
also a couple of bugs from taking write locks at the beginning without
any checking or release, and from not releasing read locks before
requeueing.  This version no longer gives me read-upgrade-to-write
errors, even when running 200 instances on one box.
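
Below is a minimal, self-contained sketch of the `return (lock_type, None)`
pattern described above, assuming a cache of one lock object per package id.
It is not the Spack implementation (the real change is in `_ensure_locked`
in the diff below); `LockTimeoutError`, `PrefixLock`, and `Installer` are
illustrative stand-ins, and read/write acquisition is collapsed into a
single `acquire` for brevity.

    import threading


    class LockTimeoutError(Exception):
        """Stand-in for the timeout error raised by the real lock library."""


    class PrefixLock(object):
        """Hypothetical per-package lock used only for this sketch."""

        def __init__(self):
            self._mutex = threading.Lock()

        def acquire(self, timeout):
            if not self._mutex.acquire(timeout=timeout):
                raise LockTimeoutError('timed out waiting for the lock')


    class Installer(object):
        """Sketch of an installer caching one lock object per package id."""

        def __init__(self, timeout=1.0):
            self.timeout = timeout
            self._locks = {}  # pkg_id -> lock object this process holds

        def _ensure_locked(self, lock_type, pkg_id):
            lock = self._locks.get(pkg_id, PrefixLock())
            try:
                lock.acquire(self.timeout)
            except LockTimeoutError:
                # Tell the caller the requested lock type could not be
                # taken, but do NOT overwrite or release the lock object
                # already cached for pkg_id -- the essence of the fix above.
                return (lock_type, None)
            self._locks[pkg_id] = lock
            return (lock_type, lock)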

* Change the lock in check_deps_status to a read lock, and release it if
  the dependency is not installed.  It's not clear why this was ever a
  write lock, but read is definitely more appropriate here, and the read
  lock is only held beyond this scope if the package is installed.
* Release the read lock before requeueing to reduce the chance of
  livelock; the timeout that caused the original issue now happens in
  roughly 3 of 200 workers on average instead of 199 (see the sketch
  after this list).
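
A rough sketch of the requeue behavior from the two bullets above,
assuming a simple work queue of package ids; `process`, `ensure_locked`,
and `is_installed` are hypothetical stand-ins for the real installer
machinery, not Spack's API.

    import collections


    def process(pkg_ids, ensure_locked, is_installed):
        """Hypothetical worker loop; ensure_locked and is_installed stand
        in for the real installer machinery (the lock helper and the
        database check); locks are assumed to have release_read()."""
        pending = collections.deque(pkg_ids)
        while pending:
            pkg_id = pending.popleft()
            ltype, lock = ensure_locked('read', pkg_id)
            if lock is None:
                # No lock of any type: another process holds the write
                # lock, so requeue and re-check on a later pass.
                pending.append(pkg_id)
                continue
            if not is_installed(pkg_id):
                # Release the read lock *before* requeueing so a writer
                # can get in and finish the install; holding it while we
                # spin is what produced the livelock/timeouts above.
                lock.release_read()
                pending.append(pkg_id)
                continue
            # Installed: keep the read lock while the package is handled.
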
trws committed Dec 7, 2021
1 parent 2386630 commit 52c8b97
Showing 1 changed file with 15 additions and 12 deletions.
lib/spack/spack/installer.py: 15 additions & 12 deletions
@@ -776,7 +776,7 @@ def _check_deps_status(self, request):
             # Attempt to get a write lock to ensure another process does not
             # uninstall the dependency while the requested spec is being
             # installed
-            ltype, lock = self._ensure_locked('write', dep_pkg)
+            ltype, lock = self._ensure_locked('read', dep_pkg)
             if lock is None:
                 msg = '{0} is write locked by another process'.format(dep_id)
                 raise InstallError(err.format(request.pkg_id, msg))
@@ -795,6 +795,8 @@
                 tty.debug('Flagging {0} as installed per the database'
                           .format(dep_id))
                 self._flag_installed(dep_pkg)
+            else:
+                lock.release_read()
 
     def _prepare_for_install(self, task):
         """
@@ -1006,7 +1008,7 @@ def _ensure_locked(self, lock_type, pkg):
         except (lk.LockDowngradeError, lk.LockTimeoutError) as exc:
             tty.debug(err.format(op, desc, pkg_id, exc.__class__.__name__,
                                  str(exc)))
-            lock = None
+            return (lock_type, None)
 
         except (Exception, KeyboardInterrupt, SystemExit) as exc:
             tty.error(err.format(op, desc, pkg_id, exc.__class__.__name__,
@@ -1569,16 +1571,7 @@ def install(self):
                 # uninstalling the package until we're done installing its
                 # dependents.
                 ltype, lock = self._ensure_locked('read', pkg)
-                if lock is not None:
-                    self._update_installed(task)
-                    _print_installed_pkg(pkg.prefix)
-
-                    # It's an already installed compiler, add it to the config
-                    if task.compiler:
-                        spack.compilers.add_compilers_to_config(
-                            spack.compilers.find_compilers([pkg.spec.prefix]))
-
-                else:
+                if lock is None:
                     # At this point we've failed to get a write or a read
                     # lock, which means another process has taken a write
                     # lock between our releasing the write and acquiring the
@@ -1589,6 +1582,15 @@
                     # or uninstalled -- on the next pass.
                     self.installed.remove(pkg_id)
                     self._requeue_task(task)
+                    continue
+                self._update_installed(task)
+                _print_installed_pkg(pkg.prefix)
+
+                # It's an already installed compiler, add it to the config
+                if task.compiler:
+                    spack.compilers.add_compilers_to_config(
+                        spack.compilers.find_compilers([pkg.spec.prefix]))
+
                 continue
 
                 # Having a read lock on an uninstalled pkg may mean another
@@ -1600,6 +1602,7 @@
                 # established by the other process -- failed, installed, or
                 # uninstalled -- on the next pass.
                 if ltype == 'read':
+                    lock.release_read()
                     self._requeue_task(task)
                     continue
 
