Skip to content

Commit

Permalink
overlord: address review comments
Browse files Browse the repository at this point in the history
  • Loading branch information
mardy committed Dec 10, 2021
1 parent a1ffb7c commit 0c89837
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 13 deletions.
20 changes: 20 additions & 0 deletions overlord/export_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,18 @@ func MockPruneInterval(prunei, prunew, abortw time.Duration) (restore func()) {
}
}

// MockStateLockTimeout sets the overlord state lock timeout for the tests.
func MockStateLockTimeout(timeout, retryInterval time.Duration) (restore func()) {
oldTimeout := stateLockTimeout
oldRetryInterval := stateLockRetryInterval
stateLockTimeout = timeout
stateLockRetryInterval = retryInterval
return func() {
stateLockTimeout = oldTimeout
stateLockRetryInterval = oldRetryInterval
}
}

func MockPruneTicker(f func(t *time.Ticker) <-chan time.Time) (restore func()) {
old := pruneTickerC
pruneTickerC = f
Expand Down Expand Up @@ -101,3 +113,11 @@ func MockPreseedExitWithError(f func(err error)) (restore func()) {
preseedExitWithError = old
}
}

func MockSystemdSdNotify(f func(notifyState string) error) (restore func()) {
old := systemdSdNotify
systemdSdNotify = f
return func() {
systemdSdNotify = old
}
}
34 changes: 22 additions & 12 deletions overlord/overlord.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,24 +54,25 @@ import (
"github.com/snapcore/snapd/overlord/storecontext"
"github.com/snapcore/snapd/snapdenv"
"github.com/snapcore/snapd/store"
"github.com/snapcore/snapd/systemd"
"github.com/snapcore/snapd/timings"
)

const (
overlordStateLockTimeout = 20 * time.Second
)

var (
ensureInterval = 5 * time.Minute
pruneInterval = 10 * time.Minute
pruneWait = 24 * time.Hour * 1
abortWait = 24 * time.Hour * 7

stateLockTimeout = 1 * time.Minute
stateLockRetryInterval = 1 * time.Second

pruneMaxChanges = 500

defaultCachedDownloads = 5

configstateInit = configstate.Init
systemdSdNotify = systemd.SdNotify
)

var pruneTickerC = func(t *time.Ticker) <-chan time.Time {
Expand All @@ -81,7 +82,7 @@ var pruneTickerC = func(t *time.Ticker) <-chan time.Time {
// Overlord is the central manager of a snappy system, keeping
// track of all available state managers and related helpers.
type Overlord struct {
flock *osutil.FileLock
stateFLock *osutil.FileLock

stateEng *StateEngine
// ensure loop
Expand Down Expand Up @@ -226,17 +227,26 @@ func initStateFileLock() (*osutil.FileLock, error) {

func lockWithTimeout(l *osutil.FileLock, timeout time.Duration) error {
startTime := time.Now()
const retryInterval = time.Second
systemdWasNotified := false
for {
err := l.TryLock()
if err != osutil.ErrAlreadyLocked {
return err
}

// The state is locked. Let's notify systemd that our startup might be
// longer than usual, or we risk getting killed if we overstep the
// systemd timeout.
if !systemdWasNotified {
logger.Noticef("Adjusting startup timeout by %v", timeout)
systemdSdNotify(fmt.Sprintf("EXTEND_TIMEOUT_USEC=%d", timeout.Microseconds()))
systemdWasNotified = true
}

if time.Since(startTime) >= timeout {
return errors.New("timeout for state lock file expired")
}
time.Sleep(retryInterval)
time.Sleep(stateLockRetryInterval)
}
}

Expand All @@ -245,10 +255,10 @@ func (o *Overlord) loadState(backend state.Backend, restartHandler restart.Handl
if err != nil {
return nil, fmt.Errorf("fatal: error opening lock file: %v", err)
}
o.flock = flock
o.stateFLock = flock

logger.Noticef("Acquiring state lock file")
if err := lockWithTimeout(o.flock, overlordStateLockTimeout); err != nil {
if err := lockWithTimeout(o.stateFLock, stateLockTimeout); err != nil {
logger.Noticef("Failed to lock state file")
return nil, fmt.Errorf("fatal: could not lock state file: %v", err)
}
Expand Down Expand Up @@ -485,10 +495,10 @@ func (o *Overlord) Stop() error {
err = o.loopTomb.Wait()
}
o.stateEng.Stop()
if o.flock != nil {
if o.stateFLock != nil {
// This will also unlock the file
o.flock.Close()
logger.Noticef("Released lock file")
o.stateFLock.Close()
logger.Noticef("Released state lock file")
}
return err
}
Expand Down
20 changes: 19 additions & 1 deletion overlord/overlord_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1333,6 +1333,21 @@ func (ovs *overlordSuite) TestLockWithTimeoutHappy(c *C) {
}

func (ovs *overlordSuite) TestLockWithTimeoutFailed(c *C) {
// Set the state lock retry interval to 0.1 ms; the timeout is not used in
// this test (we specify the timeout when calling the lockWithTimeout()
// function below); we set it to a big value in order to trigger a test
// failure in case the logic gets modified and it suddenly becomes
// relevant.
restoreTimeout := overlord.MockStateLockTimeout(time.Hour, 100 * time.Microsecond)
defer restoreTimeout()

var notifyCalls []string
restoreNotify := overlord.MockSystemdSdNotify(func(notifyState string) error {
notifyCalls = append(notifyCalls, notifyState)
return nil
})
defer restoreNotify()

f, err := ioutil.TempFile("", "testlock-*")
defer func() {
f.Close()
Expand All @@ -1358,6 +1373,9 @@ func (ovs *overlordSuite) TestLockWithTimeoutFailed(c *C) {
c.Assert(err, IsNil)
c.Assert(bytesRead, Equals, len(buf))

err = overlord.LockWithTimeout(flock, 0)
err = overlord.LockWithTimeout(flock, 5 * time.Millisecond)
c.Check(err, ErrorMatches, "timeout for state lock file expired")
c.Check(notifyCalls, DeepEquals, []string{
"EXTEND_TIMEOUT_USEC=5000",
})
}

0 comments on commit 0c89837

Please sign in to comment.