-
Notifications
You must be signed in to change notification settings - Fork 562
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
boot: cmd/snap-bootstrap: handle a candidate recovery system v2 #9940
Changes from 14 commits
f6c4621
45076df
fe16ca9
e1e5115
6e63c31
52bae8b
6e84920
764de5e
2e5ba08
c37312e
c2fabf8
36c4961
be8e108
d34f804
80fe0ed
ccf8adb
36805f6
51550db
d4bb220
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -407,6 +407,9 @@ type recoverModeStateMachine struct { | |||||
// the disk we have all our partitions on | ||||||
disk disks.Disk | ||||||
|
||||||
// when true, the fallback unlock paths will not be tried | ||||||
noFallback bool | ||||||
|
||||||
// TODO:UC20: for clarity turn this into into tristate: | ||||||
// unknown|encrypted|unencrypted | ||||||
isEncryptedDev bool | ||||||
|
@@ -657,13 +660,14 @@ func (m *recoverModeStateMachine) setUnlockStateWithFallbackKey(partName string, | |||||
return nil | ||||||
} | ||||||
|
||||||
func newRecoverModeStateMachine(model *asserts.Model, disk disks.Disk) *recoverModeStateMachine { | ||||||
func newRecoverModeStateMachine(model *asserts.Model, disk disks.Disk, allowFallback bool) *recoverModeStateMachine { | ||||||
m := &recoverModeStateMachine{ | ||||||
model: model, | ||||||
disk: disk, | ||||||
degradedState: &recoverDegradedState{ | ||||||
ErrorLog: []string{}, | ||||||
}, | ||||||
noFallback: !allowFallback, | ||||||
} | ||||||
// first step is to mount ubuntu-boot to check for run mode keys to unlock | ||||||
// ubuntu-data | ||||||
|
@@ -824,6 +828,10 @@ func (m *recoverModeStateMachine) unlockDataRunKey() (stateFunc, error) { | |||||
} | ||||||
|
||||||
func (m *recoverModeStateMachine) unlockDataFallbackKey() (stateFunc, error) { | ||||||
if m.noFallback { | ||||||
return nil, fmt.Errorf("cannot unlock ubuntu-data (fallback disabled)") | ||||||
} | ||||||
|
||||||
// try to unlock data with the fallback key on ubuntu-seed, which must have | ||||||
// been mounted at this point | ||||||
unlockOpts := &secboot.UnlockVolumeUsingSealedKeyOptions{ | ||||||
|
@@ -948,6 +956,11 @@ func (m *recoverModeStateMachine) openUnencryptedSave() (stateFunc, error) { | |||||
func (m *recoverModeStateMachine) unlockEncryptedSaveFallbackKey() (stateFunc, error) { | ||||||
// try to unlock save with the fallback key on ubuntu-seed, which must have | ||||||
// been mounted at this point | ||||||
|
||||||
if m.noFallback { | ||||||
return nil, fmt.Errorf("cannot unlock ubuntu-save (fallback disabled)") | ||||||
} | ||||||
|
||||||
unlockOpts := &secboot.UnlockVolumeUsingSealedKeyOptions{ | ||||||
// we want to allow using the recovery key if the fallback key fails as | ||||||
// using the fallback object is the last chance before we give up trying | ||||||
|
@@ -998,6 +1011,33 @@ func generateMountsModeRecover(mst *initramfsMountsState) error { | |||||
return err | ||||||
} | ||||||
|
||||||
// for most cases we allow the use of fallback to unlock/mount things | ||||||
allowFallback := true | ||||||
|
||||||
tryingCurrentSystem, err := boot.InitramfsIsTryingRecoverySystem(mst.recoverySystem) | ||||||
if err != nil { | ||||||
if boot.IsInconsystemRecoverySystemState(err) { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it just now occurred to me that this is a typo, I assume that the original name intention of the function is:
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Haha, good catch. |
||||||
// there is some try recovery system state in bootenv | ||||||
// but it is inconsistent, make sure we clear it and | ||||||
// return back to run mode | ||||||
|
||||||
// finalize reboots or panics | ||||||
logger.Noticef("try recovery system state is inconsistent: %v", err) | ||||||
finalizeTryRecoverySystemAndReboot(boot.TryRecoverySystemOutcomeInconsistent) | ||||||
} | ||||||
// this could be an inconsistency in the state | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. could you clarify what kind of situations could lead here, using the phrase There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. actually this comment is no longer correct and should be dropped |
||||||
return err | ||||||
} | ||||||
if tryingCurrentSystem { | ||||||
// but in this case, use only the run keys | ||||||
allowFallback = false | ||||||
|
||||||
// make sure that if rebooted, the next boot goes into run mode | ||||||
if err := boot.EnsureNextBootToRunMode(""); err != nil { | ||||||
return err | ||||||
} | ||||||
} | ||||||
|
||||||
// 3. run the state machine logic for mounting partitions, this involves | ||||||
// trying to unlock then mount ubuntu-data, and then unlocking and | ||||||
// mounting ubuntu-save | ||||||
|
@@ -1006,7 +1046,7 @@ func generateMountsModeRecover(mst *initramfsMountsState) error { | |||||
|
||||||
machine, err := func() (machine *recoverModeStateMachine, err error) { | ||||||
// first state to execute is to unlock ubuntu-data with the run key | ||||||
machine = newRecoverModeStateMachine(model, disk) | ||||||
machine = newRecoverModeStateMachine(model, disk, allowFallback) | ||||||
for { | ||||||
finished, err := machine.execute() | ||||||
// TODO: consider whether certain errors are fatal or not | ||||||
|
@@ -1020,6 +1060,23 @@ func generateMountsModeRecover(mst *initramfsMountsState) error { | |||||
|
||||||
return machine, nil | ||||||
}() | ||||||
if tryingCurrentSystem { | ||||||
// end of the line for a recovery system we are only trying out, | ||||||
// this branch always ends with a reboot (or a panic) | ||||||
outcome := boot.TryRecoverySystemOutcomeFailure | ||||||
if err == nil && !machine.degraded() { | ||||||
outcome = boot.TryRecoverySystemOutcomeSuccess | ||||||
} | ||||||
if outcome == boot.TryRecoverySystemOutcomeFailure { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe I'm missing something but wouldn't an else be clearer than setting outcome and then checking it again? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated |
||||||
if err == nil { | ||||||
err = fmt.Errorf("in degraded state") | ||||||
} | ||||||
logger.Noticef("try recovery system %q failed: %v", mst.recoverySystem, err) | ||||||
} | ||||||
// finalize reboots or panics | ||||||
finalizeTryRecoverySystemAndReboot(outcome) | ||||||
} | ||||||
|
||||||
if err != nil { | ||||||
return err | ||||||
} | ||||||
|
@@ -1438,3 +1495,45 @@ func generateMountsModeRun(mst *initramfsMountsState) error { | |||||
|
||||||
return nil | ||||||
} | ||||||
|
||||||
var tryRecoverySystemHealthCheck = func() error { | ||||||
// check that writable is accessible by checking whether the | ||||||
// state file exists | ||||||
if !osutil.FileExists(dirs.SnapStateFileUnder(boot.InitramfsHostWritableDir)) { | ||||||
return fmt.Errorf("host state file is not accessible") | ||||||
} | ||||||
return nil | ||||||
} | ||||||
|
||||||
func finalizeTryRecoverySystemAndReboot(outcome boot.TryRecoverySystemOutcome) (err error) { | ||||||
// from this point on, we must finish with a system reboot | ||||||
defer func() { | ||||||
if rebootErr := boot.InitramfsReboot(); rebootErr != nil { | ||||||
if err != nil { | ||||||
err = fmt.Errorf("%v (cannot reboot to run system: %v)", err, rebootErr) | ||||||
} else { | ||||||
err = fmt.Errorf("cannot reboot to run system: %v", rebootErr) | ||||||
} | ||||||
} | ||||||
// not reached, unless in tests | ||||||
panic(fmt.Errorf("finalize try recovery system did not reboot, last error: %v", err)) | ||||||
}() | ||||||
|
||||||
if outcome == boot.TryRecoverySystemOutcomeSuccess { | ||||||
if err := tryRecoverySystemHealthCheck(); err != nil { | ||||||
// health checks failed, the recovery system is considered | ||||||
// unsuccessful | ||||||
outcome = boot.TryRecoverySystemOutcomeFailure | ||||||
logger.Noticef("try recovery system health check failed: %v", err) | ||||||
} | ||||||
} | ||||||
|
||||||
// that's it, we've tried booting a new recovery system to this point, | ||||||
// whether things are looking good or bad we will reboot back to run | ||||||
// mode and update the boot variables accordingly | ||||||
if err := boot.EnsureNextBootToRunModeWithTryRecoverySystemOutcome(outcome); err != nil { | ||||||
logger.Noticef("cannot update the try recovery system state: %v", err) | ||||||
return fmt.Errorf("cannot mark recovery system successful: %v", err) | ||||||
} | ||||||
return nil | ||||||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
we could also short cut unlockMaybeEncryptedAloneSaveFallbackKey? it still works as is because over that path we mark things as degraded but is a bit of pointless work?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think I'd leave the check here. It's probably a bit cleaner this way. Also, we identify the system as failed if we either get an error or are in the degraded mode. In case of disabled fallback, we'd get an error if we reach the fallback path.