From fb430feb93be1b2436fbcdb974fac76043c6c53c Mon Sep 17 00:00:00 2001 From: Luca Bruno Date: Fri, 25 Nov 2016 10:13:30 +0000 Subject: [PATCH] stage1/app-rm: unmount volumes and bind-mounts --- stage1/app-rm/app-rm.go | 89 +++++++++++++++++++++++++++++++++-------- 1 file changed, 73 insertions(+), 16 deletions(-) diff --git a/stage1/app-rm/app-rm.go b/stage1/app-rm/app-rm.go index 7961e86c46..50a5436f7b 100644 --- a/stage1/app-rm/app-rm.go +++ b/stage1/app-rm/app-rm.go @@ -18,10 +18,12 @@ package main import ( "flag" + "fmt" "io/ioutil" "os" "os/exec" "path/filepath" + "syscall" "github.com/coreos/rkt/common" rktlog "github.com/coreos/rkt/pkg/log" @@ -29,20 +31,28 @@ import ( stage1initcommon "github.com/coreos/rkt/stage1/init/common" "github.com/appc/spec/schema/types" + "github.com/coreos/rkt/pkg/mountinfo" ) var ( - flagApp string - debug bool - log *rktlog.Logger - diag *rktlog.Logger + debug bool + flagApp string + flagPhase int + + log *rktlog.Logger + diag *rktlog.Logger ) func init() { flag.StringVar(&flagApp, "app", "", "Application name") flag.BoolVar(&debug, "debug", false, "Run in debug mode") + + // `--phase` is not part of stage1 contract + flag.IntVar(&flagPhase, "phase", 0, "Removal phase, defaults to 0 when called from the outside") } +// This is a multi-phase entrypoint. It invokes itself again in stage1 context +// to perform further cleanup at pod level. func main() { flag.Parse() @@ -59,7 +69,24 @@ func main() { } enterCmd := stage1common.PrepareEnterCmd(false) + switch flagPhase { + case 1: + // phase1: app-rm:phase0 -> app-rm:phase1 + err = cleanupPhase1(appName, enterCmd) + default: + // phase0: stage0 -> app-rm:phase0 + err = cleanupPhase0(appName, enterCmd) + } + if err != nil { + log.FatalE("cleanup error", err) + } + os.Exit(0) +} + +// cleanupPhase0 is the default phase for rm entrypoint, performing +// initial cleaning steps which don't require custom logic in pod context. 
+func cleanupPhase0(appName *types.ACName, enterCmd []string) error { args := enterCmd args = append(args, "/usr/bin/systemctl") args = append(args, "is-active") @@ -74,7 +101,7 @@ func main() { out, _ := cmd.Output() if string(out) != "inactive\n" { - log.Fatalf("app %q is still running", appName.String()) + return fmt.Errorf("app %q is still running", appName.String()) } s1rootfs := common.Stage1RootfsPath(".") @@ -86,24 +113,54 @@ func main() { for _, p := range appServicePaths { if err := os.Remove(p); err != nil && !os.IsNotExist(err) { - log.FatalE("error removing app service file", err) + return fmt.Errorf("error removing app service file: %s", err) } } - args = enterCmd - args = append(args, "/usr/bin/systemctl") - args = append(args, "daemon-reload") + // TODO(sur): find all RW cgroups exposed for this app and clean them up - cmd = exec.Cmd{ - Path: args[0], - Args: args, + // last cleaning steps are performed after entering pod context + tasks := [][]string{ + // inception: perform phase1 before proceeding + {"/app-rm", "--phase=1", fmt.Sprintf("--app=%s", appName), fmt.Sprintf("--debug=%t", debug)}, + // all cleaned-up, let systemd reload and forget about this app + {"/usr/bin/systemctl", "daemon-reload"}, } + for _, cmdLine := range tasks { + args := append(enterCmd, cmdLine...) + cmd = exec.Cmd{ + Path: args[0], + Args: args, + } - if out, err := cmd.CombinedOutput(); err != nil { - log.Fatalf("%q failed at daemon-reload:\n%s", appName, out) + if out, err := cmd.CombinedOutput(); err != nil { + return fmt.Errorf("%q removal failed:\n%s", appName, out) + } } + return nil +} - // TODO unmount all the volumes +// cleanupPhase1 inspects pod systemd-pid1 mountinfo to find all remaining +// mountpoints for appName and proceeds to clean them up. +func cleanupPhase1(appName *types.ACName, enterCmd []string) error { + // TODO(lucab): re-evaluate if we move to systemd as non-pid1 (eg. 
host-pid ns inheriting) + mnts, err := mountinfo.ParseMounts(1) + if err != nil { + return err + } + appRootFs := filepath.Join("/opt/stage2", appName.String(), "rootfs") + mnts = mnts.Filter(mountinfo.HasPrefix(appRootFs)) + + // soft-errors here, stage0 may still be able to continue with the removal anyway + for _, m := range mnts { + // unlink first to avoid back-propagation + _ = syscall.Mount("", m.MountPoint, "", syscall.MS_PRIVATE|syscall.MS_REC, "") + // simple unmount, it may fail if the target is busy (eg. overlapping children) + if e := syscall.Unmount(m.MountPoint, 0); e != nil { + // if busy, just detach here and let the kernel clean it once free + _ = syscall.Unmount(m.MountPoint, syscall.MNT_DETACH) + } + } - os.Exit(0) + return nil }