Skip to content

Commit

Permalink
Merge pull request #4003 from lifubang/carry3985-01-syncPipe
Browse files Browse the repository at this point in the history
[Carry #3985 Part I] code refactor for process sync
  • Loading branch information
kolyshkin authored Oct 3, 2023
2 parents e712b87 + 8da42aa commit f56b007
Show file tree
Hide file tree
Showing 10 changed files with 261 additions and 114 deletions.
61 changes: 29 additions & 32 deletions libcontainer/container_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -488,17 +488,10 @@ func isDmzBinarySafe(c *configs.Config) bool {
}

func (c *Container) newParentProcess(p *Process) (parentProcess, error) {
parentInitPipe, childInitPipe, err := utils.NewSockPair("init")
comm, err := newProcessComm()
if err != nil {
return nil, fmt.Errorf("unable to create init pipe: %w", err)
}
messageSockPair := filePair{parentInitPipe, childInitPipe}

parentLogPipe, childLogPipe, err := os.Pipe()
if err != nil {
return nil, fmt.Errorf("unable to create log pipe: %w", err)
return nil, err
}
logFilePair := filePair{parentLogPipe, childLogPipe}

// Make sure we use a new safe copy of /proc/self/exe or the runc-dmz
// binary each time this is called, to make sure that if a container
Expand Down Expand Up @@ -569,10 +562,15 @@ func (c *Container) newParentProcess(p *Process) (parentProcess, error) {
"_LIBCONTAINER_CONSOLE="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1),
)
}
cmd.ExtraFiles = append(cmd.ExtraFiles, childInitPipe)
cmd.Env = append(cmd.Env, "_LIBCONTAINER_STATEDIR="+c.root)

cmd.ExtraFiles = append(cmd.ExtraFiles, comm.initSockChild)
cmd.Env = append(cmd.Env,
"_LIBCONTAINER_INITPIPE="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1),
"_LIBCONTAINER_STATEDIR="+c.root,
)
cmd.ExtraFiles = append(cmd.ExtraFiles, comm.syncSockChild.File())
cmd.Env = append(cmd.Env,
"_LIBCONTAINER_SYNCPIPE="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1),
)

if dmzExe != nil {
Expand All @@ -581,7 +579,7 @@ func (c *Container) newParentProcess(p *Process) (parentProcess, error) {
"_LIBCONTAINER_DMZEXEFD="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1))
}

cmd.ExtraFiles = append(cmd.ExtraFiles, childLogPipe)
cmd.ExtraFiles = append(cmd.ExtraFiles, comm.logPipeChild)
cmd.Env = append(cmd.Env,
"_LIBCONTAINER_LOGPIPE="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1))
if p.LogLevel != "" {
Expand Down Expand Up @@ -617,9 +615,9 @@ func (c *Container) newParentProcess(p *Process) (parentProcess, error) {
if err := c.includeExecFifo(cmd); err != nil {
return nil, fmt.Errorf("unable to setup exec fifo: %w", err)
}
return c.newInitProcess(p, cmd, messageSockPair, logFilePair)
return c.newInitProcess(p, cmd, comm)
}
return c.newSetnsProcess(p, cmd, messageSockPair, logFilePair)
return c.newSetnsProcess(p, cmd, comm)
}

// shouldSendMountSources says whether the child process must setup bind mounts with
Expand Down Expand Up @@ -681,27 +679,27 @@ func (c *Container) shouldSendIdmapSources() bool {
return false
}

func (c *Container) sendMountSources(cmd *exec.Cmd, messageSockPair filePair) error {
func (c *Container) sendMountSources(cmd *exec.Cmd, comm *processComm) error {
if !c.shouldSendMountSources() {
return nil
}

return c.sendFdsSources(cmd, messageSockPair, "_LIBCONTAINER_MOUNT_FDS", func(m *configs.Mount) bool {
return c.sendFdsSources(cmd, comm, "_LIBCONTAINER_MOUNT_FDS", func(m *configs.Mount) bool {
return m.IsBind() && !m.IsIDMapped()
})
}

func (c *Container) sendIdmapSources(cmd *exec.Cmd, messageSockPair filePair) error {
func (c *Container) sendIdmapSources(cmd *exec.Cmd, comm *processComm) error {
if !c.shouldSendIdmapSources() {
return nil
}

return c.sendFdsSources(cmd, messageSockPair, "_LIBCONTAINER_IDMAP_FDS", func(m *configs.Mount) bool {
return c.sendFdsSources(cmd, comm, "_LIBCONTAINER_IDMAP_FDS", func(m *configs.Mount) bool {
return m.IsBind() && m.IsIDMapped()
})
}

func (c *Container) sendFdsSources(cmd *exec.Cmd, messageSockPair filePair, envVar string, condition func(*configs.Mount) bool) error {
func (c *Container) sendFdsSources(cmd *exec.Cmd, comm *processComm, envVar string, condition func(*configs.Mount) bool) error {
// Elements on these slices will be paired with mounts (see StartInitialization() and
// prepareRootfs()). These slices MUST have the same size as c.config.Mounts.
fds := make([]int, len(c.config.Mounts))
Expand All @@ -712,11 +710,12 @@ func (c *Container) sendFdsSources(cmd *exec.Cmd, messageSockPair filePair, envV
continue
}

// The fd passed here will not be used: nsexec.c will overwrite it with dup3(). We just need
// to allocate a fd so that we know the number to pass in the environment variable. The fd
// must not be closed before cmd.Start(), so we reuse messageSockPair.child because the
// lifecycle of that fd is already taken care of.
cmd.ExtraFiles = append(cmd.ExtraFiles, messageSockPair.child)
// The fd passed here will not be used: nsexec.c will overwrite it with
// dup3(). We just need to allocate a fd so that we know the number to
// pass in the environment variable. The fd must not be closed before
// cmd.Start(), so we reuse initSockChild because the lifecycle of that
// fd is already taken care of.
cmd.ExtraFiles = append(cmd.ExtraFiles, comm.initSockChild)
fds[i] = stdioFdCount + len(cmd.ExtraFiles) - 1
}
fdsJSON, err := json.Marshal(fds)
Expand All @@ -727,7 +726,7 @@ func (c *Container) sendFdsSources(cmd *exec.Cmd, messageSockPair filePair, envV
return nil
}

func (c *Container) newInitProcess(p *Process, cmd *exec.Cmd, messageSockPair, logFilePair filePair) (*initProcess, error) {
func (c *Container) newInitProcess(p *Process, cmd *exec.Cmd, comm *processComm) (*initProcess, error) {
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initStandard))
nsMaps := make(map[configs.NamespaceType]string)
for _, ns := range c.config.Namespaces {
Expand All @@ -739,17 +738,16 @@ func (c *Container) newInitProcess(p *Process, cmd *exec.Cmd, messageSockPair, l
if err != nil {
return nil, err
}
if err := c.sendMountSources(cmd, messageSockPair); err != nil {
if err := c.sendMountSources(cmd, comm); err != nil {
return nil, err
}
if err := c.sendIdmapSources(cmd, messageSockPair); err != nil {
if err := c.sendIdmapSources(cmd, comm); err != nil {
return nil, err
}

init := &initProcess{
cmd: cmd,
messageSockPair: messageSockPair,
logFilePair: logFilePair,
comm: comm,
manager: c.cgroupManager,
intelRdtManager: c.intelRdtManager,
config: c.newInitConfig(p),
Expand All @@ -761,7 +759,7 @@ func (c *Container) newInitProcess(p *Process, cmd *exec.Cmd, messageSockPair, l
return init, nil
}

func (c *Container) newSetnsProcess(p *Process, cmd *exec.Cmd, messageSockPair, logFilePair filePair) (*setnsProcess, error) {
func (c *Container) newSetnsProcess(p *Process, cmd *exec.Cmd, comm *processComm) (*setnsProcess, error) {
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initSetns))
state, err := c.currentState()
if err != nil {
Expand All @@ -778,8 +776,7 @@ func (c *Container) newSetnsProcess(p *Process, cmd *exec.Cmd, messageSockPair,
cgroupPaths: state.CgroupPaths,
rootlessCgroups: c.config.RootlessCgroups,
intelRdtPath: state.IntelRdtPath,
messageSockPair: messageSockPair,
logFilePair: logFilePair,
comm: comm,
manager: c.cgroupManager,
config: c.newInitConfig(p),
process: p,
Expand Down
44 changes: 27 additions & 17 deletions libcontainer/init_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,27 +108,36 @@ func Init() {
// error, it means the initialization has failed. If the error is returned,
// it means the error can not be communicated back to the parent.
func startInitialization() (retErr error) {
// Get the INITPIPE.
envInitPipe := os.Getenv("_LIBCONTAINER_INITPIPE")
pipefd, err := strconv.Atoi(envInitPipe)
// Get the syncrhonisation pipe.
envSyncPipe := os.Getenv("_LIBCONTAINER_SYNCPIPE")
syncPipeFd, err := strconv.Atoi(envSyncPipe)
if err != nil {
return fmt.Errorf("unable to convert _LIBCONTAINER_INITPIPE: %w", err)
return fmt.Errorf("unable to convert _LIBCONTAINER_SYNCPIPE: %w", err)
}
pipe := os.NewFile(uintptr(pipefd), "pipe")
defer pipe.Close()
syncPipe := newSyncSocket(os.NewFile(uintptr(syncPipeFd), "sync"))
defer syncPipe.Close()

defer func() {
// If this defer is ever called, this means initialization has failed.
// Send the error back to the parent process in the form of an initError.
ierr := initError{Message: retErr.Error()}
if err := writeSyncArg(pipe, procError, ierr); err != nil {
if err := writeSyncArg(syncPipe, procError, ierr); err != nil {
fmt.Fprintln(os.Stderr, err)
return
}
// The error is sent, no need to also return it (or it will be reported twice).
retErr = nil
}()

// Get the INITPIPE.
envInitPipe := os.Getenv("_LIBCONTAINER_INITPIPE")
initPipeFd, err := strconv.Atoi(envInitPipe)
if err != nil {
return fmt.Errorf("unable to convert _LIBCONTAINER_INITPIPE: %w", err)
}
initPipe := os.NewFile(uintptr(initPipeFd), "init")
defer initPipe.Close()

// Set up logging. This is used rarely, and mostly for init debugging.

// Passing log level is optional; currently libcontainer/integration does not do it.
Expand Down Expand Up @@ -207,15 +216,16 @@ func startInitialization() (retErr error) {
}
}()

var config initConfig
if err := json.NewDecoder(initPipe).Decode(&config); err != nil {
return err
}

// If init succeeds, it will not return, hence none of the defers will be called.
return containerInit(it, pipe, consoleSocket, fifofd, logFD, dmzExe, mountFds{sourceFds: mountSrcFds, idmapFds: idmapFds})
return containerInit(it, &config, syncPipe, consoleSocket, fifofd, logFD, dmzExe, mountFds{sourceFds: mountSrcFds, idmapFds: idmapFds})
}

func containerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd, logFd int, dmzExe *os.File, mountFds mountFds) error {
var config *initConfig
if err := json.NewDecoder(pipe).Decode(&config); err != nil {
return err
}
func containerInit(t initType, config *initConfig, pipe *syncSocket, consoleSocket *os.File, fifoFd, logFd int, dmzExe *os.File, mountFds mountFds) error {
if err := populateProcessEnvironment(config.Env); err != nil {
return err
}
Expand Down Expand Up @@ -395,7 +405,7 @@ func setupConsole(socket *os.File, config *initConfig, mount bool) error {
// syncParentReady sends to the given pipe a JSON payload which indicates that
// the init is ready to Exec the child process. It then waits for the parent to
// indicate that it is cleared to Exec.
func syncParentReady(pipe *os.File) error {
func syncParentReady(pipe *syncSocket) error {
// Tell parent.
if err := writeSync(pipe, procReady); err != nil {
return err
Expand All @@ -407,18 +417,18 @@ func syncParentReady(pipe *os.File) error {
// syncParentHooks sends to the given pipe a JSON payload which indicates that
// the parent should execute pre-start hooks. It then waits for the parent to
// indicate that it is cleared to resume.
func syncParentHooks(pipe *os.File) error {
func syncParentHooks(pipe *syncSocket) error {
// Tell parent.
if err := writeSync(pipe, procHooks); err != nil {
return err
}
// Wait for parent to give the all-clear.
return readSync(pipe, procResume)
return readSync(pipe, procHooksDone)
}

// syncParentSeccomp sends the fd associated with the seccomp file descriptor
// to the parent, and wait for the parent to do pidfd_getfd() to grab a copy.
func syncParentSeccomp(pipe *os.File, seccompFd *os.File) error {
func syncParentSeccomp(pipe *syncSocket, seccompFd *os.File) error {
if seccompFd == nil {
return nil
}
Expand Down
Loading

0 comments on commit f56b007

Please sign in to comment.