Skip to content

Commit

Permalink
[Carry 362] support detach-netns
Browse files Browse the repository at this point in the history
Planned to be used for:
- accelerating (and deflaking) `nerdctl pull` and `nerdctl build`
- supporting `nerdctl run --net=host`

TODO: Add tests

This commit is based on PR 362 (originally authored by Fahed Dorgaa),
but has been almost entirely rewritten from scratch.

Co-authored-by: fahed dorgaa <fahed.dorgaa@gmail.com>
Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
  • Loading branch information
AkihiroSuda and fahedouch committed Jun 28, 2023
1 parent 2ba2ca7 commit 84e7264
Show file tree
Hide file tree
Showing 17 changed files with 184 additions and 61 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,7 @@ The following files will be created in the state directory, which can be specifi
* `lock`: lock file
* `child_pid`: decimal PID text that can be used for `nsenter(1)`.
* `api.sock`: REST API socket. See [`./docs/api.md`](./docs/api.md) and [`./docs/port.md`](./docs/port.md).
* `netns`: Detached NetNS. Created only with `--detach-netns`. Valid only in the child mount namespace.

If `--state-dir` is not specified, RootlessKit creates a temporary state directory on `/tmp` and removes it on exit.

Expand Down
7 changes: 7 additions & 0 deletions cmd/rootlesskit/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,10 @@ See https://rootlesscontaine.rs/getting-started/common/ .
Name: "ipcns",
Usage: "create an IPC namespace",
}, CategoryProcess),
Categorize(&cli.BoolFlag{
Name: "detach-netns",
Usage: "detach network namespaces ",
}, CategoryNetwork),
Categorize(&cli.StringFlag{
Name: "propagation",
Usage: "mount propagation [rprivate, rslave]",
Expand Down Expand Up @@ -280,6 +284,7 @@ func createParentOpt(clicontext *cli.Context, pipeFDEnvKey, stateDirEnvKey, pare
CreateCgroupNS: clicontext.Bool("cgroupns"),
CreateUTSNS: clicontext.Bool("utsns"),
CreateIPCNS: clicontext.Bool("ipcns"),
DetachNetNS: clicontext.Bool("detach-netns"),
ParentEUIDEnvKey: parentEUIDEnvKey,
ParentEGIDEnvKey: parentEGIDEnvKey,
Propagation: clicontext.String("propagation"),
Expand Down Expand Up @@ -492,11 +497,13 @@ func (w *logrusDebugWriter) Write(p []byte) (int, error) {

func createChildOpt(clicontext *cli.Context, pipeFDEnvKey, stateDirEnvKey string, targetCmd []string) (child.Opt, error) {
pidns := clicontext.Bool("pidns")
detachNetNS := clicontext.Bool("detach-netns")
opt := child.Opt{
PipeFDEnvKey: pipeFDEnvKey,
StateDirEnvKey: stateDirEnvKey,
TargetCmd: targetCmd,
MountProcfs: pidns,
DetachNetNS: detachNetNS,
Propagation: clicontext.String("propagation"),
EvacuateCgroup2: clicontext.String("evacuate-cgroup2") != "",
}
Expand Down
1 change: 1 addition & 0 deletions docs/internal.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ Common:
- `lock`
- `child_pid`
- `api.sock`
- `netns` (detached netns)

Network driver `slirp4netns`:
- `.s4nn.sock`
Expand Down
6 changes: 6 additions & 0 deletions docs/network.md
Original file line number Diff line number Diff line change
Expand Up @@ -176,3 +176,9 @@ Currently, the MAC address is always set to a random address.

The `--ipv6` flag (since v0.14.0, EXPERIMENTAL) enables IPv6 routing for slirp4netns network driver.
This flag is unrelated to port forwarding.

## Detaching network namespace
The `--detach-netns` flag (since v2.0.0) detaches the network namespace into `$ROOTLESSKIT_STATE_DIR/netns`
and executes the child command in the host's network namespace.

The child command can enter `$ROOTLESSKIT_STATE_DIR/netns` by itself to create nested network namespaces.
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ module github.com/rootless-containers/rootlesskit
go 1.19

require (
github.com/containernetworking/plugins v1.3.0
github.com/gofrs/flock v0.8.1
github.com/google/uuid v1.3.0
github.com/gorilla/mux v1.8.0
Expand Down
10 changes: 10 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
github.com/containernetworking/cni v1.1.2 h1:wtRGZVv7olUHMOqouPpn3cXJWpJgM6+EUl31EQbXALQ=
github.com/containernetworking/plugins v1.3.0 h1:QVNXMT6XloyMUoO2wUOqWTC1hWFV62Q6mVDp5H1HnjM=
github.com/containernetworking/plugins v1.3.0/go.mod h1:Pc2wcedTQQCVuROOOaLBPPxrEXqqXBFt3cZ+/yVg6l0=
github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w=
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/go-logr/logr v1.2.4 h1:g01GSCwiDw2xSZfjJ2/T9M+S6pFdcNtFYsp+Y43HYDQ=
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
github.com/gofrs/flock v0.8.1 h1:+gYjHKf32LDeiEEFhQaotPbLuUXjY5ZqxKgXy7n59aw=
github.com/gofrs/flock v0.8.1/go.mod h1:F1TvTiK9OcQqauNUHlbJvyl9Qa1QvF/gOUDKA14jxHU=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/pprof v0.0.0-20230323073829-e72429f035bd h1:r8yyd+DJDmsUhGrRBxH5Pj7KeFK5l+Y3FsgT8keqKtk=
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/gorilla/mux v1.8.0 h1:i40aqfkR1h2SlN9hojwV5ZA91wcXFOvkdNIeFDP5koI=
Expand All @@ -21,6 +27,8 @@ github.com/moby/sys/mountinfo v0.6.2 h1:BzJjoreD5BMFNmD9Rus6gdd1pLuecOFPt8wC+Vyg
github.com/moby/sys/mountinfo v0.6.2/go.mod h1:IJb6JQeOklcdMU9F5xQ8ZALD+CUr5VlGpwtX+VE0rpI=
github.com/moby/vpnkit v0.5.0 h1:VcDpS9y+PmT9itf+mH5Qdh9GME7ungLMt9yjf9o4REY=
github.com/moby/vpnkit v0.5.0/go.mod h1:KyjUrL9cb6ZSNNAUwZfqRjhwwgJ3BJN+kXh0t43WTUQ=
github.com/onsi/ginkgo/v2 v2.9.2 h1:BA2GMJOtfGAfagzYtrAlufIP0lq6QERkFmHLMLPwFSU=
github.com/onsi/gomega v1.27.6 h1:ENqfyGeS5AX/rlXDd/ETokDz93u0YufY1Pgxuy/PvWE=
github.com/pierrec/lz4/v4 v4.1.14/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
github.com/pierrec/lz4/v4 v4.1.17 h1:kV4Ip+/hUBC+8T6+2EgburRtkE9ef4nbY3f4dFhGjMc=
github.com/pierrec/lz4/v4 v4.1.17/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
Expand Down Expand Up @@ -64,9 +72,11 @@ golang.org/x/sys v0.9.0 h1:KS/R3tvhPqvJvwcKfnBHJwwthS11LRhmM5D59eEXa0s=
golang.org/x/sys v0.9.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0=
golang.org/x/tools v0.7.0 h1:W4OVu8VVOaIO0yzWMNdepAulS7YfoS3Zabrm8DOXXU4=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
Expand Down
111 changes: 79 additions & 32 deletions pkg/child/child.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,22 @@ import (
"os"
"os/exec"
"os/signal"
"path/filepath"
"runtime"
"strconv"
"syscall"
"time"

"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"

"github.com/containernetworking/plugins/pkg/ns"
"github.com/rootless-containers/rootlesskit/pkg/common"
"github.com/rootless-containers/rootlesskit/pkg/copyup"
"github.com/rootless-containers/rootlesskit/pkg/messages"
"github.com/rootless-containers/rootlesskit/pkg/network"
"github.com/rootless-containers/rootlesskit/pkg/port"
"github.com/rootless-containers/rootlesskit/pkg/sigproxy"
sigproxysignal "github.com/rootless-containers/rootlesskit/pkg/sigproxy/signal"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)

var propagationStates = map[string]uintptr{
Expand Down Expand Up @@ -152,39 +153,60 @@ func setupCopyDir(driver copyup.ChildDriver, dirs []string) (bool, error) {
return false, nil
}

func setupNet(stateDir string, msg *messages.ParentInitNetworkDriverCompleted, etcWasCopied bool, driver network.ChildDriver) error {
func setupNet(stateDir string, msg *messages.ParentInitNetworkDriverCompleted, etcWasCopied bool, driver network.ChildDriver, detachedNetNSPath string) error {
// HostNetwork
if driver == nil {
return nil
}
if err := activateLoopback(); err != nil {
return err
}
dev, err := driver.ConfigureNetworkChild(msg)
if err != nil {
return err
}
if err := activateDev(dev, msg.IP, msg.Netmask, msg.Gateway, msg.MTU); err != nil {
return err
}
if etcWasCopied {
if err := writeResolvConf(msg.DNS); err != nil {

if detachedNetNSPath == "" {
// non-detached mode
if err := activateLoopback(); err != nil {
return err
}
dev, err := driver.ConfigureNetworkChild(msg, detachedNetNSPath)
if err != nil {
return err
}
if err := writeEtcHosts(); err != nil {
if err := activateDev(dev, msg.IP, msg.Netmask, msg.Gateway, msg.MTU); err != nil {
return err
}
if etcWasCopied {
if err := writeResolvConf(msg.DNS); err != nil {
return err
}
if err := writeEtcHosts(); err != nil {
return err
}
} else {
logrus.Warn("Mounting /etc/resolv.conf without copying-up /etc. " +
"Note that /etc/resolv.conf in the namespace will be unmounted when it is recreated on the host. " +
"Unless /etc/resolv.conf is statically configured, copying-up /etc is highly recommended. " +
"Please refer to RootlessKit documentation for further information.")
if err := mountResolvConf(stateDir, msg.DNS); err != nil {
return err
}
if err := mountEtcHosts(stateDir); err != nil {
return err
}
}
} else {
logrus.Warn("Mounting /etc/resolv.conf without copying-up /etc. " +
"Note that /etc/resolv.conf in the namespace will be unmounted when it is recreated on the host. " +
"Unless /etc/resolv.conf is statically configured, copying-up /etc is highly recommended. " +
"Please refer to RootlessKit documentation for further information.")
if err := mountResolvConf(stateDir, msg.DNS); err != nil {
// detached mode
if err := ns.WithNetNSPath(detachedNetNSPath, func(_ ns.NetNS) error {
return activateLoopback()
}); err != nil {
return err
}
dev, err := driver.ConfigureNetworkChild(msg, detachedNetNSPath)
if err != nil {
return err
}
if err := mountEtcHosts(stateDir); err != nil {
if err := ns.WithNetNSPath(detachedNetNSPath, func(_ ns.NetNS) error {
return activateDev(dev, msg.IP, msg.Netmask, msg.Gateway, msg.MTU)
}); err != nil {
return err
}
// TODO: write /etc/resolv.conf and /etc/hosts in a custom directory?
}
return nil
}
Expand All @@ -196,6 +218,7 @@ type Opt struct {
NetworkDriver network.ChildDriver // nil for HostNetwork
CopyUpDriver copyup.ChildDriver // cannot be nil if len(CopyUpDirs) != 0
CopyUpDirs []string
DetachNetNS bool
PortDriver port.ChildDriver
MountProcfs bool // needs to be set if (and only if) parent.Opt.CreatePIDNS is set
Propagation string // mount propagation type
Expand Down Expand Up @@ -322,6 +345,20 @@ func Child(opt Opt) error {
}
}

if opt.MountProcfs {
if err := mountProcfs(); err != nil {
return err
}
}

var detachedNetNSPath string
if opt.DetachNetNS {
detachedNetNSPath = filepath.Join(stateDir, "netns")
if err = NewNetNsWithPathWithoutEnter(detachedNetNSPath); err != nil {
return fmt.Errorf("failed to create a detached netns on %q: %w", detachedNetNSPath, err)
}
}

msgChildInitUserNSCompleted := &messages.Message{
U: messages.U{
ChildInitUserNSCompleted: &messages.ChildInitUserNSCompleted{},
Expand Down Expand Up @@ -362,17 +399,14 @@ func Child(opt Opt) error {
if err != nil {
return err
}
if err := mountSysfs(opt.NetworkDriver == nil, opt.EvacuateCgroup2); err != nil {
return err
}
if err := setupNet(stateDir, netMsg, etcWasCopied, opt.NetworkDriver); err != nil {
return err
}
if opt.MountProcfs {
if err := mountProcfs(); err != nil {
if detachedNetNSPath == "" {
if err := mountSysfs(opt.NetworkDriver == nil, opt.EvacuateCgroup2); err != nil {
return err
}
}
if err := setupNet(stateDir, netMsg, etcWasCopied, opt.NetworkDriver, detachedNetNSPath); err != nil {
return err
}
portQuitCh := make(chan struct{})
portErrCh := make(chan error)
if opt.PortDriver != nil {
Expand All @@ -381,7 +415,7 @@ func Child(opt Opt) error {
portDriverOpaque = portMsg.PortDriverOpaque
}
go func() {
portErrCh <- opt.PortDriver.RunChildDriver(portDriverOpaque, portQuitCh)
portErrCh <- opt.PortDriver.RunChildDriver(portDriverOpaque, portQuitCh, detachedNetNSPath)
}()
}

Expand Down Expand Up @@ -484,3 +518,16 @@ func (e *reaperErr) Error() string {
}
return fmt.Sprintf("exited with WAITSTATUS=0x%08x", e.ws)
}

// NewNetNsWithPathWithoutEnter creates a new network namespace and pins it
// to the file path p, without moving the calling process into it.
//
// An empty placeholder file is created at p (mode 0400) to serve as the bind
// mount target. A helper process is then started via `unshare -n`, which
// runs `mount --bind /proc/self/ns/net p` inside a fresh network namespace,
// so that p refers to that namespace.
//
// An error is returned if the placeholder cannot be written or if the helper
// command fails; in the latter case the combined output of the command is
// included in the error and the placeholder file is removed again
// (best-effort) so that no stale non-namespace file is left at p.
func NewNetNsWithPathWithoutEnter(p string) error {
	if err := os.WriteFile(p, nil, 0400); err != nil {
		return err
	}
	// this is hard (not impossible though) to reimplement in Go: https://github.com/cloudflare/slirpnetstack/commit/d7766a8a77f0093d3cb7a94bd0ccbe3f67d411ba
	cmd := exec.Command("unshare", "-n", "mount", "--bind", "/proc/self/ns/net", p)
	out, err := cmd.CombinedOutput()
	if err != nil {
		// The bind mount did not happen, so p is only an empty regular file.
		// Remove it so that later users of p do not mistake it for a netns.
		// The removal error is deliberately ignored: the command failure is
		// the error worth reporting.
		_ = os.Remove(p)
		return fmt.Errorf("failed to execute %v: %w (out=%q)", cmd.Args, err, string(out))
	}
	return nil
}
11 changes: 9 additions & 2 deletions pkg/network/lxcusernic/lxcusernic.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,10 @@ func (d *parentDriver) MTU() int {
return d.mtu
}

func (d *parentDriver) ConfigureNetwork(childPID int, stateDir string) (*messages.ParentInitNetworkDriverCompleted, func() error, error) {
func (d *parentDriver) ConfigureNetwork(childPID int, stateDir, detachedNetNSPath string) (*messages.ParentInitNetworkDriverCompleted, func() error, error) {
if detachedNetNSPath != "" {
return nil, nil, fmt.Errorf("network driver %q does not support detach-netns", DriverName)
}
var cleanups []func() error
dummyLXCPath := "/dev/null"
dummyLXCName := "dummy"
Expand Down Expand Up @@ -127,7 +130,11 @@ func exchangeDHCP(c *client4.Client, dev string) (*dhcpv4.DHCPv4, error) {
return ack, nil
}

func (d *childDriver) ConfigureNetworkChild(netmsg *messages.ParentInitNetworkDriverCompleted) (string, error) {
func (d *childDriver) ConfigureNetworkChild(netmsg *messages.ParentInitNetworkDriverCompleted, detachedNetNSPath string) (string, error) {
if detachedNetNSPath != "" {
return "", fmt.Errorf("network driver %q does not support detach-netns", DriverName)
}

dev := netmsg.Dev
if dev == "" {
return "", errors.New("could not determine the dev")
Expand Down
7 changes: 5 additions & 2 deletions pkg/network/network.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,15 @@ type ParentDriver interface {
// MTU returns MTU
MTU() int
// ConfigureNetwork sets up Slirp, updates msg, and returns destructor function.
ConfigureNetwork(childPID int, stateDir string) (netmsg *messages.ParentInitNetworkDriverCompleted, cleanup func() error, err error)
// detachedNetNSPath is set only for the detach-netns mode.
ConfigureNetwork(childPID int, stateDir, detachedNetNSPath string) (netmsg *messages.ParentInitNetworkDriverCompleted, cleanup func() error, err error)
}

// ChildDriver is called from the child namespace
type ChildDriver interface {
// ConfigureNetworkChild is executed in the child's namespaces, excluding detached-netns.
//
// netmsg MAY be modified.
// devName is like "tap" or "eth0"
ConfigureNetworkChild(netmsg *messages.ParentInitNetworkDriverCompleted) (devName string, err error)
ConfigureNetworkChild(netmsg *messages.ParentInitNetworkDriverCompleted, detachedNetNSPath string) (devName string, err error)
}
19 changes: 13 additions & 6 deletions pkg/network/parentutils/parentutils.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,31 @@ package parentutils

import (
"fmt"

"os"
"strconv"

"github.com/rootless-containers/rootlesskit/pkg/common"
)

func PrepareTap(pid int, tap string) error {
func PrepareTap(childPID int, childNetNsPath string, tap string) error {
cmds := [][]string{
nsenter(pid, []string{"ip", "tuntap", "add", "name", tap, "mode", "tap"}),
nsenter(pid, []string{"ip", "link", "set", tap, "up"}),
nsenter(childPID, childNetNsPath, []string{"ip", "tuntap", "add", "name", tap, "mode", "tap"}),
nsenter(childPID, childNetNsPath, []string{"ip", "link", "set", tap, "up"}),
}
if err := common.Execs(os.Stderr, os.Environ(), cmds); err != nil {
return fmt.Errorf("executing %v: %w", cmds, err)
}
return nil
}

func nsenter(pid int, cmd []string) []string {
return append([]string{"nsenter", "-t", strconv.Itoa(pid), "-n", "-m", "-U", "--preserve-credentials"}, cmd...)
// nsenter returns the argv that runs cmd through nsenter(1), targeting the
// process childPID and requesting its mount (-m) and user (-U) namespaces
// with --preserve-credentials.
//
// When childNetNsPath is empty, the network namespace is requested with a
// bare "-n". Otherwise the path is attached directly to the flag
// ("-n<path>").
func nsenter(childPID int, childNetNsPath string, cmd []string) []string {
	netFlag := "-n"
	if childNetNsPath != "" {
		netFlag += childNetNsPath
	}
	argv := []string{
		"nsenter", "-t", strconv.Itoa(childPID),
		netFlag,
		"-m", "-U", "--preserve-credentials",
	}
	return append(argv, cmd...)
}

0 comments on commit 84e7264

Please sign in to comment.