Skip to content

Commit

Permalink
[Carry 362] support detach-netns
Browse files Browse the repository at this point in the history
Planned to be used for:
- accelerating (and deflaking) `nerdctl pull` and `nerdctl build`
- supporting `nerdctl run --net=host`

This commit is based on PR 362 (originally authored by Fahed Dorgaa),
but almost rewritten from scratch.

Co-authored-by: fahed dorgaa <fahed.dorgaa@gmail.com>
Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
  • Loading branch information
AkihiroSuda and fahedouch committed Jun 29, 2023
1 parent 2ba2ca7 commit 9874e7d
Show file tree
Hide file tree
Showing 21 changed files with 276 additions and 87 deletions.
28 changes: 28 additions & 0 deletions .github/workflows/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,20 @@ jobs:
run: |
docker run --rm --security-opt seccomp=unconfined --security-opt apparmor=unconfined --device /dev/net/tun \
rootlesskit:test-integration ./benchmark-iperf3-net.sh slirp4netns 1500 --slirp4netns-sandbox=auto --slirp4netns-seccomp=auto
- name: "Benchmark: Network (MTU=1500, network driver=slirp4netns with sandbox and seccomp) with detach-netns"
run: |
docker run --rm --security-opt seccomp=unconfined --security-opt apparmor=unconfined --device /dev/net/tun \
rootlesskit:test-integration ./benchmark-iperf3-net.sh slirp4netns 1500 --slirp4netns-sandbox=auto --slirp4netns-seccomp=auto --detach-netns
# NOTE: MTU greater than 16424 is known not to work for VPNKit.
# Also, MTU greater than 4K might not be effective for VPNKit: https://twitter.com/mugofsoup/status/1017665057738641408
- name: "Benchmark: Network (MTU=1500, network driver=vpnkit)"
run: |
docker run --rm --security-opt seccomp=unconfined --security-opt apparmor=unconfined --device /dev/net/tun \
rootlesskit:test-integration ./benchmark-iperf3-net.sh vpnkit 1500
- name: "Benchmark: Network (MTU=1500, network driver=vpnkit) with detach-netns"
run: |
docker run --rm --security-opt seccomp=unconfined --security-opt apparmor=unconfined --device /dev/net/tun \
rootlesskit:test-integration ./benchmark-iperf3-net.sh vpnkit 1500 --detach-netns
- name: "Benchmark: Network (MTU=1500, network driver=lxc-user-nic)"
run: |
docker run --rm --privileged \
Expand Down Expand Up @@ -84,15 +92,35 @@ jobs:
run: |
docker run --rm --security-opt seccomp=unconfined --security-opt apparmor=unconfined --device /dev/net/tun \
rootlesskit:test-integration ./benchmark-iperf3-port.sh slirp4netns
- name: "Benchmark: TCP Ports (port driver=slirp4netns) with detach-netns"
run: |
docker run --rm --security-opt seccomp=unconfined --security-opt apparmor=unconfined --device /dev/net/tun \
rootlesskit:test-integration ./benchmark-iperf3-port.sh slirp4netns --detach-netns
- name: "Benchmark: TCP Ports (port driver=builtin)"
run: |
docker run --rm --security-opt seccomp=unconfined --security-opt apparmor=unconfined --device /dev/net/tun \
rootlesskit:test-integration ./benchmark-iperf3-port.sh builtin
- name: "Benchmark: TCP Ports (port driver=builtin) with detach-netns"
run: |
docker run --rm --security-opt seccomp=unconfined --security-opt apparmor=unconfined --device /dev/net/tun \
rootlesskit:test-integration ./benchmark-iperf3-port.sh builtin --detach-netns
# ===== Benchmark: UDP Ports =====
- name: "Benchmark: UDP Ports (port driver=slirp4netns)"
run: |
docker run --rm --security-opt seccomp=unconfined --security-opt apparmor=unconfined --device /dev/net/tun \
rootlesskit:test-integration ./benchmark-iperf3-port-udp.sh slirp4netns
- name: "Benchmark: UDP Ports (port driver=slirp4netns) with detach-netns"
run: |
docker run --rm --security-opt seccomp=unconfined --security-opt apparmor=unconfined --device /dev/net/tun \
rootlesskit:test-integration ./benchmark-iperf3-port-udp.sh slirp4netns --detach-netns
- name: "Benchmark: UDP Ports (port driver=builtin)"
run: |
docker run --rm --security-opt seccomp=unconfined --security-opt apparmor=unconfined --device /dev/net/tun \
rootlesskit:test-integration ./benchmark-iperf3-port-udp.sh builtin
- name: "Benchmark: UDP Ports (port driver=builtin) with detach-netns"
run: |
docker run --rm --security-opt seccomp=unconfined --security-opt apparmor=unconfined --device /dev/net/tun \
rootlesskit:test-integration ./benchmark-iperf3-port-udp.sh builtin --detach-netns
test-integration-docker:
name: "Integration test (Docker)"
runs-on: ubuntu-latest
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,7 @@ The following files will be created in the state directory, which can be specifi
* `lock`: lock file
* `child_pid`: decimal PID text that can be used for `nsenter(1)`.
* `api.sock`: REST API socket. See [`./docs/api.md`](./docs/api.md) and [`./docs/port.md`](./docs/port.md).
* `netns`: Detached NetNS. Created only with `--detach-netns`. Valid only in the child mount namespace.

If `--state-dir` is not specified, RootlessKit creates a temporary state directory on `/tmp` and removes it on exit.

Expand Down
17 changes: 15 additions & 2 deletions cmd/rootlesskit/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,10 @@ See https://rootlesscontaine.rs/getting-started/common/ .
Name: "ipcns",
Usage: "create an IPC namespace",
}, CategoryProcess),
Categorize(&cli.BoolFlag{
Name: "detach-netns",
Usage: "detach network namespaces ",
}, CategoryNetwork),
Categorize(&cli.StringFlag{
Name: "propagation",
Usage: "mount propagation [rprivate, rslave]",
Expand Down Expand Up @@ -280,6 +284,7 @@ func createParentOpt(clicontext *cli.Context, pipeFDEnvKey, stateDirEnvKey, pare
CreateCgroupNS: clicontext.Bool("cgroupns"),
CreateUTSNS: clicontext.Bool("utsns"),
CreateIPCNS: clicontext.Bool("ipcns"),
DetachNetNS: clicontext.Bool("detach-netns"),
ParentEUIDEnvKey: parentEUIDEnvKey,
ParentEGIDEnvKey: parentEGIDEnvKey,
Propagation: clicontext.String("propagation"),
Expand Down Expand Up @@ -375,15 +380,21 @@ func createParentOpt(clicontext *cli.Context, pipeFDEnvKey, stateDirEnvKey, pare
enableSandbox := false
switch s := clicontext.String("slirp4netns-sandbox"); s {
case "auto":
// this might not work when /etc/resolv.conf is a symlink to a file outside /etc or /run
// Sandbox might not work when /etc/resolv.conf is a symlink to a file outside /etc or /run
// https://github.com/rootless-containers/slirp4netns/issues/116
enableSandbox = features.SupportsEnableSandbox

// Sandbox is known to be incompatible with detach-netns
// https://github.com/rootless-containers/slirp4netns/issues/317
enableSandbox = features.SupportsEnableSandbox && !opt.DetachNetNS
case "true":
enableSandbox = true
if !features.SupportsEnableSandbox {
// NOTREACHED
return opt, errors.New("unsupported slirp4netns version: lacks SupportsEnableSandbox")
}
if opt.DetachNetNS {
return opt, errors.New("--slirp4netns-sandbox conflicts with --detach-netns (https://github.com/rootless-containers/slirp4netns/issues/317)")
}
case "false", "": // default
// NOP
default:
Expand Down Expand Up @@ -492,11 +503,13 @@ func (w *logrusDebugWriter) Write(p []byte) (int, error) {

func createChildOpt(clicontext *cli.Context, pipeFDEnvKey, stateDirEnvKey string, targetCmd []string) (child.Opt, error) {
pidns := clicontext.Bool("pidns")
detachNetNS := clicontext.Bool("detach-netns")
opt := child.Opt{
PipeFDEnvKey: pipeFDEnvKey,
StateDirEnvKey: stateDirEnvKey,
TargetCmd: targetCmd,
MountProcfs: pidns,
DetachNetNS: detachNetNS,
Propagation: clicontext.String("propagation"),
EvacuateCgroup2: clicontext.String("evacuate-cgroup2") != "",
}
Expand Down
1 change: 1 addition & 0 deletions docs/internal.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ Common:
- `lock`
- `child_pid`
- `api.sock`
- `netns` (detached netns)

Network driver `slirp4netns`:
- `.s4nn.sock`
Expand Down
7 changes: 7 additions & 0 deletions docs/network.md
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ Cons:
* Less secure
* Needs `/etc/lxc/lxc-usernet` configuration
* No support for IPv6.
* No support for `--detach-netns`

To use `lxc-user-nic`, you need to install `liblxc-common` package:
```console
Expand All @@ -176,3 +177,9 @@ Currently, the MAC address is always set to a random address.

The `--ipv6` flag (since v0.14.0, EXPERIMENTAL) enables IPv6 routing for slirp4netns network driver.
This flag is unrelated to port forwarding.

## Detaching network namespace
The `--detach-netns` flag (since v2.0.0) detaches network namespaces into `$ROOTLESSKIT_STATE_DIR/netns`
and executes the child command in the host's network namespace.

The child command can enter `$ROOTLESSKIT_STATE_DIR/netns` by itself to create nested network namespaces.
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ module github.com/rootless-containers/rootlesskit
go 1.19

require (
github.com/containernetworking/plugins v1.3.0
github.com/gofrs/flock v0.8.1
github.com/google/uuid v1.3.0
github.com/gorilla/mux v1.8.0
Expand Down
10 changes: 10 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
github.com/containernetworking/cni v1.1.2 h1:wtRGZVv7olUHMOqouPpn3cXJWpJgM6+EUl31EQbXALQ=
github.com/containernetworking/plugins v1.3.0 h1:QVNXMT6XloyMUoO2wUOqWTC1hWFV62Q6mVDp5H1HnjM=
github.com/containernetworking/plugins v1.3.0/go.mod h1:Pc2wcedTQQCVuROOOaLBPPxrEXqqXBFt3cZ+/yVg6l0=
github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w=
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/go-logr/logr v1.2.4 h1:g01GSCwiDw2xSZfjJ2/T9M+S6pFdcNtFYsp+Y43HYDQ=
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
github.com/gofrs/flock v0.8.1 h1:+gYjHKf32LDeiEEFhQaotPbLuUXjY5ZqxKgXy7n59aw=
github.com/gofrs/flock v0.8.1/go.mod h1:F1TvTiK9OcQqauNUHlbJvyl9Qa1QvF/gOUDKA14jxHU=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/pprof v0.0.0-20230323073829-e72429f035bd h1:r8yyd+DJDmsUhGrRBxH5Pj7KeFK5l+Y3FsgT8keqKtk=
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/gorilla/mux v1.8.0 h1:i40aqfkR1h2SlN9hojwV5ZA91wcXFOvkdNIeFDP5koI=
Expand All @@ -21,6 +27,8 @@ github.com/moby/sys/mountinfo v0.6.2 h1:BzJjoreD5BMFNmD9Rus6gdd1pLuecOFPt8wC+Vyg
github.com/moby/sys/mountinfo v0.6.2/go.mod h1:IJb6JQeOklcdMU9F5xQ8ZALD+CUr5VlGpwtX+VE0rpI=
github.com/moby/vpnkit v0.5.0 h1:VcDpS9y+PmT9itf+mH5Qdh9GME7ungLMt9yjf9o4REY=
github.com/moby/vpnkit v0.5.0/go.mod h1:KyjUrL9cb6ZSNNAUwZfqRjhwwgJ3BJN+kXh0t43WTUQ=
github.com/onsi/ginkgo/v2 v2.9.2 h1:BA2GMJOtfGAfagzYtrAlufIP0lq6QERkFmHLMLPwFSU=
github.com/onsi/gomega v1.27.6 h1:ENqfyGeS5AX/rlXDd/ETokDz93u0YufY1Pgxuy/PvWE=
github.com/pierrec/lz4/v4 v4.1.14/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
github.com/pierrec/lz4/v4 v4.1.17 h1:kV4Ip+/hUBC+8T6+2EgburRtkE9ef4nbY3f4dFhGjMc=
github.com/pierrec/lz4/v4 v4.1.17/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
Expand Down Expand Up @@ -64,9 +72,11 @@ golang.org/x/sys v0.9.0 h1:KS/R3tvhPqvJvwcKfnBHJwwthS11LRhmM5D59eEXa0s=
golang.org/x/sys v0.9.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0=
golang.org/x/tools v0.7.0 h1:W4OVu8VVOaIO0yzWMNdepAulS7YfoS3Zabrm8DOXXU4=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
Expand Down
18 changes: 15 additions & 3 deletions hack/benchmark-iperf3-net.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,38 @@
source $(realpath $(dirname $0))/common.inc.sh
# Runs the iperf3 client command ($IPERF3C) under RootlessKit with the
# slirp4netns network driver, targeting 10.0.2.2.
# Arguments: extra RootlessKit flags, forwarded verbatim via $@.
# $ROOTLESSKIT, $IPERF3C and INFO are not defined in this function; presumably
# they come from the sourced common.inc.sh -- TODO confirm.
function benchmark::iperf3::slirp4netns() {
INFO "[benchmark:iperf3] slirp4netns ($@)"
# Fresh state directory so the path of the detached netns file is known up front.
statedir=$(mktemp -d)
# With --detach-netns the client is wrapped in nsenter so that it runs inside
# ${statedir}/netns. IPERF3C is assigned without `local`, so the wrapped value
# persists after this function returns.
if echo "$@" | grep -q -- --detach-netns; then
IPERF3C="nsenter -n${statedir}/netns $IPERF3C"
fi
set -x
# NOTE(review): two invocations are visible here; the one without --state-dir
# looks like the pre-change line of a diff view -- confirm only one is intended.
$ROOTLESSKIT --net=slirp4netns $@ -- $IPERF3C 10.0.2.2
$ROOTLESSKIT --state-dir=$statedir --net=slirp4netns $@ -- $IPERF3C 10.0.2.2
set +x
}

# Runs the iperf3 client command ($IPERF3C) under RootlessKit with the
# vpnkit network driver, targeting 192.168.65.2.
# Arguments: extra RootlessKit flags, forwarded verbatim via $@.
# $ROOTLESSKIT, $IPERF3C and INFO are not defined in this function; presumably
# they come from the sourced common.inc.sh -- TODO confirm.
function benchmark::iperf3::vpnkit() {
INFO "[benchmark:iperf3] vpnkit ($@)"
# Fresh state directory so the path of the detached netns file is known up front.
statedir=$(mktemp -d)
# With --detach-netns the client is wrapped in nsenter so that it runs inside
# ${statedir}/netns. IPERF3C is assigned without `local`, so the wrapped value
# persists after this function returns.
if echo "$@" | grep -q -- --detach-netns; then
IPERF3C="nsenter -n${statedir}/netns $IPERF3C"
fi
set -x
# NOTE(review): two invocations are visible here; the one without --state-dir
# looks like the pre-change line of a diff view -- confirm only one is intended.
$ROOTLESSKIT --net=vpnkit $@ -- $IPERF3C 192.168.65.2
$ROOTLESSKIT --state-dir=$statedir --net=vpnkit $@ -- $IPERF3C 192.168.65.2
set +x
}

# Runs the iperf3 client command ($IPERF3C) under RootlessKit with the
# lxc-user-nic network driver, targeting the IPv4 address of the lxcbr0 bridge.
# Arguments: extra RootlessKit flags, forwarded verbatim via $@.
# $ROOTLESSKIT, $IPERF3C and INFO are not defined in this function; presumably
# they come from the sourced common.inc.sh -- TODO confirm.
function benchmark::iperf3::lxc-user-nic() {
INFO "[benchmark:iperf3] lxc-user-nic ($@)"
# Fresh state directory so the path of the detached netns file is known up front.
statedir=$(mktemp -d)
# NOTE(review): docs/network.md lists `--detach-netns` as unsupported for
# lxc-user-nic, so this branch is presumably kept only for symmetry with the
# other drivers -- confirm.
if echo "$@" | grep -q -- --detach-netns; then
IPERF3C="nsenter -n${statedir}/netns $IPERF3C"
fi
dev=lxcbr0
set -x
# ignore "lxc-net is already running" error
sudo /usr/lib/$(uname -m)-linux-gnu/lxc/lxc-net start || true
# Take the fourth field of `ip -o` output (ADDR/PREFIX) and strip the prefix length.
ip=$(ip -4 -o addr show $dev | awk '{print $4}' | cut -d "/" -f 1)
# NOTE(review): two invocations are visible here; the one without --state-dir
# looks like the pre-change line of a diff view -- confirm only one is intended.
$ROOTLESSKIT --net=lxc-user-nic $@ -- $IPERF3C $ip
$ROOTLESSKIT --state-dir=$statedir --net=lxc-user-nic $@ -- $IPERF3C $ip
set +x
}

Expand Down
6 changes: 5 additions & 1 deletion hack/benchmark-iperf3-port-udp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@ source $(realpath $(dirname $0))/common.inc.sh
function benchmark::iperf3::port::udp() {
statedir=$(mktemp -d)
INFO "[benchmark:iperf3::port::udp] $@"
$ROOTLESSKIT --state-dir=$statedir $@ iperf3 -s >/dev/null &
IPERF3="iperf3"
if echo "$@" | grep -q -- --detach-netns; then
IPERF3="nsenter -n${statedir}/netns $IPERF3"
fi
$ROOTLESSKIT --state-dir=$statedir $@ $IPERF3 -s >/dev/null &
rkpid=$!
# wait for socket to be available
sleep 3
Expand Down
6 changes: 5 additions & 1 deletion hack/benchmark-iperf3-port.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@ source $(realpath $(dirname $0))/common.inc.sh
function benchmark::iperf3::port() {
statedir=$(mktemp -d)
INFO "[benchmark:iperf3::port] $@"
$ROOTLESSKIT --state-dir=$statedir $@ iperf3 -s >/dev/null &
IPERF3="iperf3"
if echo "$@" | grep -q -- --detach-netns; then
IPERF3="nsenter -n${statedir}/netns $IPERF3"
fi
$ROOTLESSKIT --state-dir=$statedir $@ $IPERF3 -s >/dev/null &
rkpid=$!
# wait for socket to be available
sleep 3
Expand Down

0 comments on commit 9874e7d

Please sign in to comment.