Skip to content

Commit

Permalink
Merge pull request #138 from AkihiroSuda/a
Browse files Browse the repository at this point in the history
Use fuse-overlayfs (on all distros)
  • Loading branch information
AkihiroSuda committed Mar 5, 2020
2 parents 7667ada + e397ae6 commit 78aff3d
Show file tree
Hide file tree
Showing 12 changed files with 1,107 additions and 31 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/main.yaml
Expand Up @@ -4,6 +4,8 @@ jobs:
main:
runs-on: ubuntu-latest
steps:
- name: "System info"
run: sh -xec "uname -a; docker info; cat /proc/cpuinfo; df -h"
- uses: actions/checkout@v1
- name: "Build image"
run: DOCKER_BUILDKIT=1 docker build -t rootlesscontainers/usernetes .
Expand Down
34 changes: 33 additions & 1 deletion Dockerfile
Expand Up @@ -12,6 +12,8 @@ ARG SLIRP4NETNS_COMMIT=a8414d1d1629f6f7a93b60b55e183a93d10d9a1c
ARG RUNC_COMMIT=6503438fd6b0415bc146403b30b8a248b3346f52
# 2020-03-01T05:55:11Z
ARG CONTAINERD_COMMIT=ca66f3dd5d917694797a377b8ab9b8fb603b089b
# 2020-02-20T08:27:20Z
ARG CONTAINERD_FUSE_OVERLAYFS_COMMIT=bb896865146c4c0fdb06933f7fddb5603019e0ed
# 2020-03-05T09:10:28Z
ARG CRIO_COMMIT=d0665fd81f4a651d6b4c6a480d2b598b18c93f4f
# 2020-03-05T10:58:50Z
Expand All @@ -20,6 +22,7 @@ ARG KUBERNETES_COMMIT=67c6767b7da983034be04a31575261890186338a
# Version definitions (cont.)
ARG CONMON_RELEASE=v2.0.11
ARG CRUN_RELEASE=0.13
ARG FUSE_OVERLAYFS_RELEASE=v0.7.6
# Kube's build script requires KUBE_GIT_VERSION to be set to a semver string
ARG KUBE_GIT_VERSION=v1.18.0-usernetes
ARG SOCAT_RELEASE=1.7.3.4
Expand Down Expand Up @@ -59,6 +62,22 @@ RUN git pull && git checkout ${SLIRP4NETNS_COMMIT}
RUN ./autogen.sh && ./configure -q LDFLAGS="-static" && make --quiet && \
mkdir /out && cp slirp4netns /out

### fuse-overlayfs (fuse-overlayfs-build)
# Based on https://github.com/containers/fuse-overlayfs/blob/v0.7.6/Dockerfile.static.ubuntu .
# We can't use Alpine here because Alpine does not provide an apk package for libfuse3.a .
FROM debian:10 AS fuse-overlayfs-build
# Toolchain for an autotools build plus the libfuse3 development files;
# `file` is only needed for the static-link sanity check at the end of the stage.
RUN apt-get update && \
  apt-get install -q --no-install-recommends -y \
  git ca-certificates libc6-dev gcc make automake autoconf pkgconf libfuse3-dev file
RUN git clone https://github.com/containers/fuse-overlayfs
WORKDIR fuse-overlayfs
# The name must match the `ARG FUSE_OVERLAYFS_RELEASE=...` version definition.
# An ARG re-declared under a different name is empty inside this stage, which
# makes `git checkout` run without a ref and silently build the default branch
# instead of the pinned release.
ARG FUSE_OVERLAYFS_RELEASE
RUN git pull && git checkout ${FUSE_OVERLAYFS_RELEASE}
# Build a static binary and fail the stage if the result is not statically linked.
RUN ./autogen.sh && \
  LIBS="-ldl" LDFLAGS="-static" ./configure -q && \
  make --quiet && mkdir /out && cp fuse-overlayfs /out && \
  file /out/fuse-overlayfs | grep "statically linked"

### crun (crun-build)
FROM busybox AS crun-build
ARG CRUN_RELEASE
Expand All @@ -76,6 +95,17 @@ RUN make --quiet EXTRA_FLAGS="-buildmode pie" EXTRA_LDFLAGS='-extldflags "-fno-P
bin/containerd bin/containerd-shim-runc-v2 bin/ctr && \
mkdir /out && cp bin/containerd bin/containerd-shim-runc-v2 bin/ctr /out

### containerd-fuse-overlayfs (containerd-fuse-overlayfs-build)
FROM common-golang-alpine AS containerd-fuse-overlayfs-build
# Clone into a GOPATH-style directory because the build below runs with Go modules disabled.
# NOTE(review): assumes /go is the GOPATH of the common-golang-alpine stage - confirm.
RUN git clone -q https://github.com/AkihiroSuda/containerd-fuse-overlayfs.git /go/src/github.com/AkihiroSuda/containerd-fuse-overlayfs
WORKDIR /go/src/github.com/AkihiroSuda/containerd-fuse-overlayfs
# Commit to build; the value comes from the ARG of the same name in the version definitions.
ARG CONTAINERD_FUSE_OVERLAYFS_COMMIT
RUN git pull && git checkout ${CONTAINERD_FUSE_OVERLAYFS_COMMIT}
# Build without cgo.
ENV CGO_ENABLED=0
# Disable Go modules so the package is resolved by its import path under /go/src.
ENV GO111MODULE=off
# /out is the directory the bin-main stage copies the binary from.
RUN mkdir /out && \
go build -o /out/containerd-fuse-overlayfs-grpc github.com/AkihiroSuda/containerd-fuse-overlayfs/cmd/containerd-fuse-overlayfs-grpc

### CRI-O (crio-build)
FROM common-golang-alpine-heavy AS crio-build
RUN git clone -q https://github.com/cri-o/cri-o.git /go/src/github.com/cri-o/cri-o
Expand Down Expand Up @@ -147,8 +177,10 @@ RUN mkdir /tmp-etcd out && \
FROM scratch AS bin-main
COPY --from=rootlesskit-build /out/* /
COPY --from=slirp4netns-build /out/* /
COPY --from=fuse-overlayfs-build /out/* /
COPY --from=crun-build /out/* /
COPY --from=containerd-build /out/* /
COPY --from=containerd-fuse-overlayfs-build /out/* /
COPY --from=crio-build /out/* /
COPY --from=conmon-build /out/* /
# can't use wildcard here: https://github.com/rootless-containers/usernetes/issues/78
Expand All @@ -164,7 +196,7 @@ ADD https://raw.githubusercontent.com/AkihiroSuda/containerized-systemd/6ced78a9
RUN chmod +x /docker-entrypoint.sh && \
# As of Feb 2020, Fedora has wrong permission bits on newuidmap and newgidmap.
chmod +s /usr/bin/newuidmap /usr/bin/newgidmap && \
dnf install -q -y findutils git iproute iptables hostname procps-ng \
dnf install -q -y findutils fuse3 git iproute iptables hostname procps-ng \
# systemd-container: for machinectl
systemd-container && \
useradd --create-home --home-dir /home/user --uid 1000 -G systemd-journal user && \
Expand Down
23 changes: 11 additions & 12 deletions README.md
Expand Up @@ -77,13 +77,17 @@ Usernetes executes Kubernetes and CRI runtimes without the root privileges by us

To set up NAT across the host and the network namespace without the root privilege, Usernetes uses a usermode network stack ([slirp4netns](https://github.com/rootless-containers/slirp4netns)).

No SETUID/SETCAP binary is needed. except [`newuidmap(1)`](http://man7.org/linux/man-pages/man1/newuidmap.1.html) and [`newgidmap(1)`](http://man7.org/linux/man-pages/man1/newgidmap.1.html), which are used for setting up [`user_namespaces(7)`](http://man7.org/linux/man-pages/man7/user_namespaces.7.html) with multiple sub-UIDs and sub-GIDs.
No SETUID/SETCAP binary is needed, except [`newuidmap(1)`](http://man7.org/linux/man-pages/man1/newuidmap.1.html) and [`newgidmap(1)`](http://man7.org/linux/man-pages/man1/newgidmap.1.html), which are used for setting up [`user_namespaces(7)`](http://man7.org/linux/man-pages/man7/user_namespaces.7.html) with multiple sub-UIDs and sub-GIDs.

## Requirements

* Recent version of systemd
* Kernel >= 4.18.

* `newuidmap` and `newgidmap` need to be installed on the host. These commands are provided by the `uidmap` package on most distros.
* Recent version of systemd. Known to work with systemd >= 242.

* `mount.fuse3` binary. Provided by `fuse3` package on most distros.

* `newuidmap` and `newgidmap` binaries. Provided by `uidmap` package on most distros.

* `/etc/subuid` and `/etc/subgid` should contain at least 65536 sub-IDs, e.g. `exampleuser:231072:65536`. These files are automatically configured on most distros.

Expand All @@ -101,11 +105,9 @@ exampleuser:231072:65536
### Distribution-specific hint
#### Ubuntu
* No preparation is needed.
* overlayfs is enabled by default ([Ubuntu-specific kernel patch](https://kernel.ubuntu.com/git/ubuntu/ubuntu-bionic.git/commit/fs/overlayfs?id=3b7da90f28fe1ed4b79ef2d994c81efbc58f1144)).

#### Debian GNU/Linux
* Add `kernel.unprivileged_userns_clone=1` to `/etc/sysctl.conf` (or `/etc/sysctl.d`) and run `sudo sysctl -p`
* To use overlayfs (recommended), run `sudo modprobe overlay permit_mounts_in_userns=1` ([Debian-specific kernel patch, introduced in Debian 10](https://salsa.debian.org/kernel-team/linux/blob/283390e7feb21b47779b48e0c8eb0cc409d2c815/debian/patches/debian/overlayfs-permit-mounts-in-userns.patch)). Put the configuration to `/etc/modprobe.d` for persistence.

#### Arch Linux
* Add `kernel.unprivileged_userns_clone=1` to `/etc/sysctl.conf` (or `/etc/sysctl.d`) and run `sudo sysctl -p`
Expand All @@ -127,17 +129,12 @@ exampleuser:231072:65536

## Restrictions

Common:
* [slirp4netns](https://github.com/rootless-containers/slirp4netns) is used instead of [vEth](http://man7.org/linux/man-pages/man4/veth.4.html) pairs.
* [fuse-overlayfs](https://github.com/containers/fuse-overlayfs) is used instead of overlayfs.
* The following features are not supported:
* Cgroups
* Apparmor

CRI-O:
* Only `vfs` storage driver is supported.

containerd:
* Only `native` storage driver is supported. However, on Ubuntu and a few distros, `overlayfs` is also supported.

## Install from binary

Download the latest `usernetes-x86_64.tbz` from [Releases](https://github.com/rootless-containers/usernetes/releases).
Expand Down Expand Up @@ -333,5 +330,7 @@ Usernetes is licensed under the terms of [Apache License Version 2.0](LICENSE).

The binary releases of Usernetes contain files that are licensed under the terms of different licenses:

* `bin/crun`: [GNU GENERAL PUBLIC LICENSE Version 2](docs/binary-release-license/LICENSE-crun), see https://github.com/containers/crun
* `bin/fuse-overlayfs`: [GNU GENERAL PUBLIC LICENSE Version 3](docs/binary-release-license/LICENSE-fuse-overlayfs), see https://github.com/containers/fuse-overlayfs
* `bin/slirp4netns`: [GNU GENERAL PUBLIC LICENSE Version 2](docs/binary-release-license/LICENSE-slirp4netns), see https://github.com/rootless-containers/slirp4netns
* `bin/socat`: [GNU GENERAL PUBLIC LICENSE Version 2](docs/binary-release-license/LICENSE-socat), see http://www.dest-unreach.org/socat/
11 changes: 11 additions & 0 deletions boot/containerd-fuse-overlayfs-grpc.sh
@@ -0,0 +1,11 @@
#!/bin/bash
# Launches the containerd-fuse-overlayfs-grpc proxy snapshotter for Usernetes.
#
# Usage: containerd-fuse-overlayfs-grpc.sh [extra arguments for containerd-fuse-overlayfs-grpc]
#
# Environment: XDG_RUNTIME_DIR and XDG_DATA_HOME are read to locate the socket
# and the snapshotter data directory.
# NOTE(review): presumably common.inc.sh provides defaults for them - confirm.
#
# All expansions are quoted so that an installation path or an argument
# containing whitespace or glob characters is passed through unchanged.
export U7S_BASE_DIR="$(realpath "$(dirname "$0")/..")"
source "$U7S_BASE_DIR/common/common.inc.sh"
# nsenter::main comes from common.inc.sh.
# NOTE(review): presumably it re-executes this script inside the rootless namespaces - confirm.
nsenter::main "$0" "$@"

mkdir -p "$XDG_RUNTIME_DIR/usernetes/containerd" "$XDG_DATA_HOME/usernetes/containerd"

# The socket path must stay in sync with the "fuse-overlayfs" proxy plugin
# address written to the containerd configuration by boot/containerd.sh.
exec containerd-fuse-overlayfs-grpc \
	"$@" \
	"$XDG_RUNTIME_DIR/usernetes/containerd/fuse-overlayfs.sock" \
	"$XDG_DATA_HOME/usernetes/containerd/io.containerd.snapshotter.v1.fuse-overlayfs"
6 changes: 5 additions & 1 deletion boot/containerd.sh
Expand Up @@ -10,13 +10,17 @@ root = "$XDG_DATA_HOME/usernetes/containerd"
state = "$XDG_RUNTIME_DIR/usernetes/containerd"
[grpc]
address = "$XDG_RUNTIME_DIR/usernetes/containerd/containerd.sock"
[proxy_plugins]
[proxy_plugins."fuse-overlayfs"]
type = "snapshot"
address = "$XDG_RUNTIME_DIR/usernetes/containerd/fuse-overlayfs.sock"
[plugins]
[plugins."io.containerd.grpc.v1.cri"]
disable_cgroup = true
disable_apparmor = true
restrict_oom_score_adj = true
[plugins."io.containerd.grpc.v1.cri".containerd]
snapshotter = "$(overlayfs::supported && echo overlayfs || echo native)"
snapshotter = "fuse-overlayfs"
default_runtime_name = "crun"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes]
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.crun]
Expand Down
5 changes: 4 additions & 1 deletion boot/crio.sh
Expand Up @@ -15,7 +15,10 @@ cat >$XDG_CONFIG_HOME/usernetes/crio/crio.conf <<EOF
runroot = "$XDG_RUNTIME_DIR/usernetes/containers/storage"
root = "$XDG_DATA_HOME/usernetes/containers/storage"
version_file = "$XDG_RUNTIME_DIR/usernetes/crio/version"
storage_driver = "vfs"
storage_driver = "overlay"
storage_option = [
"overlay.mount_program=$U7S_BASE_DIR/bin/fuse-overlayfs"
]
[crio.api]
listen = "$XDG_RUNTIME_DIR/usernetes/crio/crio.sock"
[crio.image]
Expand Down
5 changes: 4 additions & 1 deletion boot/docker-2ndboot.sh
@@ -1,5 +1,8 @@
#!/bin/bash
cd $(realpath $(dirname $0)/..)
set -eux
./install.sh $@
# Run the installer with the caller's arguments passed through verbatim
# ("$@" keeps each argument as one word even if it contains whitespace).
# On failure, dump the journal so the cause is visible in the output,
# then exit with a non-zero status.
if ! ./install.sh "$@"; then
	journalctl -xe --no-pager
	exit 1
fi
# Installation succeeded: keep running in the foreground by following the journal.
exec journalctl -f -n 100
7 changes: 0 additions & 7 deletions common/common.inc.sh
Expand Up @@ -90,13 +90,6 @@ function nsenter::_nsenter() {
nsenter -U --preserve-credential -n -m -p -t $(cat $pidfile) --wd=$PWD -- $@
}

## overlayfs utilities
# Reports whether the kernel version string identifies an Ubuntu kernel.
# Globals:   none (the status variable is local, so a caller's own `rc` is left untouched)
# Arguments: none
# Outputs:   nothing
# Returns:   0 if `uname -v` contains "Ubuntu", otherwise grep's non-zero status
function overlayfs::supported() {
	local rc=0
	# `|| rc=$?` captures the failure without tripping `set -e` in the caller.
	uname -v | grep Ubuntu >/dev/null || rc=$?
	return "$rc"
}

# entrypoint begins
if debug::enabled; then
log::warning "Running in debug mode (\$U7S_DEBUG)"
Expand Down

0 comments on commit 78aff3d

Please sign in to comment.