diff --git a/client/client_test.go b/client/client_test.go index 2d74b9d4c0fd..1878031ad90b 100644 --- a/client/client_test.go +++ b/client/client_test.go @@ -853,6 +853,10 @@ func testCgroupParent(t *testing.T, sb integration.Sandbox) { t.SkipNow() } + if _, err := os.Lstat("/sys/fs/cgroup/cgroup.subtree_control"); os.IsNotExist(err) { + t.Skipf("test requires cgroup v2") + } + c, err := New(sb.Context(), sb.Address()) require.NoError(t, err) defer c.Close() @@ -864,8 +868,21 @@ func testCgroupParent(t *testing.T, sb integration.Sandbox) { st = img.Run(append(ro, llb.Shlex(cmd), llb.Dir("/wd"))...).AddMount("/wd", st) } - run(`sh -c "cat /proc/self/cgroup > first"`, llb.WithCgroupParent("foocgroup")) - run(`sh -c "cat /proc/self/cgroup > second"`) + cgroupName := "test." + identity.NewID() + + err = os.MkdirAll(filepath.Join("/sys/fs/cgroup", cgroupName), 0755) + require.NoError(t, err) + + defer func() { + err := os.RemoveAll(filepath.Join("/sys/fs/cgroup", cgroupName)) + require.NoError(t, err) + }() + + err = os.WriteFile(filepath.Join("/sys/fs/cgroup", cgroupName, "pids.max"), []byte("10"), 0644) + require.NoError(t, err) + + run(`sh -c "(for i in $(seq 1 10); do sleep 1 & done 2>first.error); cat /proc/self/cgroup >> first"`, llb.WithCgroupParent(cgroupName)) + run(`sh -c "(for i in $(seq 1 10); do sleep 1 & done 2>second.error); cat /proc/self/cgroup >> second"`) def, err := st.Marshal(sb.Context()) require.NoError(t, err) @@ -882,13 +899,22 @@ func testCgroupParent(t *testing.T, sb integration.Sandbox) { }, nil) require.NoError(t, err) + // neither process leaks parent cgroup name inside container dt, err := os.ReadFile(filepath.Join(destDir, "first")) require.NoError(t, err) - require.Contains(t, strings.TrimSpace(string(dt)), `/foocgroup/buildkit/`) + require.NotContains(t, strings.TrimSpace(string(dt)), cgroupName) dt2, err := os.ReadFile(filepath.Join(destDir, "second")) require.NoError(t, err) - require.NotContains(t, strings.TrimSpace(string(dt2)), 
`/foocgroup/buildkit/`) + require.NotContains(t, strings.TrimSpace(string(dt2)), cgroupName) + + dt, err = os.ReadFile(filepath.Join(destDir, "first.error")) + require.NoError(t, err) + require.Contains(t, strings.TrimSpace(string(dt)), "Resource temporarily unavailable") + + dt, err = os.ReadFile(filepath.Join(destDir, "second.error")) + require.NoError(t, err) + require.Equal(t, "", strings.TrimSpace(string(dt))) } func testNetworkMode(t *testing.T, sb integration.Sandbox) { diff --git a/executor/oci/spec.go b/executor/oci/spec.go index 054c28dd474e..c6d665b0817e 100644 --- a/executor/oci/spec.go +++ b/executor/oci/spec.go @@ -137,6 +137,12 @@ func GenerateSpec(ctx context.Context, meta executor.Meta, mounts []executor.Mou return nil, nil, err } + if cgroupNamespaceSupported() { + s.Linux.Namespaces = append(s.Linux.Namespaces, specs.LinuxNamespace{ + Type: specs.CgroupNamespace, + }) + } + if len(meta.Ulimit) == 0 { // reset open files limit s.Process.Rlimits = nil diff --git a/executor/oci/spec_unix.go b/executor/oci/spec_unix.go index 3c809e7ff9f3..97e95e9834b2 100644 --- a/executor/oci/spec_unix.go +++ b/executor/oci/spec_unix.go @@ -6,7 +6,9 @@ package oci import ( "context" "fmt" + "os" "strings" + "sync" "github.com/containerd/containerd/containers" "github.com/containerd/containerd/oci" @@ -21,6 +23,11 @@ import ( "github.com/pkg/errors" ) +var ( + cgroupNSOnce sync.Once + supportsCgroupNS bool +) + const ( tracingSocketPath = "/dev/otel-grpc.sock" ) @@ -139,3 +146,12 @@ func getTracingSocketMount(socket string) specs.Mount { func getTracingSocket() string { return fmt.Sprintf("unix://%s", tracingSocketPath) } + +func cgroupNamespaceSupported() bool { + cgroupNSOnce.Do(func() { + if _, err := os.Stat("/proc/self/ns/cgroup"); !os.IsNotExist(err) { + supportsCgroupNS = true + } + }) + return supportsCgroupNS +} diff --git a/executor/oci/spec_windows.go b/executor/oci/spec_windows.go index faa9baafa1d0..83ee278187c0 100644 --- a/executor/oci/spec_windows.go 
+++ b/executor/oci/spec_windows.go @@ -63,3 +63,7 @@ func getTracingSocketMount(socket string) specs.Mount { func getTracingSocket() string { return fmt.Sprintf("npipe://%s", filepath.ToSlash(tracingSocketPath)) } + +func cgroupNamespaceSupported() bool { + return false +} diff --git a/frontend/dockerfile/dockerfile_test.go b/frontend/dockerfile/dockerfile_test.go index 3ebfb6198f03..1c710b5a8d87 100644 --- a/frontend/dockerfile/dockerfile_test.go +++ b/frontend/dockerfile/dockerfile_test.go @@ -5193,10 +5193,27 @@ func testCgroupParent(t *testing.T, sb integration.Sandbox) { t.SkipNow() } + if _, err := os.Lstat("/sys/fs/cgroup/cgroup.subtree_control"); os.IsNotExist(err) { + t.Skipf("test requires cgroup v2") + } + + cgroupName := "test." + identity.NewID() + + err := os.MkdirAll(filepath.Join("/sys/fs/cgroup", cgroupName), 0755) + require.NoError(t, err) + + defer func() { + err := os.RemoveAll(filepath.Join("/sys/fs/cgroup", cgroupName)) + require.NoError(t, err) + }() + + err = os.WriteFile(filepath.Join("/sys/fs/cgroup", cgroupName, "pids.max"), []byte("10"), 0644) + require.NoError(t, err) + f := getFrontend(t, sb) dockerfile := []byte(` FROM alpine AS base -RUN cat /proc/self/cgroup > /out +RUN mkdir /out; (for i in $(seq 1 10); do sleep 1 & done 2>/out/error); cat /proc/self/cgroup > /out/cgroup FROM scratch COPY --from=base /out / `) @@ -5215,7 +5232,7 @@ COPY --from=base /out / _, err = f.Solve(sb.Context(), c, client.SolveOpt{ FrontendAttrs: map[string]string{ - "cgroup-parent": "foocgroup", + "cgroup-parent": cgroupName, }, LocalDirs: map[string]string{ dockerui.DefaultLocalNameDockerfile: dir, @@ -5230,9 +5247,14 @@ COPY --from=base /out / }, nil) require.NoError(t, err) - dt, err := os.ReadFile(filepath.Join(destDir, "out")) + dt, err := os.ReadFile(filepath.Join(destDir, "cgroup")) + require.NoError(t, err) + // cgroupns does not leak the parent cgroup name + require.NotContains(t, strings.TrimSpace(string(dt)), cgroupName) + + dt, err = 
os.ReadFile(filepath.Join(destDir, "error")) require.NoError(t, err) - require.Contains(t, strings.TrimSpace(string(dt)), `/foocgroup/buildkit/`) + require.Contains(t, strings.TrimSpace(string(dt)), `Resource temporarily unavailable`) } func testNamedImageContext(t *testing.T, sb integration.Sandbox) {