diff --git a/api/server/router/container/container_routes.go b/api/server/router/container/container_routes.go index 08a9f3c0ad6e1..dbb2b9beda5fe 100644 --- a/api/server/router/container/container_routes.go +++ b/api/server/router/container/container_routes.go @@ -628,6 +628,14 @@ func (s *containerRouter) postContainersCreate(ctx context.Context, w http.Respo } } + if versions.LessThan(version, "1.45") { + for _, m := range hostConfig.Mounts { + if m.VolumeOptions != nil && m.VolumeOptions.Subpath != "" { + return errdefs.InvalidParameter(errors.New("VolumeOptions.Subpath needs API v1.45 or newer")) + } + } + } + var warnings []string if warn, err := handleMACAddressBC(config, hostConfig, networkingConfig, version); err != nil { return err diff --git a/api/swagger.yaml b/api/swagger.yaml index 90556d75ed284..4cdffb6a38f05 100644 --- a/api/swagger.yaml +++ b/api/swagger.yaml @@ -423,6 +423,10 @@ definitions: type: "object" additionalProperties: type: "string" + Subpath: + description: "Source path inside the volume. Must be relative without any back traversals." + type: "string" + example: "dir-inside-volume/subdirectory" TmpfsOptions: description: "Optional configuration for the `tmpfs` type." type: "object" diff --git a/api/types/mount/mount.go b/api/types/mount/mount.go index 57edf2ef18304..6fe04da257942 100644 --- a/api/types/mount/mount.go +++ b/api/types/mount/mount.go @@ -96,6 +96,7 @@ type BindOptions struct { type VolumeOptions struct { NoCopy bool `json:",omitempty"` Labels map[string]string `json:",omitempty"` + Subpath string `json:",omitempty"` DriverConfig *Driver `json:",omitempty"` } diff --git a/container/container.go b/container/container.go index e73f05654fcd3..018300350d37b 100644 --- a/container/container.go +++ b/container/container.go @@ -514,14 +514,14 @@ func (container *Container) AddMountPointWithVolume(destination string, vol volu } // UnmountVolumes unmounts all volumes -func (container *Container) UnmountVolumes(volumeEventLog func(name string, action events.Action, attributes map[string]string)) error { +func (container *Container) UnmountVolumes(ctx context.Context, volumeEventLog func(name string, action events.Action, attributes map[string]string)) error { var errs []string for _, volumeMount := range container.MountPoints { if volumeMount.Volume == nil { continue } - if err := volumeMount.Cleanup(); err != nil { + if err := volumeMount.Cleanup(ctx); err != nil { errs = append(errs, err.Error()) continue } diff --git a/container/container_unix.go b/container/container_unix.go index 80cf5e58dd970..66bcacd9638fd 100644 --- a/container/container_unix.go +++ b/container/container_unix.go @@ -15,8 +15,6 @@ import ( "github.com/docker/docker/api/types/events" mounttypes "github.com/docker/docker/api/types/mount" swarmtypes "github.com/docker/docker/api/types/swarm" - "github.com/docker/docker/pkg/stringid" - "github.com/docker/docker/volume" volumemounts "github.com/docker/docker/volume/mounts" "github.com/moby/sys/mount" "github.com/opencontainers/selinux/go-selinux/label" @@ -129,34 +127,11 @@ func (container *Container) NetworkMounts() []Mount { } // CopyImagePathContent copies files in destination to the volume. -func (container *Container) CopyImagePathContent(v volume.Volume, destination string) error { - rootfs, err := container.GetResourcePath(destination) - if err != nil { - return err - } - - if _, err := os.Stat(rootfs); err != nil { - if os.IsNotExist(err) { - return nil - } - return err - } - - id := stringid.GenerateRandomID() - path, err := v.Mount(id) - if err != nil { - return err - } - - defer func() { - if err := v.Unmount(id); err != nil { - log.G(context.TODO()).Warnf("error while unmounting volume %s: %v", v.Name(), err) - } - }() - if err := label.Relabel(path, container.MountLabel, true); err != nil && !errors.Is(err, syscall.ENOTSUP) { +func (container *Container) CopyImagePathContent(volumePath, destination string) error { + if err := label.Relabel(volumePath, container.MountLabel, true); err != nil && !errors.Is(err, syscall.ENOTSUP) { return err } - return copyExistingContents(rootfs, path) + return copyExistingContents(destination, volumePath) } // ShmResourcePath returns path to shm @@ -396,7 +371,7 @@ func (container *Container) DetachAndUnmount(volumeEventLog func(name string, ac Warn("Unable to unmount") } } - return container.UnmountVolumes(volumeEventLog) + return container.UnmountVolumes(ctx, volumeEventLog) } // ignoreUnsupportedXAttrs ignores errors when extended attributes @@ -419,9 +394,13 @@ func copyExistingContents(source, destination string) error { return err } if len(dstList) != 0 { - // destination is not empty, do not copy + log.G(context.TODO()).WithFields(log.Fields{ + "source": source, + "destination": destination, + }).Debug("destination is not empty, do not copy") return nil } + return fs.CopyDir(destination, source, ignoreUnsupportedXAttrs()) } diff --git a/container/container_windows.go b/container/container_windows.go index bceedcb637fa5..bfebdbad189b7 100644 --- a/container/container_windows.go +++ b/container/container_windows.go @@ -1,6 +1,7 @@ package container // import "github.com/docker/docker/container" import ( + "context" "fmt" "os" "path/filepath" @@ -128,7 +129,7 @@ func (container *Container) ConfigMounts() []Mount { // On Windows it only delegates to `UnmountVolumes` since there is nothing to // force unmount. func (container *Container) DetachAndUnmount(volumeEventLog func(name string, action events.Action, attributes map[string]string)) error { - return container.UnmountVolumes(volumeEventLog) + return container.UnmountVolumes(context.TODO(), volumeEventLog) } // TmpfsMounts returns the list of tmpfs mounts diff --git a/daemon/container_operations_unix.go b/daemon/container_operations_unix.go index 6a23a4ca92b13..120069e631597 100644 --- a/daemon/container_operations_unix.go +++ b/daemon/container_operations_unix.go @@ -99,6 +99,45 @@ func (daemon *Daemon) getPIDContainer(id string) (*container.Container, error) { return ctr, nil } +// setupContainerDirs sets up base container directories (root, ipc, tmpfs and secrets). +func (daemon *Daemon) setupContainerDirs(c *container.Container) (_ []container.Mount, err error) { + if err := daemon.setupContainerMountsRoot(c); err != nil { + return nil, err + } + + if err := daemon.setupIPCDirs(c); err != nil { + return nil, err + } + + if err := daemon.setupSecretDir(c); err != nil { + return nil, err + } + defer func() { + if err != nil { + daemon.cleanupSecretDir(c) + } + }() + + var ms []container.Mount + if !c.HostConfig.IpcMode.IsPrivate() && !c.HostConfig.IpcMode.IsEmpty() { + ms = append(ms, c.IpcMounts()...) + } + + tmpfsMounts, err := c.TmpfsMounts() + if err != nil { + return nil, err + } + ms = append(ms, tmpfsMounts...) + + secretMounts, err := c.SecretMounts() + if err != nil { + return nil, err + } + ms = append(ms, secretMounts...) + + return ms, nil +} + func (daemon *Daemon) setupIPCDirs(c *container.Container) error { ipcMode := c.HostConfig.IpcMode diff --git a/daemon/containerfs_linux.go b/daemon/containerfs_linux.go index 384c86b3f0329..12b1cb68284d1 100644 --- a/daemon/containerfs_linux.go +++ b/daemon/containerfs_linux.go @@ -17,6 +17,7 @@ import ( "github.com/docker/docker/api/types" "github.com/docker/docker/container" + "github.com/docker/docker/internal/compatcontext" "github.com/docker/docker/internal/mounttree" "github.com/docker/docker/internal/unshare" "github.com/docker/docker/pkg/fileutils" @@ -54,6 +55,8 @@ type containerFSView struct { // openContainerFS opens a new view of the container's filesystem. func (daemon *Daemon) openContainerFS(container *container.Container) (_ *containerFSView, err error) { + ctx := context.TODO() + if err := daemon.Mount(container); err != nil { return nil, err } @@ -63,13 +66,15 @@ func (daemon *Daemon) openContainerFS(container *container.Container) (_ *contai } }() - mounts, err := daemon.setupMounts(container) + mounts, cleanup, err := daemon.setupMounts(ctx, container) if err != nil { return nil, err } defer func() { + ctx := compatcontext.WithoutCancel(ctx) + cleanup(ctx) if err != nil { - _ = container.UnmountVolumes(daemon.LogVolumeEvent) + _ = container.UnmountVolumes(ctx, daemon.LogVolumeEvent) } }() @@ -207,7 +212,7 @@ func (vw *containerFSView) Close() error { runtime.SetFinalizer(vw, nil) close(vw.todo) err := multierror.Append(nil, <-vw.done) - err = multierror.Append(err, vw.ctr.UnmountVolumes(vw.d.LogVolumeEvent)) + err = multierror.Append(err, vw.ctr.UnmountVolumes(context.TODO(), vw.d.LogVolumeEvent)) err = multierror.Append(err, vw.d.Unmount(vw.ctr)) return err.ErrorOrNil() } diff --git a/daemon/create.go b/daemon/create.go index c524c03b8ed04..27bb329f2508f 100644 --- a/daemon/create.go +++ b/daemon/create.go @@ -222,7 +222,7 @@ func (daemon *Daemon) create(ctx context.Context, daemonCfg *config.Config, opts return nil, err } - if err := daemon.createContainerOSSpecificSettings(ctr, opts.params.Config, opts.params.HostConfig); err != nil { + if err := daemon.createContainerOSSpecificSettings(ctx, ctr, opts.params.Config, opts.params.HostConfig); err != nil { return nil, err } diff --git a/daemon/create_unix.go b/daemon/create_unix.go index 228d4a62e9266..00b080e547725 100644 --- a/daemon/create_unix.go +++ b/daemon/create_unix.go @@ -12,13 +12,17 @@ import ( containertypes "github.com/docker/docker/api/types/container" mounttypes "github.com/docker/docker/api/types/mount" "github.com/docker/docker/container" + "github.com/docker/docker/errdefs" + "github.com/docker/docker/internal/compatcontext" "github.com/docker/docker/oci" + volumemounts "github.com/docker/docker/volume/mounts" volumeopts "github.com/docker/docker/volume/service/opts" "github.com/opencontainers/selinux/go-selinux/label" + "github.com/pkg/errors" ) // createContainerOSSpecificSettings performs host-OS specific container create functionality -func (daemon *Daemon) createContainerOSSpecificSettings(container *container.Container, config *containertypes.Config, hostConfig *containertypes.HostConfig) error { +func (daemon *Daemon) createContainerOSSpecificSettings(ctx context.Context, container *container.Container, config *containertypes.Config, hostConfig *containertypes.HostConfig) error { if err := daemon.Mount(container); err != nil { return err } @@ -45,7 +49,7 @@ func (daemon *Daemon) createContainerOSSpecificSettings(container *container.Con // Skip volumes for which we already have something mounted on that // destination because of a --volume-from. if container.HasMountFor(destination) { - log.G(context.TODO()).WithField("container", container.ID).WithField("destination", spec).Debug("mountpoint already exists, skipping anonymous volume") + log.G(ctx).WithField("container", container.ID).WithField("destination", spec).Debug("mountpoint already exists, skipping anonymous volume") // Not an error, this could easily have come from the image config. continue } @@ -70,12 +74,12 @@ func (daemon *Daemon) createContainerOSSpecificSettings(container *container.Con container.AddMountPointWithVolume(destination, &volumeWrapper{v: v, s: daemon.volumes}, true) } - return daemon.populateVolumes(container) + return daemon.populateVolumes(ctx, container) } // populateVolumes copies data from the container's rootfs into the volume for non-binds. // this is only called when the container is created. -func (daemon *Daemon) populateVolumes(c *container.Container) error { +func (daemon *Daemon) populateVolumes(ctx context.Context, c *container.Container) error { for _, mnt := range c.MountPoints { if mnt.Volume == nil { continue @@ -85,10 +89,41 @@ func (daemon *Daemon) populateVolumes(c *container.Container) error { continue } - log.G(context.TODO()).Debugf("copying image data from %s:%s, to %s", c.ID, mnt.Destination, mnt.Name) - if err := c.CopyImagePathContent(mnt.Volume, mnt.Destination); err != nil { + if err := daemon.populateVolume(ctx, c, mnt); err != nil { return err } } return nil } + +func (daemon *Daemon) populateVolume(ctx context.Context, c *container.Container, mnt *volumemounts.MountPoint) error { + ctrDestPath, err := c.GetResourcePath(mnt.Destination) + if err != nil { + return err + } + + if _, err := os.Stat(ctrDestPath); err != nil { + if os.IsNotExist(err) { + return nil + } + return err + } + + volumePath, cleanup, err := mnt.Setup(ctx, c.MountLabel, daemon.idMapping.RootPair(), nil) + if err != nil { + if errdefs.IsNotFound(err) { + return nil + } + log.G(ctx).WithError(err).Debugf("can't copy data from %s:%s, to %s", c.ID, mnt.Destination, volumePath) + return errors.Wrapf(err, "failed to populate volume") + } + defer mnt.Cleanup(compatcontext.WithoutCancel(ctx)) + defer cleanup(compatcontext.WithoutCancel(ctx)) + + log.G(ctx).Debugf("copying image data from %s:%s, to %s", c.ID, mnt.Destination, volumePath) + if err := c.CopyImagePathContent(volumePath, ctrDestPath); err != nil { + return err + } + + return nil +} diff --git a/daemon/create_windows.go b/daemon/create_windows.go index c7220601f3c02..d1902b3266c7c 100644 --- a/daemon/create_windows.go +++ b/daemon/create_windows.go @@ -11,7 +11,7 @@ import ( ) // createContainerOSSpecificSettings performs host-OS specific container create functionality -func (daemon *Daemon) createContainerOSSpecificSettings(container *container.Container, config *containertypes.Config, hostConfig *containertypes.HostConfig) error { +func (daemon *Daemon) createContainerOSSpecificSettings(ctx context.Context, container *container.Container, config *containertypes.Config, hostConfig *containertypes.HostConfig) error { if containertypes.Isolation.IsDefault(hostConfig.Isolation) { // Make sure the host config has the default daemon isolation if not specified by caller. hostConfig.Isolation = daemon.defaultIsolation @@ -34,7 +34,7 @@ func (daemon *Daemon) createContainerOSSpecificSettings(container *container.Con // Create the volume in the volume driver. If it doesn't exist, // a new one will be created. - v, err := daemon.volumes.Create(context.TODO(), "", volumeDriver, volumeopts.WithCreateReference(container.ID)) + v, err := daemon.volumes.Create(ctx, "", volumeDriver, volumeopts.WithCreateReference(container.ID)) if err != nil { return err } diff --git a/daemon/daemon.go b/daemon/daemon.go index 77c264ecf837e..0aa5c5555b905 100644 --- a/daemon/daemon.go +++ b/daemon/daemon.go @@ -466,7 +466,7 @@ func (daemon *Daemon) restore(cfg *configStore) error { ces.ExitCode = 255 } c.SetStopped(&ces) - daemon.Cleanup(c) + daemon.Cleanup(context.TODO(), c) if err := c.CheckpointTo(daemon.containersReplica); err != nil { baseLogger.WithError(err).Error("failed to update stopped container state") } diff --git a/daemon/monitor.go b/daemon/monitor.go index 4734fe45e5148..7c47ae0786c4b 100644 --- a/daemon/monitor.go +++ b/daemon/monitor.go @@ -89,7 +89,7 @@ func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontaine "exitCode": strconv.Itoa(exitStatus.ExitCode), "execDuration": strconv.Itoa(int(execDuration.Seconds())), } - daemon.Cleanup(c) + daemon.Cleanup(context.TODO(), c) if restart { c.RestartCount++ diff --git a/daemon/oci_linux.go b/daemon/oci_linux.go index c7fdedcb31f10..97bc8e707afb6 100644 --- a/daemon/oci_linux.go +++ b/daemon/oci_linux.go @@ -532,47 +532,8 @@ func inSlice(slice []string, s string) bool { } // withMounts sets the container's mounts -func withMounts(daemon *Daemon, daemonCfg *configStore, c *container.Container) coci.SpecOpts { +func withMounts(daemon *Daemon, daemonCfg *configStore, c *container.Container, ms []container.Mount) coci.SpecOpts { return func(ctx context.Context, _ coci.Client, _ *containers.Container, s *coci.Spec) (err error) { - if err := daemon.setupContainerMountsRoot(c); err != nil { - return err - } - - if err := daemon.setupIPCDirs(c); err != nil { - return err - } - - defer func() { - if err != nil { - daemon.cleanupSecretDir(c) - } - }() - - if err := daemon.setupSecretDir(c); err != nil { - return err - } - - ms, err := daemon.setupMounts(c) - if err != nil { - return err - } - - if !c.HostConfig.IpcMode.IsPrivate() && !c.HostConfig.IpcMode.IsEmpty() { - ms = append(ms, c.IpcMounts()...) - } - - tmpfsMounts, err := c.TmpfsMounts() - if err != nil { - return err - } - ms = append(ms, tmpfsMounts...) - - secretMounts, err := c.SecretMounts() - if err != nil { - return err - } - ms = append(ms, secretMounts...) - sort.Sort(mounts(ms)) mounts := ms @@ -1093,7 +1054,7 @@ func WithUser(c *container.Container) coci.SpecOpts { } } -func (daemon *Daemon) createSpec(ctx context.Context, daemonCfg *configStore, c *container.Container) (retSpec *specs.Spec, err error) { +func (daemon *Daemon) createSpec(ctx context.Context, daemonCfg *configStore, c *container.Container, mounts []container.Mount) (retSpec *specs.Spec, err error) { var ( opts []coci.SpecOpts s = oci.DefaultSpec() @@ -1108,7 +1069,7 @@ func (daemon *Daemon) createSpec(ctx context.Context, daemonCfg *configStore, c WithNamespaces(daemon, c), WithCapabilities(c), WithSeccomp(daemon, c), - withMounts(daemon, daemonCfg, c), + withMounts(daemon, daemonCfg, c, mounts), withLibnetwork(daemon, &daemonCfg.Config, c), WithApparmor(c), WithSelinux(c), diff --git a/daemon/oci_linux_test.go b/daemon/oci_linux_test.go index d5ea294318c2b..b2008394dfb64 100644 --- a/daemon/oci_linux_test.go +++ b/daemon/oci_linux_test.go @@ -92,7 +92,7 @@ func TestTmpfsDevShmNoDupMount(t *testing.T) { } d := setupFakeDaemon(t, c) - _, err := d.createSpec(context.TODO(), &configStore{}, c) + _, err := d.createSpec(context.TODO(), &configStore{}, c, nil) assert.Check(t, err) } @@ -110,7 +110,7 @@ func TestIpcPrivateVsReadonly(t *testing.T) { } d := setupFakeDaemon(t, c) - s, err := d.createSpec(context.TODO(), &configStore{}, c) + s, err := d.createSpec(context.TODO(), &configStore{}, c, nil) assert.Check(t, err) // Find the /dev/shm mount in ms, check it does not have ro @@ -139,7 +139,7 @@ func TestSysctlOverride(t *testing.T) { d := setupFakeDaemon(t, c) // Ensure that the implicit sysctl is set correctly. - s, err := d.createSpec(context.TODO(), &configStore{}, c) + s, err := d.createSpec(context.TODO(), &configStore{}, c, nil) assert.NilError(t, err) assert.Equal(t, s.Hostname, "foobar") assert.Equal(t, s.Linux.Sysctl["kernel.domainname"], c.Config.Domainname) @@ -155,14 +155,14 @@ func TestSysctlOverride(t *testing.T) { assert.Assert(t, c.HostConfig.Sysctls["kernel.domainname"] != c.Config.Domainname) c.HostConfig.Sysctls["net.ipv4.ip_unprivileged_port_start"] = "1024" - s, err = d.createSpec(context.TODO(), &configStore{}, c) + s, err = d.createSpec(context.TODO(), &configStore{}, c, nil) assert.NilError(t, err) assert.Equal(t, s.Hostname, "foobar") assert.Equal(t, s.Linux.Sysctl["kernel.domainname"], c.HostConfig.Sysctls["kernel.domainname"]) assert.Equal(t, s.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], c.HostConfig.Sysctls["net.ipv4.ip_unprivileged_port_start"]) // Ensure the ping_group_range is not set on a daemon with user-namespaces enabled - s, err = d.createSpec(context.TODO(), &configStore{Config: config.Config{RemappedRoot: "dummy:dummy"}}, c) + s, err = d.createSpec(context.TODO(), &configStore{Config: config.Config{RemappedRoot: "dummy:dummy"}}, c, nil) assert.NilError(t, err) _, ok := s.Linux.Sysctl["net.ipv4.ping_group_range"] assert.Assert(t, !ok) @@ -170,7 +170,7 @@ func TestSysctlOverride(t *testing.T) { // Ensure the ping_group_range is set on a container in "host" userns mode // on a daemon with user-namespaces enabled c.HostConfig.UsernsMode = "host" - s, err = d.createSpec(context.TODO(), &configStore{Config: config.Config{RemappedRoot: "dummy:dummy"}}, c) + s, err = d.createSpec(context.TODO(), &configStore{Config: config.Config{RemappedRoot: "dummy:dummy"}}, c, nil) assert.NilError(t, err) assert.Equal(t, s.Linux.Sysctl["net.ipv4.ping_group_range"], "0 2147483647") } @@ -189,7 +189,7 @@ func TestSysctlOverrideHost(t *testing.T) { d := setupFakeDaemon(t, c) // Ensure that the implicit sysctl is not set - s, err := d.createSpec(context.TODO(), &configStore{}, c) + s, err := d.createSpec(context.TODO(), &configStore{}, c, nil) assert.NilError(t, err) assert.Equal(t, s.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], "") assert.Equal(t, s.Linux.Sysctl["net.ipv4.ping_group_range"], "") @@ -197,7 +197,7 @@ func TestSysctlOverrideHost(t *testing.T) { // Set an explicit sysctl. c.HostConfig.Sysctls["net.ipv4.ip_unprivileged_port_start"] = "1024" - s, err = d.createSpec(context.TODO(), &configStore{}, c) + s, err = d.createSpec(context.TODO(), &configStore{}, c, nil) assert.NilError(t, err) assert.Equal(t, s.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], c.HostConfig.Sysctls["net.ipv4.ip_unprivileged_port_start"]) } @@ -225,7 +225,7 @@ func TestDefaultResources(t *testing.T) { } d := setupFakeDaemon(t, c) - s, err := d.createSpec(context.Background(), &configStore{}, c) + s, err := d.createSpec(context.Background(), &configStore{}, c, nil) assert.NilError(t, err) checkResourcesAreUnset(t, s.Linux.Resources) } diff --git a/daemon/oci_windows.go b/daemon/oci_windows.go index ef6030db073af..31caa283878e8 100644 --- a/daemon/oci_windows.go +++ b/daemon/oci_windows.go @@ -30,30 +30,11 @@ const ( credentialSpecFileLocation = "CredentialSpecs" ) -func (daemon *Daemon) createSpec(ctx context.Context, daemonCfg *configStore, c *container.Container) (*specs.Spec, error) { - img, err := daemon.imageService.GetImage(ctx, string(c.ImageID), imagetypes.GetImageOpts{}) - if err != nil { - return nil, err - } - if err := image.CheckOS(img.OperatingSystem()); err != nil { - return nil, err - } - - s := oci.DefaultSpec() - - if err := coci.WithAnnotations(c.HostConfig.Annotations)(ctx, nil, nil, &s); err != nil { - return nil, err - } - - linkedEnv, err := daemon.setupLinkedContainers(c) - if err != nil { - return nil, err - } - +// setupContainerDirs sets up base container directories (root, ipc, tmpfs and secrets). +func (daemon *Daemon) setupContainerDirs(c *container.Container) ([]container.Mount, error) { // Note, unlike Unix, we do NOT call into SetupWorkingDirectory as // this is done in VMCompute. Further, we couldn't do it for Hyper-V // containers anyway. - if err := daemon.setupSecretDir(c); err != nil { return nil, err } @@ -62,25 +43,6 @@ func (daemon *Daemon) createSpec(ctx context.Context, daemonCfg *configStore, c return nil, err } - // In s.Mounts - mounts, err := daemon.setupMounts(c) - if err != nil { - return nil, err - } - - var isHyperV bool - if c.HostConfig.Isolation.IsDefault() { - // Container using default isolation, so take the default from the daemon configuration - isHyperV = daemon.defaultIsolation.IsHyperV() - } else { - // Container may be requesting an explicit isolation mode. - isHyperV = c.HostConfig.Isolation.IsHyperV() - } - - if isHyperV { - s.Windows.HyperV = &specs.WindowsHyperV{} - } - // If the container has not been started, and has configs or secrets // secrets, create symlinks to each config and secret. If it has been // started before, the symlinks should have already been created. Also, it @@ -90,7 +52,7 @@ func (daemon *Daemon) createSpec(ctx context.Context, daemonCfg *configStore, c if !c.HasBeenStartedBefore && (len(c.SecretReferences) > 0 || len(c.ConfigReferences) > 0) { // The container file system is mounted before this function is called, // except for Hyper-V containers, so mount it here in that case. - if isHyperV { + if daemon.isHyperV(c) { if err := daemon.Mount(c); err != nil { return nil, err } @@ -108,6 +70,8 @@ func (daemon *Daemon) createSpec(ctx context.Context, daemonCfg *configStore, c if err != nil { return nil, err } + + var mounts []container.Mount if secretMounts != nil { mounts = append(mounts, secretMounts...) } @@ -116,6 +80,33 @@ func (daemon *Daemon) createSpec(ctx context.Context, daemonCfg *configStore, c mounts = append(mounts, configMounts...) } + return mounts, nil +} + +func (daemon *Daemon) isHyperV(c *container.Container) bool { + if c.HostConfig.Isolation.IsDefault() { + // Container using default isolation, so take the default from the daemon configuration + return daemon.defaultIsolation.IsHyperV() + } + // Container may be requesting an explicit isolation mode. + return c.HostConfig.Isolation.IsHyperV() +} + +func (daemon *Daemon) createSpec(ctx context.Context, daemonCfg *configStore, c *container.Container, mounts []container.Mount) (*specs.Spec, error) { + img, err := daemon.imageService.GetImage(ctx, string(c.ImageID), imagetypes.GetImageOpts{}) + if err != nil { + return nil, err + } + if err := image.CheckOS(img.OperatingSystem()); err != nil { + return nil, err + } + + s := oci.DefaultSpec() + + if err := coci.WithAnnotations(c.HostConfig.Annotations)(ctx, nil, nil, &s); err != nil { + return nil, err + } + for _, mount := range mounts { m := specs.Mount{ Source: mount.Source, @@ -127,6 +118,16 @@ func (daemon *Daemon) createSpec(ctx context.Context, daemonCfg *configStore, c s.Mounts = append(s.Mounts, m) } + linkedEnv, err := daemon.setupLinkedContainers(c) + if err != nil { + return nil, err + } + + isHyperV := daemon.isHyperV(c) + if isHyperV { + s.Windows.HyperV = &specs.WindowsHyperV{} + } + // In s.Process s.Process.Cwd = c.Config.WorkingDir s.Process.Env = c.CreateDaemonEnvironment(c.Config.Tty, linkedEnv) diff --git a/daemon/start.go b/daemon/start.go index 24e72e2248926..7e6690295de3b 100644 --- a/daemon/start.go +++ b/daemon/start.go @@ -139,7 +139,7 @@ func (daemon *Daemon) containerStart(ctx context.Context, daemonCfg *configStore } container.Reset(false) - daemon.Cleanup(container) + daemon.Cleanup(compatcontext.WithoutCancel(ctx), container) // if containers AutoRemove flag is set, remove it after clean up if container.HostConfig.AutoRemove { container.Unlock() @@ -159,7 +159,19 @@ func (daemon *Daemon) containerStart(ctx context.Context, daemonCfg *configStore return err } - spec, err := daemon.createSpec(ctx, daemonCfg, container) + mnts, err := daemon.setupContainerDirs(container) + if err != nil { + return err + } + + m, cleanup, err := daemon.setupMounts(ctx, container) + if err != nil { + return err + } + mnts = append(mnts, m...) + defer cleanup(compatcontext.WithoutCancel(ctx)) + + spec, err := daemon.createSpec(ctx, daemonCfg, container, mnts) if err != nil { // Any error that occurs while creating the spec, even if it's the // result of an invalid container config, must be considered a System @@ -248,19 +260,19 @@ func (daemon *Daemon) containerStart(ctx context.Context, daemonCfg *configStore // Cleanup releases any network resources allocated to the container along with any rules // around how containers are linked together. It also unmounts the container's root filesystem. -func (daemon *Daemon) Cleanup(container *container.Container) { +func (daemon *Daemon) Cleanup(ctx context.Context, container *container.Container) { // Microsoft HCS containers get in a bad state if host resources are // released while the container still exists. if ctr, ok := container.C8dContainer(); ok { if err := ctr.Delete(context.Background()); err != nil { - log.G(context.TODO()).Errorf("%s cleanup: failed to delete container from containerd: %v", container.ID, err) + log.G(ctx).Errorf("%s cleanup: failed to delete container from containerd: %v", container.ID, err) } } daemon.releaseNetwork(container) if err := container.UnmountIpcMount(); err != nil { - log.G(context.TODO()).Warnf("%s cleanup: failed to unmount IPC: %s", container.ID, err) + log.G(ctx).Warnf("%s cleanup: failed to unmount IPC: %s", container.ID, err) } if err := daemon.conditionalUnmountOnCleanup(container); err != nil { @@ -272,11 +284,11 @@ func (daemon *Daemon) Cleanup(container *container.Container) { } if err := container.UnmountSecrets(); err != nil { - log.G(context.TODO()).Warnf("%s cleanup: failed to unmount secrets: %s", container.ID, err) + log.G(ctx).Warnf("%s cleanup: failed to unmount secrets: %s", container.ID, err) } if err := recursiveUnmount(container.Root); err != nil { - log.G(context.TODO()).WithError(err).WithField("container", container.ID).Warn("Error while cleaning up container resource mounts.") + log.G(ctx).WithError(err).WithField("container", container.ID).Warn("Error while cleaning up container resource mounts.") } for _, eConfig := range container.ExecCommands.Commands() { @@ -284,8 +296,8 @@ func (daemon *Daemon) Cleanup(container *container.Container) { } if container.BaseFS != "" { - if err := container.UnmountVolumes(daemon.LogVolumeEvent); err != nil { - log.G(context.TODO()).Warnf("%s cleanup: Failed to umount volumes: %v", container.ID, err) + if err := container.UnmountVolumes(ctx, daemon.LogVolumeEvent); err != nil { + log.G(ctx).Warnf("%s cleanup: Failed to umount volumes: %v", container.ID, err) } } diff --git a/daemon/volumes_unix.go b/daemon/volumes_unix.go index 959124535ab3d..ad5fdc6383447 100644 --- a/daemon/volumes_unix.go +++ b/daemon/volumes_unix.go @@ -3,15 +3,19 @@ package daemon // import "github.com/docker/docker/daemon" import ( + "context" "fmt" "os" "sort" "strconv" "strings" + "github.com/containerd/log" "github.com/docker/docker/api/types/events" mounttypes "github.com/docker/docker/api/types/mount" "github.com/docker/docker/container" + "github.com/docker/docker/internal/cleanups" + "github.com/docker/docker/internal/compatcontext" volumemounts "github.com/docker/docker/volume/mounts" "github.com/pkg/errors" ) @@ -19,23 +23,34 @@ import ( // setupMounts iterates through each of the mount points for a container and // calls Setup() on each. It also looks to see if is a network mount such as // /etc/resolv.conf, and if it is not, appends it to the array of mounts. -func (daemon *Daemon) setupMounts(c *container.Container) ([]container.Mount, error) { +// +// The cleanup function should be called as soon as the container has been +// started. +func (daemon *Daemon) setupMounts(ctx context.Context, c *container.Container) ([]container.Mount, func(context.Context) error, error) { var mounts []container.Mount // TODO: tmpfs mounts should be part of Mountpoints tmpfsMounts := make(map[string]bool) tmpfsMountInfo, err := c.TmpfsMounts() if err != nil { - return nil, err + return nil, nil, err } for _, m := range tmpfsMountInfo { tmpfsMounts[m.Destination] = true } + + cleanups := cleanups.Composite{} + defer func() { + if err := cleanups.Call(compatcontext.WithoutCancel(ctx)); err != nil { + log.G(ctx).WithError(err).Warn("failed to cleanup temporary mounts created by MountPoint.Setup") + } + }() + for _, m := range c.MountPoints { if tmpfsMounts[m.Destination] { continue } if err := daemon.lazyInitializeVolume(c.ID, m); err != nil { - return nil, err + return nil, nil, err } // If the daemon is being shutdown, we should not let a container start if it is trying to // mount the socket the daemon is listening on. During daemon shutdown, the socket @@ -48,10 +63,12 @@ func (daemon *Daemon) setupMounts(c *container.Container) ([]container.Mount, er return nil } - path, err := m.Setup(c.MountLabel, daemon.idMapping.RootPair(), checkfunc) + path, clean, err := m.Setup(ctx, c.MountLabel, daemon.idMapping.RootPair(), checkfunc) if err != nil { - return nil, err + return nil, nil, err } + cleanups.Add(clean) + if !c.TrySetNetworkMount(m.Destination, path) { mnt := container.Mount{ Source: path, @@ -61,13 +78,13 @@ func (daemon *Daemon) setupMounts(c *container.Container) ([]container.Mount, er } if m.Spec.Type == mounttypes.TypeBind && m.Spec.BindOptions != nil { if !m.Spec.ReadOnly && m.Spec.BindOptions.ReadOnlyNonRecursive { - return nil, errors.New("mount options conflict: !ReadOnly && BindOptions.ReadOnlyNonRecursive") + return nil, nil, errors.New("mount options conflict: !ReadOnly && BindOptions.ReadOnlyNonRecursive") } if !m.Spec.ReadOnly && m.Spec.BindOptions.ReadOnlyForceRecursive { - return nil, errors.New("mount options conflict: !ReadOnly && BindOptions.ReadOnlyForceRecursive") + return nil, nil, errors.New("mount options conflict: !ReadOnly && BindOptions.ReadOnlyForceRecursive") } if m.Spec.BindOptions.ReadOnlyNonRecursive && m.Spec.BindOptions.ReadOnlyForceRecursive { - return nil, errors.New("mount options conflict: ReadOnlyNonRecursive && BindOptions.ReadOnlyForceRecursive") + return nil, nil, errors.New("mount options conflict: ReadOnlyNonRecursive && BindOptions.ReadOnlyForceRecursive") } mnt.NonRecursive = m.Spec.BindOptions.NonRecursive mnt.ReadOnlyNonRecursive = m.Spec.BindOptions.ReadOnlyNonRecursive @@ -98,11 +115,11 @@ func (daemon *Daemon) setupMounts(c *container.Container) ([]container.Mount, er // up to the user to make sure the file has proper ownership for userns if strings.Index(mnt.Source, daemon.repository) == 0 { if err := os.Chown(mnt.Source, rootIDs.UID, rootIDs.GID); err != nil { - return nil, err + return nil, nil, err } } } - return append(mounts, netMounts...), nil + return append(mounts, netMounts...), cleanups.Release(), nil } // sortMounts sorts an array of mounts in lexicographic order. This ensure that diff --git a/daemon/volumes_windows.go b/daemon/volumes_windows.go index 574cc48f1cf8b..83a3eb06d2aa6 100644 --- a/daemon/volumes_windows.go +++ b/daemon/volumes_windows.go @@ -1,10 +1,14 @@ package daemon // import "github.com/docker/docker/daemon" import ( + "context" "sort" + "github.com/containerd/log" "github.com/docker/docker/api/types/mount" "github.com/docker/docker/container" + "github.com/docker/docker/internal/cleanups" + "github.com/docker/docker/internal/compatcontext" "github.com/docker/docker/pkg/idtools" volumemounts "github.com/docker/docker/volume/mounts" ) @@ -13,21 +17,31 @@ import ( // of the configured mounts on the container to the OCI mount structure // which will ultimately be passed into the oci runtime during container creation. // It also ensures each of the mounts are lexicographically sorted. - +// +// The cleanup function should be called as soon as the container has been +// started. +// // BUGBUG TODO Windows containerd. This would be much better if it returned // an array of runtime spec mounts, not container mounts. Then no need to // do multiple transitions. +func (daemon *Daemon) setupMounts(ctx context.Context, c *container.Container) ([]container.Mount, func(context.Context) error, error) { + cleanups := cleanups.Composite{} + defer func() { + if err := cleanups.Call(compatcontext.WithoutCancel(ctx)); err != nil { + log.G(ctx).WithError(err).Warn("failed to cleanup temporary mounts created by MountPoint.Setup") + } + }() -func (daemon *Daemon) setupMounts(c *container.Container) ([]container.Mount, error) { var mnts []container.Mount for _, mount := range c.MountPoints { // type is volumemounts.MountPoint if err := daemon.lazyInitializeVolume(c.ID, mount); err != nil { - return nil, err + return nil, nil, err } - s, err := mount.Setup(c.MountLabel, idtools.Identity{}, nil) + s, c, err := mount.Setup(ctx, c.MountLabel, idtools.Identity{}, nil) if err != nil { - return nil, err + return nil, nil, err } + cleanups.Add(c) mnts = append(mnts, container.Mount{ Source: s, @@ -37,7 +51,7 @@ func (daemon *Daemon) setupMounts(c *container.Container) ([]container.Mount, er } sort.Sort(mounts(mnts)) - return mnts, nil + return mnts, cleanups.Release(), nil } // setBindModeIfNull is platform specific processing which is a no-op on diff --git a/docs/api/version-history.md b/docs/api/version-history.md index 16fafdb7b4af7..deb0fe2d151a2 100644 --- a/docs/api/version-history.md +++ b/docs/api/version-history.md @@ -17,6 +17,9 @@ keywords: "API, Docker, rcli, REST, documentation" [Docker Engine API v1.45](https://docs.docker.com/engine/api/v1.45/) documentation +* `POST /containers/create` now supports `VolumeOptions.Subpath` which allows a + subpath of a named volume to be mounted. + ## v1.44 API changes [Docker Engine API v1.44](https://docs.docker.com/engine/api/v1.44/) documentation diff --git a/integration/internal/container/container.go b/integration/internal/container/container.go index 0974ce6bf113c..dac52999ae512 100644 --- a/integration/internal/container/container.go +++ b/integration/internal/container/container.go @@ -170,3 +170,28 @@ func Inspect(ctx context.Context, t *testing.T, apiClient client.APIClient, cont return c } + +type ContainerOutput struct { + Stdout, Stderr string +} + +// Output waits for the container to end running and returns its output. +func Output(ctx context.Context, client client.APIClient, id string) (ContainerOutput, error) { + logs, err := client.ContainerLogs(ctx, id, container.LogsOptions{Follow: true, ShowStdout: true, ShowStderr: true}) + if err != nil { + return ContainerOutput{}, err + } + + defer logs.Close() + + var stdoutBuf, stderrBuf bytes.Buffer + _, err = stdcopy.StdCopy(&stdoutBuf, &stderrBuf, logs) + if err != nil { + return ContainerOutput{}, err + } + + return ContainerOutput{ + Stdout: stdoutBuf.String(), + Stderr: stderrBuf.String(), + }, nil +} diff --git a/integration/volume/mount_test.go b/integration/volume/mount_test.go new file mode 100644 index 0000000000000..21e1186523903 --- /dev/null +++ b/integration/volume/mount_test.go @@ -0,0 +1,185 @@ +package volume + +import ( + "context" + "path/filepath" + "strings" + "testing" + + containertypes "github.com/docker/docker/api/types/container" + "github.com/docker/docker/api/types/mount" + "github.com/docker/docker/api/types/network" + "github.com/docker/docker/api/types/versions" + "github.com/docker/docker/api/types/volume" + "github.com/docker/docker/client" + "github.com/docker/docker/integration/internal/container" + "github.com/docker/docker/internal/safepath" + "gotest.tools/v3/assert" + is "gotest.tools/v3/assert/cmp" + "gotest.tools/v3/skip" +) + +func TestRunMountVolumeSubdir(t *testing.T) { + skip.If(t, versions.LessThan(testEnv.DaemonAPIVersion(), "1.45"), "skip test from new feature") + + ctx := setupTest(t) + apiClient := testEnv.APIClient() + + testVolumeName := setupTestVolume(t, apiClient) + + for _, tc := range []struct { + name string + opts mount.VolumeOptions + cmd []string + volumeTarget string + createErr string + startErr string + expected string + skipPlatform string + }{ + {name: "subdir", opts: mount.VolumeOptions{Subpath: "subdir"}, cmd: []string{"ls", "/volume"}, expected: "hello.txt"}, + {name: "subdir link", opts: mount.VolumeOptions{Subpath: "hack/good"}, cmd: []string{"ls", "/volume"}, expected: "hello.txt"}, + {name: "subdir with copy data", opts: mount.VolumeOptions{Subpath: "bin"}, volumeTarget: "/bin", cmd: []string{"ls", "/bin/busybox"}, expected: "/bin/busybox", skipPlatform: "windows:copy not supported on Windows"}, + {name: "file", opts: mount.VolumeOptions{Subpath: "bar.txt"}, cmd: []string{"cat", "/volume"}, expected: "foo", skipPlatform: "windows:file bind mounts not supported on Windows"}, + {name: "relative with backtracks", opts: mount.VolumeOptions{Subpath: "../../../../../../etc/passwd"}, cmd: []string{"cat", "/volume"}, createErr: "subpath must be a relative path within the volume"}, + {name: "not existing", opts: mount.VolumeOptions{Subpath: "not-existing-path"}, cmd: []string{"cat", "/volume"}, startErr: (&safepath.ErrNotAccessible{}).Error()}, + + {name: "mount link", opts: mount.VolumeOptions{Subpath: filepath.Join("hack", "root")}, cmd: []string{"ls", "/volume"}, startErr: (&safepath.ErrEscapesBase{}).Error()}, + {name: "mount link link", opts: mount.VolumeOptions{Subpath: filepath.Join("hack", "bad")}, cmd: []string{"ls", "/volume"}, startErr: (&safepath.ErrEscapesBase{}).Error()}, + } { + t.Run(tc.name, func(t *testing.T) { + if tc.skipPlatform != "" { + platform, reason, _ := strings.Cut(tc.skipPlatform, ":") + if testEnv.DaemonInfo.OSType == platform { + t.Skip(reason) + } + } + + cfg := containertypes.Config{ + Image: "busybox", + Cmd: tc.cmd, + } + hostCfg := containertypes.HostConfig{ + Mounts: []mount.Mount{ + { + Type: mount.TypeVolume, + Source: testVolumeName, + Target: "/volume", + VolumeOptions: &tc.opts, + }, + }, + } + if testEnv.DaemonInfo.OSType == "windows" { + hostCfg.Mounts[0].Target = `C:\volume` + } + if tc.volumeTarget != "" { + hostCfg.Mounts[0].Target = tc.volumeTarget + } + + ctrName := strings.ReplaceAll(t.Name(), "/", "_") + create, creatErr := apiClient.ContainerCreate(ctx, &cfg, &hostCfg, &network.NetworkingConfig{}, nil, ctrName) + id := create.ID + if id != "" { + defer apiClient.ContainerRemove(ctx, id, containertypes.RemoveOptions{Force: true}) + } + + if tc.createErr != "" { + assert.ErrorContains(t, creatErr, tc.createErr) + return + } + assert.NilError(t, creatErr, "container creation failed") + + startErr := apiClient.ContainerStart(ctx, id, containertypes.StartOptions{}) + if tc.startErr != "" { + assert.ErrorContains(t, startErr, tc.startErr) + return + } + assert.NilError(t, startErr) + + output, err := container.Output(ctx, apiClient, id) + assert.Check(t, err) + t.Logf("stdout:\n%s", output.Stdout) + t.Logf("stderr:\n%s", output.Stderr) + + inspect, err := apiClient.ContainerInspect(ctx, id) + if assert.Check(t, err) { + assert.Check(t, is.Equal(inspect.State.ExitCode, 0)) + } + + assert.Check(t, is.Equal(strings.TrimSpace(output.Stderr), "")) + assert.Check(t, is.Equal(strings.TrimSpace(output.Stdout), tc.expected)) + }) + } +} + +// setupTestVolume sets up a volume with: +// . +// |-- bar.txt (file with "foo") +// |-- bin (directory) +// |-- subdir (directory) +// | |-- hello.txt (file with "world") +// |-- hack (directory) +// | |-- root (symlink to /) +// | |-- good (symlink to ../subdir) +// | |-- bad (symlink to root) +func setupTestVolume(t *testing.T, client client.APIClient) string { + t.Helper() + ctx := context.Background() + + volumeName := t.Name() + "-volume" + + err := client.VolumeRemove(ctx, volumeName, true) + assert.NilError(t, err, "failed to clean volume") + + _, err = client.VolumeCreate(ctx, volume.CreateOptions{ + Name: volumeName, + }) + assert.NilError(t, err, "failed to setup volume") + + mount := mount.Mount{ + Type: mount.TypeVolume, + Source: volumeName, + Target: "/volume", + } + + rootFs := "/" + if testEnv.DaemonInfo.OSType == "windows" { + mount.Target = `C:\volume` + rootFs = `C:` + } + + initCmd := "echo foo > /volume/bar.txt && " + + "mkdir /volume/bin && " + + "mkdir /volume/subdir && " + + "echo world > /volume/subdir/hello.txt && " + + "mkdir /volume/hack && " + + "ln -s " + rootFs + " /volume/hack/root && " + + "ln -s ../subdir /volume/hack/good && " + + "ln -s root /volume/hack/bad &&" + + "mkdir /volume/hack/iwanttobehackedwithtoctou" + + opts := []func(*container.TestContainerConfig){ + container.WithMount(mount), + container.WithCmd("sh", "-c", initCmd+"; ls -lah /volume /volume/hack/"), + } + if testEnv.DaemonInfo.OSType == "windows" { + // Can't create symlinks under HyperV isolation + opts = append(opts, container.WithIsolation(containertypes.IsolationProcess)) + } + + cid := container.Run(ctx, t, client, opts...) + defer client.ContainerRemove(ctx, cid, containertypes.RemoveOptions{Force: true}) + output, err := container.Output(ctx, client, cid) + + t.Logf("Setup stderr:\n%s", output.Stderr) + t.Logf("Setup stdout:\n%s", output.Stdout) + + assert.NilError(t, err) + assert.Assert(t, is.Equal(output.Stderr, "")) + + inspect, err := client.ContainerInspect(ctx, cid) + assert.NilError(t, err) + assert.Assert(t, is.Equal(inspect.State.ExitCode, 0)) + + return volumeName +} diff --git a/internal/cleanups/composite.go b/internal/cleanups/composite.go new file mode 100644 index 0000000000000..3c00cd6d75a3f --- /dev/null +++ b/internal/cleanups/composite.go @@ -0,0 +1,44 @@ +package cleanups + +import ( + "context" + + "github.com/docker/docker/internal/multierror" +) + +type Composite struct { + cleanups []func(context.Context) error +} + +// Add adds a cleanup to be called. +func (c *Composite) Add(f func(context.Context) error) { + c.cleanups = append(c.cleanups, f) +} + +// Call calls all cleanups in reverse order and returns an error combining all +// non-nil errors. +func (c *Composite) Call(ctx context.Context) error { + err := call(ctx, c.cleanups) + c.cleanups = nil + return err +} + +// Release removes all cleanups, turning Call into a no-op. +// Caller still can call the cleanups by calling the returned function +// which is equivalent to calling the Call before Release was called. +func (c *Composite) Release() func(context.Context) error { + cleanups := c.cleanups + c.cleanups = nil + return func(ctx context.Context) error { + return call(ctx, cleanups) + } +} + +func call(ctx context.Context, cleanups []func(context.Context) error) error { + var errs []error + for idx := len(cleanups) - 1; idx >= 0; idx-- { + c := cleanups[idx] + errs = append(errs, c(ctx)) + } + return multierror.Join(errs...) +} diff --git a/internal/cleanups/composite_test.go b/internal/cleanups/composite_test.go new file mode 100644 index 0000000000000..049a59dd61e7d --- /dev/null +++ b/internal/cleanups/composite_test.go @@ -0,0 +1,54 @@ +package cleanups + +import ( + "context" + "errors" + "fmt" + "testing" + + "gotest.tools/v3/assert" + is "gotest.tools/v3/assert/cmp" +) + +func TestCall(t *testing.T) { + c := Composite{} + var err1 = errors.New("error1") + var err2 = errors.New("error2") + var errX = errors.New("errorX") + var errY = errors.New("errorY") + var errZ = errors.New("errorZ") + var errYZ = errors.Join(errY, errZ) + + c.Add(func(ctx context.Context) error { + return err1 + }) + c.Add(func(ctx context.Context) error { + return nil + }) + c.Add(func(ctx context.Context) error { + return fmt.Errorf("something happened: %w", err2) + }) + c.Add(func(ctx context.Context) error { + return errors.Join(errX, fmt.Errorf("joined: %w", errYZ)) + }) + + err := c.Call(context.Background()) + + errs := err.(interface{ Unwrap() []error }).Unwrap() + + assert.Check(t, is.ErrorContains(err, err1.Error())) + assert.Check(t, is.ErrorContains(err, err2.Error())) + assert.Check(t, is.ErrorContains(err, errX.Error())) + assert.Check(t, is.ErrorContains(err, errY.Error())) + assert.Check(t, is.ErrorContains(err, errZ.Error())) + assert.Check(t, is.ErrorContains(err, "something happened: "+err2.Error())) + + t.Logf(err.Error()) + assert.Assert(t, is.Len(errs, 3)) + + // Cleanups executed in reverse order. + assert.Check(t, is.ErrorIs(errs[2], err1)) + assert.Check(t, is.ErrorIs(errs[1], err2)) + assert.Check(t, is.ErrorIs(errs[0], errX)) + assert.Check(t, is.ErrorIs(errs[0], errYZ)) +} diff --git a/internal/safepath/common.go b/internal/safepath/common.go new file mode 100644 index 0000000000000..5beb2e6e431ff --- /dev/null +++ b/internal/safepath/common.go @@ -0,0 +1,66 @@ +package safepath + +import ( + "os" + "path/filepath" + + "github.com/pkg/errors" +) + +// evaluatePath evaluates symlinks in the concatenation of path and subpath. If +// err is nil, resolvedBasePath will contain result of resolving all symlinks +// in the given path, and resolvedSubpath will contain a relative path rooted +// at the resolvedBasePath pointing to the concatenation after resolving all +// symlinks. +func evaluatePath(path, subpath string) (resolvedBasePath string, resolvedSubpath string, err error) { + baseResolved, err := filepath.EvalSymlinks(path) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + return "", "", &ErrNotAccessible{Path: path, Cause: err} + } + return "", "", errors.Wrapf(err, "error while resolving symlinks in base directory %q", path) + } + + combinedPath := filepath.Join(baseResolved, subpath) + combinedResolved, err := filepath.EvalSymlinks(combinedPath) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + return "", "", &ErrNotAccessible{Path: combinedPath, Cause: err} + } + return "", "", errors.Wrapf(err, "error while resolving symlinks in combined path %q", combinedPath) + } + + subpart, err := filepath.Rel(baseResolved, combinedResolved) + if err != nil { + return "", "", &ErrEscapesBase{Base: baseResolved, Subpath: subpath} + } + + if !filepath.IsLocal(subpart) { + return "", "", &ErrEscapesBase{Base: baseResolved, Subpath: subpath} + } + + return baseResolved, subpart, nil +} + +// isLocalTo reports whether path, using lexical analysis only, has all of these properties: +// - is within the subtree rooted at basepath +// - is not empty +// - on Windows, is not a reserved name such as "NUL" +// +// If isLocalTo(path, basepath) returns true, then +// +// filepath.Rel(basepath, path) +// +// will always produce an unrooted path with no `..` elements. +// +// isLocalTo is a purely lexical operation. In particular, it does not account for the effect of any symbolic links that may exist in the filesystem. +// +// Both path and basepath are expected to be absolute paths. +func isLocalTo(path, basepath string) bool { + rel, err := filepath.Rel(basepath, path) + if err != nil { + return false + } + + return filepath.IsLocal(rel) +} diff --git a/internal/safepath/common_test.go b/internal/safepath/common_test.go new file mode 100644 index 0000000000000..284481ae7afbb --- /dev/null +++ b/internal/safepath/common_test.go @@ -0,0 +1,31 @@ +package safepath + +import ( + "testing" + + "gotest.tools/v3/assert" + is "gotest.tools/v3/assert/cmp" +) + +func TestIsLocalTo(t *testing.T) { + for _, tc := range []struct { + name string + subpath string + result bool + }{ + {name: "same", subpath: "/volume", result: true}, + {name: "1 level subpath", subpath: "/volume/sub", result: true}, + {name: "2 level subpath", subpath: "/volume/sub/path", result: true}, + {name: "absolute", subpath: "/etc/passwd", result: false}, + {name: "backtrack", subpath: "/volume/../", result: false}, + {name: "backtrack inside", subpath: "/volume/sub/../", result: true}, + {name: "relative path", subpath: "./rel", result: false}, + {name: "file with dots", subpath: "/volume/file..with.dots", result: true}, + {name: "file starting with dots", subpath: "/volume/..file", result: true}, + } { + t.Run(tc.name, func(t *testing.T) { + result := isLocalTo(tc.subpath, "/volume") + assert.Check(t, is.Equal(result, tc.result)) + }) + } +} diff --git a/internal/safepath/errors.go b/internal/safepath/errors.go new file mode 100644 index 0000000000000..8fcfe262eeb4f --- /dev/null +++ b/internal/safepath/errors.go @@ -0,0 +1,42 @@ +package safepath + +// ErrNotAccessible is returned by Join when the resulting path doesn't exist, +// is not accessible, or any of the path components was replaced with a symlink +// during the path traversal. +type ErrNotAccessible struct { + Path string + Cause error +} + +func (*ErrNotAccessible) NotFound() {} + +func (e *ErrNotAccessible) Unwrap() error { + return e.Cause +} + +func (e *ErrNotAccessible) Error() string { + msg := "cannot access path " + e.Path + if e.Cause != nil { + msg += ": " + e.Cause.Error() + } + return msg +} + +// ErrEscapesBase is returned by Join when the resulting concatenation would +// point outside of the specified base directory. +type ErrEscapesBase struct { + Base, Subpath string +} + +func (*ErrEscapesBase) InvalidParameter() {} + +func (e *ErrEscapesBase) Error() string { + msg := "path concatenation escapes the base directory" + if e.Base != "" { + msg += ", base: " + e.Base + } + if e.Subpath != "" { + msg += ", subpath: " + e.Subpath + } + return msg +} diff --git a/internal/safepath/join_linux.go b/internal/safepath/join_linux.go new file mode 100644 index 0000000000000..c93f7db8fa86b --- /dev/null +++ b/internal/safepath/join_linux.go @@ -0,0 +1,150 @@ +package safepath + +import ( + "context" + "os" + "path/filepath" + "runtime" + "strconv" + + "github.com/containerd/log" + "github.com/docker/docker/internal/unix_noeintr" + "github.com/pkg/errors" + "golang.org/x/sys/unix" +) + +// Join makes sure that the concatenation of path and subpath doesn't +// resolve to a path outside of path and returns a path to a temporary file that is +// a bind mount to the the exact same file/directory that was validated. +// +// After use, it is the caller's responsibility to call Close on the returned +// SafePath object, which will unmount the temporary file/directory +// and remove it. +func Join(_ context.Context, path, subpath string) (*SafePath, error) { + base, subpart, err := evaluatePath(path, subpath) + if err != nil { + return nil, err + } + + runtime.LockOSThread() + defer runtime.UnlockOSThread() + fd, err := safeOpenFd(base, subpart) + if err != nil { + return nil, err + } + + defer unix_noeintr.Close(fd) + + tmpMount, err := tempMountPoint(fd) + if err != nil { + return nil, errors.Wrap(err, "failed to create temporary file for safe mount") + } + + pid := strconv.Itoa(unix.Gettid()) + // Using explicit pid path, because /proc/self/fd/ fails with EACCES + // when running under "Enhanced Container Isolation" in Docker Desktop + // which uses sysbox runtime under the hood. + // TODO(vvoland): Investigate. + mountSource := "/proc/" + pid + "/fd/" + strconv.Itoa(fd) + + if err := unix_noeintr.Mount(mountSource, tmpMount, "none", unix.MS_BIND, ""); err != nil { + os.Remove(tmpMount) + return nil, errors.Wrap(err, "failed to mount resolved path") + } + + return &SafePath{ + path: tmpMount, + sourceBase: base, + sourceSubpath: subpart, + cleanup: cleanupSafePath(tmpMount), + }, nil +} + +// safeOpenFd opens the file at filepath.Join(path, subpath) in O_PATH +// mode and returns the file descriptor if subpath is within the subtree +// rooted at path. It is an error if any of components of path or subpath +// are symbolic links. +// +// It is a caller's responsibility to close the returned file descriptor, if no +// error was returned. +func safeOpenFd(path, subpath string) (int, error) { + // Open base volume path (_data directory). + prevFd, err := unix_noeintr.Open(path, unix.O_PATH|unix.O_DIRECTORY|unix.O_CLOEXEC|unix.O_NOFOLLOW, 0) + if err != nil { + return -1, &ErrNotAccessible{Path: path, Cause: err} + } + defer unix_noeintr.Close(prevFd) + + // Try to use the Openat2 syscall first (available on Linux 5.6+). + fd, err := unix_noeintr.Openat2(prevFd, subpath, &unix.OpenHow{ + Flags: unix.O_PATH | unix.O_CLOEXEC, + Mode: 0, + Resolve: unix.RESOLVE_BENEATH | unix.RESOLVE_NO_MAGICLINKS | unix.RESOLVE_NO_SYMLINKS, + }) + + switch { + case errors.Is(err, unix.ENOSYS): + // Openat2 is not available, fallback to Openat loop. + return kubernetesSafeOpen(path, subpath) + case errors.Is(err, unix.EXDEV): + return -1, &ErrEscapesBase{Base: path, Subpath: subpath} + case errors.Is(err, unix.ENOENT), errors.Is(err, unix.ELOOP): + return -1, &ErrNotAccessible{Path: filepath.Join(path, subpath), Cause: err} + case err != nil: + return -1, &os.PathError{Op: "openat2", Path: subpath, Err: err} + } + + // Openat2 is available and succeeded. + return fd, nil +} + +// tempMountPoint creates a temporary file/directory to act as mount +// point for the file descriptor. +func tempMountPoint(sourceFd int) (string, error) { + var stat unix.Stat_t + err := unix_noeintr.Fstat(sourceFd, &stat) + if err != nil { + return "", errors.Wrap(err, "failed to Fstat mount source fd") + } + + isDir := (stat.Mode & unix.S_IFMT) == unix.S_IFDIR + if isDir { + return os.MkdirTemp("", "safe-mount") + } + + f, err := os.CreateTemp("", "safe-mount") + if err != nil { + return "", err + } + + p := f.Name() + if err := f.Close(); err != nil { + return "", err + } + return p, nil +} + +// cleanupSafePaths returns a function that unmounts the path and removes the +// mountpoint. +func cleanupSafePath(path string) func(context.Context) error { + return func(ctx context.Context) error { + log.G(ctx).WithField("path", path).Debug("removing safe temp mount") + + if err := unix_noeintr.Unmount(path, unix.MNT_DETACH); err != nil { + if errors.Is(err, unix.EINVAL) { + log.G(ctx).WithField("path", path).Warn("safe temp mount no longer exists?") + return nil + } + return errors.Wrapf(err, "error unmounting safe mount %s", path) + } + if err := os.Remove(path); err != nil { + if errors.Is(err, os.ErrNotExist) { + log.G(ctx).WithField("path", path).Warn("safe temp mount no longer exists?") + return nil + } + return errors.Wrapf(err, "failed to delete temporary safe mount") + } + + return nil + } +} diff --git a/internal/safepath/join_test.go b/internal/safepath/join_test.go new file mode 100644 index 0000000000000..b95af3614b8b6 --- /dev/null +++ b/internal/safepath/join_test.go @@ -0,0 +1,145 @@ +package safepath + +import ( + "context" + "os" + "path/filepath" + "runtime" + "strings" + "testing" + + "gotest.tools/v3/assert" + is "gotest.tools/v3/assert/cmp" +) + +func TestJoinEscapingSymlink(t *testing.T) { + type testCase struct { + name string + target string + } + var cases []testCase + + if runtime.GOOS == "windows" { + cases = []testCase{ + {name: "root", target: `C:\`}, + {name: "absolute file", target: `C:\Windows\System32\cmd.exe`}, + } + } else { + cases = []testCase{ + {name: "root", target: "/"}, + {name: "absolute file", target: "/etc/passwd"}, + } + } + cases = append(cases, testCase{name: "relative", target: "../../"}) + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + tempDir := t.TempDir() + dir, err := filepath.EvalSymlinks(tempDir) + assert.NilError(t, err, "filepath.EvalSymlinks failed for temporary directory %s", tempDir) + + err = os.Symlink(tc.target, filepath.Join(dir, "link")) + assert.NilError(t, err, "failed to create symlink to %s", tc.target) + + safe, err := Join(context.Background(), dir, "link") + if err == nil { + safe.Close(context.Background()) + } + assert.ErrorType(t, err, &ErrEscapesBase{}) + }) + } +} + +func TestJoinGoodSymlink(t *testing.T) { + tempDir := t.TempDir() + dir, err := filepath.EvalSymlinks(tempDir) + assert.NilError(t, err, "filepath.EvalSymlinks failed for temporary directory %s", tempDir) + + assert.Assert(t, os.WriteFile(filepath.Join(dir, "foo"), []byte("bar"), 0o744), "failed to create file 'foo'") + assert.Assert(t, os.Mkdir(filepath.Join(dir, "subdir"), 0o744), "failed to create directory 'subdir'") + assert.Assert(t, os.WriteFile(filepath.Join(dir, "subdir/hello.txt"), []byte("world"), 0o744), "failed to create file 'subdir/hello.txt'") + + assert.Assert(t, os.Symlink(filepath.Join(dir, "subdir"), filepath.Join(dir, "subdir_link_absolute")), "failed to create absolute symlink to directory 'subdir'") + assert.Assert(t, os.Symlink("subdir", filepath.Join(dir, "subdir_link_relative")), "failed to create relative symlink to directory 'subdir'") + + assert.Assert(t, os.Symlink(filepath.Join(dir, "foo"), filepath.Join(dir, "foo_link_absolute")), "failed to create absolute symlink to file 'foo'") + assert.Assert(t, os.Symlink("foo", filepath.Join(dir, "foo_link_relative")), "failed to create relative symlink to file 'foo'") + + for _, target := range []string{ + "foo", "subdir", + "subdir_link_absolute", "foo_link_absolute", + "subdir_link_relative", "foo_link_relative", + } { + t.Run(target, func(t *testing.T) { + safe, err := Join(context.Background(), dir, target) + assert.NilError(t, err) + + defer safe.Close(context.Background()) + if strings.HasPrefix(target, "subdir") { + data, err := os.ReadFile(filepath.Join(safe.Path(), "hello.txt")) + assert.NilError(t, err) + assert.Assert(t, is.Equal(string(data), "world")) + } + }) + } +} + +func TestJoinWithSymlinkReplace(t *testing.T) { + tempDir := t.TempDir() + dir, err := filepath.EvalSymlinks(tempDir) + assert.NilError(t, err, "filepath.EvalSymlinks failed for temporary directory %s", tempDir) + + link := filepath.Join(dir, "link") + target := filepath.Join(dir, "foo") + + err = os.WriteFile(target, []byte("bar"), 0o744) + assert.NilError(t, err, "failed to create test file") + + err = os.Symlink(target, link) + assert.Check(t, err, "failed to create symlink to foo") + + safe, err := Join(context.Background(), dir, "link") + assert.NilError(t, err) + + defer safe.Close(context.Background()) + + // Delete the link target. + err = os.Remove(target) + if runtime.GOOS == "windows" { + // On Windows it shouldn't be possible. + assert.Assert(t, is.ErrorType(err, &os.PathError{}), "link shouldn't be deletable before cleanup") + } else { + // On Linux we can delete it just fine. + assert.NilError(t, err, "failed to remove symlink") + + // Replace target with a symlink to /etc/paswd + err = os.Symlink("/etc/passwd", target) + assert.NilError(t, err, "failed to create symlink") + } + + // The returned safe path should still point to the old file. + data, err := os.ReadFile(safe.Path()) + assert.NilError(t, err, "failed to read file") + + assert.Check(t, is.Equal(string(data), "bar")) + +} + +func TestJoinCloseInvalidates(t *testing.T) { + tempDir := t.TempDir() + dir, err := filepath.EvalSymlinks(tempDir) + assert.NilError(t, err) + + foo := filepath.Join(dir, "foo") + err = os.WriteFile(foo, []byte("bar"), 0o744) + assert.NilError(t, err, "failed to create test file") + + safe, err := Join(context.Background(), dir, "foo") + assert.NilError(t, err) + + assert.Check(t, safe.IsValid()) + + assert.NilError(t, safe.Close(context.Background())) + + assert.Check(t, !safe.IsValid()) +} diff --git a/internal/safepath/join_windows.go b/internal/safepath/join_windows.go new file mode 100644 index 0000000000000..63c646a682c76 --- /dev/null +++ b/internal/safepath/join_windows.go @@ -0,0 +1,93 @@ +package safepath + +import ( + "context" + "os" + "path/filepath" + "strings" + + "github.com/containerd/log" + "github.com/docker/docker/internal/cleanups" + "github.com/docker/docker/internal/compatcontext" + "github.com/pkg/errors" + "golang.org/x/sys/windows" +) + +// Join locks all individual components of the path which is the concatenation +// of provided path and its subpath, checks that it doesn't escape the base path +// and returns the concatenated path. +// +// The path is safe (the path target won't change) until the returned SafePath +// is Closed. +// Caller is responsible for calling the Close function which unlocks the path. +func Join(ctx context.Context, path, subpath string) (*SafePath, error) { + base, subpart, err := evaluatePath(path, subpath) + if err != nil { + return nil, err + } + parts := strings.Split(subpart, string(os.PathSeparator)) + + cleanups := cleanups.Composite{} + defer func() { + if cErr := cleanups.Call(compatcontext.WithoutCancel(ctx)); cErr != nil { + log.G(ctx).WithError(cErr).Warn("failed to close handles after error") + } + }() + + fullPath := base + for _, part := range parts { + fullPath = filepath.Join(fullPath, part) + + handle, err := lockFile(fullPath) + if err != nil { + if errors.Is(err, windows.ERROR_FILE_NOT_FOUND) { + return nil, &ErrNotAccessible{Path: fullPath, Cause: err} + } + return nil, errors.Wrapf(err, "failed to lock file %s", fullPath) + } + cleanups.Add(func(context.Context) error { + if err := windows.CloseHandle(handle); err != nil { + return &os.PathError{Op: "CloseHandle", Path: fullPath, Err: err} + } + return err + }) + + realPath, err := filepath.EvalSymlinks(fullPath) + if err != nil { + return nil, errors.Wrapf(err, "failed to eval symlinks of %s", fullPath) + } + + if realPath != fullPath && !isLocalTo(realPath, base) { + return nil, &ErrEscapesBase{Base: base, Subpath: subpart} + } + + var info windows.ByHandleFileInformation + if err := windows.GetFileInformationByHandle(handle, &info); err != nil { + return nil, errors.WithStack(&os.PathError{Op: "GetFileInformationByHandle", Path: fullPath, Err: err}) + } + + if (info.FileAttributes & windows.FILE_ATTRIBUTE_REPARSE_POINT) != 0 { + return nil, &ErrNotAccessible{Path: fullPath, Cause: err} + } + } + + return &SafePath{ + path: fullPath, + sourceBase: base, + sourceSubpath: subpart, + cleanup: cleanups.Release(), + }, nil +} + +func lockFile(path string) (windows.Handle, error) { + p, err := windows.UTF16PtrFromString(path) + if err != nil { + return windows.InvalidHandle, &os.PathError{Op: "UTF16PtrFromString", Path: path, Err: err} + } + const flags = windows.FILE_FLAG_BACKUP_SEMANTICS | windows.FILE_FLAG_OPEN_REPARSE_POINT + handle, err := windows.CreateFile(p, windows.GENERIC_READ, windows.FILE_SHARE_READ, nil, windows.OPEN_EXISTING, flags, 0) + if err != nil { + return handle, &os.PathError{Op: "CreateFile", Path: path, Err: err} + } + return handle, nil +} diff --git a/internal/safepath/k8s_safeopen_linux.go b/internal/safepath/k8s_safeopen_linux.go new file mode 100644 index 0000000000000..ec7c9ed59e691 --- /dev/null +++ b/internal/safepath/k8s_safeopen_linux.go @@ -0,0 +1,112 @@ +package safepath + +/* +Copyright 2014 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +import ( + "context" + "fmt" + "path/filepath" + "strings" + + "github.com/containerd/log" + "github.com/docker/docker/internal/unix_noeintr" + "golang.org/x/sys/unix" +) + +// kubernetesSafeOpen open path formed by concatenation of the base directory +// and its subpath and return its fd. +// Symlinks are disallowed (pathname must already resolve symlinks) and the path +// path must be within the base directory. +// This is minimally modified code from https://github.com/kubernetes/kubernetes/blob/55fb1805a1217b91b36fa8fe8f2bf3a28af2454d/pkg/volume/util/subpath/subpath_linux.go#L530 +func kubernetesSafeOpen(base, subpath string) (int, error) { + // syscall.Openat flags used to traverse directories not following symlinks + const nofollowFlags = unix.O_RDONLY | unix.O_NOFOLLOW + // flags for getting file descriptor without following the symlink + const openFDFlags = unix.O_NOFOLLOW | unix.O_PATH + + pathname := filepath.Join(base, subpath) + segments := strings.Split(subpath, string(filepath.Separator)) + + // Assumption: base is the only directory that we have under control. + // Base dir is not allowed to be a symlink. + parentFD, err := unix_noeintr.Open(base, nofollowFlags|unix.O_CLOEXEC, 0) + if err != nil { + return -1, &ErrNotAccessible{Path: base, Cause: err} + } + defer func() { + if parentFD != -1 { + if err = unix_noeintr.Close(parentFD); err != nil { + log.G(context.TODO()).Errorf("Closing FD %v failed for safeopen(%v): %v", parentFD, pathname, err) + } + } + }() + + childFD := -1 + defer func() { + if childFD != -1 { + if err = unix_noeintr.Close(childFD); err != nil { + log.G(context.TODO()).Errorf("Closing FD %v failed for safeopen(%v): %v", childFD, pathname, err) + } + } + }() + + currentPath := base + + // Follow the segments one by one using openat() to make + // sure the user cannot change already existing directories into symlinks. + for _, seg := range segments { + var deviceStat unix.Stat_t + + currentPath = filepath.Join(currentPath, seg) + if !isLocalTo(currentPath, base) { + return -1, &ErrEscapesBase{Base: currentPath, Subpath: seg} + } + + // Trigger auto mount if it's an auto-mounted directory, ignore error if not a directory. + // Notice the trailing slash is mandatory, see "automount" in openat(2) and open_by_handle_at(2). + unix_noeintr.Fstatat(parentFD, seg+"/", &deviceStat, unix.AT_SYMLINK_NOFOLLOW) + + log.G(context.TODO()).Debugf("Opening path %s", currentPath) + childFD, err = unix_noeintr.Openat(parentFD, seg, openFDFlags|unix.O_CLOEXEC, 0) + if err != nil { + return -1, &ErrNotAccessible{Path: currentPath, Cause: err} + } + + err := unix_noeintr.Fstat(childFD, &deviceStat) + if err != nil { + return -1, fmt.Errorf("error running fstat on %s with %v", currentPath, err) + } + fileFmt := deviceStat.Mode & unix.S_IFMT + if fileFmt == unix.S_IFLNK { + return -1, fmt.Errorf("unexpected symlink found %s", currentPath) + } + + // Close parentFD + if err = unix_noeintr.Close(parentFD); err != nil { + return -1, fmt.Errorf("closing fd for %q failed: %v", filepath.Dir(currentPath), err) + } + // Set child to new parent + parentFD = childFD + childFD = -1 + } + + // We made it to the end, return this fd, don't close it + finalFD := parentFD + parentFD = -1 + + return finalFD, nil +} diff --git a/internal/safepath/safepath.go b/internal/safepath/safepath.go new file mode 100644 index 0000000000000..c43e06fd22b90 --- /dev/null +++ b/internal/safepath/safepath.go @@ -0,0 +1,63 @@ +package safepath + +import ( + "context" + "fmt" + "sync" + + "github.com/containerd/log" +) + +type SafePath struct { + path string + cleanup func(ctx context.Context) error + mutex sync.Mutex + + // Immutable fields + sourceBase, sourceSubpath string +} + +// Close releases the resources used by the path. +func (s *SafePath) Close(ctx context.Context) error { + s.mutex.Lock() + defer s.mutex.Unlock() + + if s.path == "" { + base, sub := s.SourcePath() + log.G(ctx).WithFields(log.Fields{ + "path": s.Path(), + "sourceBase": base, + "sourceSubpath": sub, + }).Warn("an attempt to close an already closed SafePath") + return nil + } + + s.path = "" + if s.cleanup != nil { + return s.cleanup(ctx) + } + return nil +} + +// IsValid return true when path can still be used and wasn't cleaned up by Close. +func (s *SafePath) IsValid() bool { + s.mutex.Lock() + defer s.mutex.Unlock() + return s.path != "" +} + +// Path returns a safe, temporary path that can be used to access the original path. +func (s *SafePath) Path() string { + s.mutex.Lock() + defer s.mutex.Unlock() + if s.path == "" { + panic(fmt.Sprintf("use-after-close attempted for safepath with source [%s, %s]", s.sourceBase, s.sourceSubpath)) + } + return s.path +} + +// SourcePath returns the source path the safepath points to. +func (s *SafePath) SourcePath() (string, string) { + // No mutex lock because these are immutable. + return s.sourceBase, s.sourceSubpath +} diff --git a/internal/unix_noeintr/fs_unix.go b/internal/unix_noeintr/fs_unix.go new file mode 100644 index 0000000000000..32c72d00413ed --- /dev/null +++ b/internal/unix_noeintr/fs_unix.go @@ -0,0 +1,85 @@ +//go:build !windows + +// Wrappers for unix syscalls that retry on EINTR +// TODO: Consider moving (for example to moby/sys) and making the wrappers +// auto-generated. +package unix_noeintr + +import ( + "errors" + + "golang.org/x/sys/unix" +) + +func Retry(f func() error) { + for { + err := f() + if !errors.Is(err, unix.EINTR) { + return + } + } +} + +func Mount(source string, target string, fstype string, flags uintptr, data string) (err error) { + Retry(func() error { + err = unix.Mount(source, target, fstype, flags, data) + return err + }) + return +} + +func Unmount(target string, flags int) (err error) { + Retry(func() error { + err = unix.Unmount(target, flags) + return err + }) + return +} + +func Open(path string, mode int, perm uint32) (fd int, err error) { + Retry(func() error { + fd, err = unix.Open(path, mode, perm) + return err + }) + return +} + +func Close(fd int) (err error) { + Retry(func() error { + err = unix.Close(fd) + return err + }) + return +} + +func Openat(dirfd int, path string, mode int, perms uint32) (fd int, err error) { + Retry(func() error { + fd, err = unix.Openat(dirfd, path, mode, perms) + return err + }) + return +} + +func Openat2(dirfd int, path string, how *unix.OpenHow) (fd int, err error) { + Retry(func() error { + fd, err = unix.Openat2(dirfd, path, how) + return err + }) + return +} + +func Fstat(fd int, stat *unix.Stat_t) (err error) { + Retry(func() error { + err = unix.Fstat(fd, stat) + return err + }) + return +} + +func Fstatat(fd int, path string, stat *unix.Stat_t, flags int) (err error) { + Retry(func() error { + err = unix.Fstatat(fd, path, stat, flags) + return err + }) + return +} diff --git a/volume/mounts/linux_parser.go b/volume/mounts/linux_parser.go index 1b64c239358f4..eef1cc2ec8dab 100644 --- a/volume/mounts/linux_parser.go +++ b/volume/mounts/linux_parser.go @@ -94,8 +94,18 @@ func (p *linuxParser) validateMountConfigImpl(mnt *mount.Mount, validateBindSour if mnt.BindOptions != nil { return &errMountConfig{mnt, errExtraField("BindOptions")} } + anonymousVolume := len(mnt.Source) == 0 - if len(mnt.Source) == 0 && mnt.ReadOnly { + if mnt.VolumeOptions != nil && mnt.VolumeOptions.Subpath != "" { + if anonymousVolume { + return &errMountConfig{mnt, errAnonymousVolumeWithSubpath} + } + + if !filepath.IsLocal(mnt.VolumeOptions.Subpath) { + return &errMountConfig{mnt, errInvalidSubpath} + } + } + if mnt.ReadOnly && anonymousVolume { return &errMountConfig{mnt, fmt.Errorf("must not set ReadOnly mode when using anonymous volumes")} } case mount.TypeTmpfs: diff --git a/volume/mounts/mounts.go b/volume/mounts/mounts.go index 74caf015ffd20..50f445b6a133b 100644 --- a/volume/mounts/mounts.go +++ b/volume/mounts/mounts.go @@ -9,6 +9,7 @@ import ( "github.com/containerd/log" mounttypes "github.com/docker/docker/api/types/mount" + "github.com/docker/docker/internal/safepath" "github.com/docker/docker/pkg/idtools" "github.com/docker/docker/pkg/stringid" "github.com/docker/docker/volume" @@ -74,14 +75,34 @@ type MountPoint struct { // Specifically needed for containers which are running and calls to `docker cp` // because both these actions require mounting the volumes. active int + + // SafePaths created by Setup that should be cleaned up before unmounting + // the volume. + safePaths []*safepath.SafePath } -// Cleanup frees resources used by the mountpoint -func (m *MountPoint) Cleanup() error { +// Cleanup frees resources used by the mountpoint and cleans up all the paths +// returned by Setup that hasn't been cleaned up by the caller. +func (m *MountPoint) Cleanup(ctx context.Context) error { if m.Volume == nil || m.ID == "" { return nil } + for _, p := range m.safePaths { + if !p.IsValid() { + continue + } + + err := p.Close(ctx) + base, sub := p.SourcePath() + log.G(ctx).WithFields(log.Fields{ + "error": err, + "path": p.Path(), + "sourceBase": base, + "sourceSubpath": sub, + }).Warn("cleaning up SafePath that hasn't been cleaned up by the caller") + } + if err := m.Volume.Unmount(m.ID); err != nil { return errors.Wrapf(err, "error unmounting volume %s", m.Volume.Name()) } @@ -97,30 +118,42 @@ func (m *MountPoint) Cleanup() error { // configured, or creating the source directory if supplied. // The, optional, checkFun parameter allows doing additional checking // before creating the source directory on the host. -func (m *MountPoint) Setup(mountLabel string, rootIDs idtools.Identity, checkFun func(m *MountPoint) error) (path string, err error) { +// +// The returned path can be a temporary path, caller is responsible to +// call the returned cleanup function as soon as the path is not needed. +// Cleanup doesn't unmount the underlying volumes (if any), it only +// frees up the resources that were needed to guarantee that the path +// still points to the same target (to avoid TOCTOU attack). +// +// Cleanup function doesn't need to be called when error is returned. +func (m *MountPoint) Setup(ctx context.Context, mountLabel string, rootIDs idtools.Identity, checkFun func(m *MountPoint) error) (path string, cleanup func(context.Context) error, retErr error) { if m.SkipMountpointCreation { - return m.Source, nil + return m.Source, noCleanup, nil } defer func() { - if err != nil || !label.RelabelNeeded(m.Mode) { + if retErr != nil || !label.RelabelNeeded(m.Mode) { return } - var sourcePath string - sourcePath, err = filepath.EvalSymlinks(m.Source) + sourcePath, err := filepath.EvalSymlinks(path) if err != nil { path = "" - err = errors.Wrapf(err, "error evaluating symlinks from mount source %q", m.Source) + retErr = errors.Wrapf(err, "error evaluating symlinks from mount source %q", m.Source) + if cleanupErr := cleanup(ctx); cleanupErr != nil { + log.G(ctx).WithError(cleanupErr).Warn("failed to cleanup after error") + } + cleanup = noCleanup return } err = label.Relabel(sourcePath, mountLabel, label.IsShared(m.Mode)) - if errors.Is(err, syscall.ENOTSUP) { - err = nil - } - if err != nil { + if err != nil && !errors.Is(err, syscall.ENOTSUP) { path = "" - err = errors.Wrapf(err, "error setting label on mount source '%s'", sourcePath) + retErr = errors.Wrapf(err, "error setting label on mount source '%s'", sourcePath) + if cleanupErr := cleanup(ctx); cleanupErr != nil { + log.G(ctx).WithError(cleanupErr).Warn("failed to cleanup after error") + } + cleanup = noCleanup } }() @@ -129,18 +162,36 @@ func (m *MountPoint) Setup(mountLabel string, rootIDs idtools.Identity, checkFun if id == "" { id = stringid.GenerateRandomID() } - path, err := m.Volume.Mount(id) + volumePath, err := m.Volume.Mount(id) if err != nil { - return "", errors.Wrapf(err, "error while mounting volume '%s'", m.Source) + return "", noCleanup, errors.Wrapf(err, "error while mounting volume '%s'", m.Source) } m.ID = id + clean := noCleanup + if m.Spec.VolumeOptions != nil && m.Spec.VolumeOptions.Subpath != "" { + subpath := m.Spec.VolumeOptions.Subpath + + safePath, err := safepath.Join(ctx, volumePath, subpath) + if err != nil { + if err := m.Volume.Unmount(id); err != nil { + log.G(ctx).WithError(err).Error("failed to unmount after safepath.Join failed") + } + return "", noCleanup, err + } + m.safePaths = append(m.safePaths, safePath) + log.G(ctx).Debugf("mounting (%s|%s) via %s", volumePath, subpath, safePath.Path()) + + clean = safePath.Close + volumePath = safePath.Path() + } + m.active++ - return path, nil + return volumePath, clean, nil } if len(m.Source) == 0 { - return "", fmt.Errorf("Unable to setup mount point, neither source nor volume defined") + return "", noCleanup, fmt.Errorf("Unable to setup mount point, neither source nor volume defined") } if m.Type == mounttypes.TypeBind { @@ -149,7 +200,7 @@ func (m *MountPoint) Setup(mountLabel string, rootIDs idtools.Identity, checkFun // the process of shutting down. if checkFun != nil { if err := checkFun(m); err != nil { - return "", err + return "", noCleanup, err } } @@ -158,12 +209,12 @@ func (m *MountPoint) Setup(mountLabel string, rootIDs idtools.Identity, checkFun if err := idtools.MkdirAllAndChownNew(m.Source, 0o755, rootIDs); err != nil { if perr, ok := err.(*os.PathError); ok { if perr.Err != syscall.ENOTDIR { - return "", errors.Wrapf(err, "error while creating mount source path '%s'", m.Source) + return "", noCleanup, errors.Wrapf(err, "error while creating mount source path '%s'", m.Source) } } } } - return m.Source, nil + return m.Source, noCleanup, nil } func (m *MountPoint) LiveRestore(ctx context.Context) error { @@ -207,3 +258,8 @@ func errInvalidMode(mode string) error { func errInvalidSpec(spec string) error { return errors.Errorf("invalid volume specification: '%s'", spec) } + +// noCleanup is a no-op cleanup function. +func noCleanup(_ context.Context) error { + return nil +} diff --git a/volume/mounts/parser.go b/volume/mounts/parser.go index 2bcf9ab0539a3..c4ff6c8c7e6c1 100644 --- a/volume/mounts/parser.go +++ b/volume/mounts/parser.go @@ -11,6 +11,14 @@ import ( // It's used by both LCOW and Linux parsers. var ErrVolumeTargetIsRoot = errors.New("invalid specification: destination can't be '/'") +// errAnonymousVolumeWithSubpath is returned when Subpath is specified for +// anonymous volume. +var errAnonymousVolumeWithSubpath = errors.New("must not set Subpath when using anonymous volumes") + +// errInvalidSubpath is returned when the provided Subpath is not lexically an +// relative path within volume. +var errInvalidSubpath = errors.New("subpath must be a relative path within the volume") + // read-write modes var rwModes = map[string]bool{ "rw": true, diff --git a/volume/mounts/validate_test.go b/volume/mounts/validate_test.go index ea0004b7e463f..0af2785e41875 100644 --- a/volume/mounts/validate_test.go +++ b/volume/mounts/validate_test.go @@ -28,6 +28,9 @@ func TestValidateMount(t *testing.T) { { input: mount.Mount{Type: mount.TypeVolume, Target: testDestinationPath}, }, + { + input: mount.Mount{Type: mount.TypeVolume, Target: testDestinationPath, Source: "hello", VolumeOptions: &mount.VolumeOptions{Subpath: "world"}}, + }, { input: mount.Mount{Type: mount.TypeBind}, expected: errMissingField("Target"), diff --git a/volume/mounts/windows_parser.go b/volume/mounts/windows_parser.go index f9f0f08f44d8a..c3a6c6bb69f1e 100644 --- a/volume/mounts/windows_parser.go +++ b/volume/mounts/windows_parser.go @@ -4,6 +4,7 @@ import ( "errors" "fmt" "os" + "path/filepath" "regexp" "runtime" "strings" @@ -258,7 +259,19 @@ func (p *windowsParser) validateMountConfigReg(mnt *mount.Mount, additionalValid return &errMountConfig{mnt, errExtraField("BindOptions")} } - if len(mnt.Source) == 0 && mnt.ReadOnly { + anonymousVolume := len(mnt.Source) == 0 + if mnt.VolumeOptions != nil && mnt.VolumeOptions.Subpath != "" { + if anonymousVolume { + return errAnonymousVolumeWithSubpath + } + + // Check if path is relative but without any back traversals + if !filepath.IsLocal(mnt.VolumeOptions.Subpath) { + return &errMountConfig{mnt, errInvalidSubpath} + } + } + + if anonymousVolume && mnt.ReadOnly { return &errMountConfig{mnt, fmt.Errorf("must not set ReadOnly mode when using anonymous volumes")} }