Skip to content

Commit

Permalink
Support more flexible LCOW layer parsing
Browse files Browse the repository at this point in the history
Previously, layer information for both Windows and Linux containers was
passed throughout the shim through the OCI runtime spec's
Windows.LayerFolders field. This was used to store the set of
directories used for the layers, including the scratch. The exact
semantics of what is expected in these directories differed between
Windows and Linux. This approach worked okay, but had a few annoying
limitations. For instance, there was no way to represent more complex
layer data, such as a VHD path as well as a partition index on that VHD.

This change removes the use of Windows.LayerFolders completely for Linux
containers, and instead creates a new layers.LCOWLayers type that is
used to represent Linux layer configuration. This new type is passed
into hcsoci.CreateContainer, and from there is passed into
layers.MountLCOWLayers where it is actually used to set up the
filesystem for the container.

The new layers.LCOWLayers type is currently quite simple, but having
this as a proper Go type allows us a lot of flexibility in the future.
We can add more fields on this struct, but we could also change out the
nested LCOWLayer type for an interface, for instance, if we wanted to
support new types of layers that have drastically different
representations.

This change does not aim to touch the way Windows container layers are
handled, nor how the Windows UVM root filesystem is set up. These would
be good things to improve in the future, but the Windows container
layers are more complicated in how they are used, so this is left for
future work.

Signed-off-by: Kevin Parsons <kevpar@microsoft.com>
  • Loading branch information
kevpar committed Apr 25, 2023
1 parent 7397839 commit f25b932
Show file tree
Hide file tree
Showing 6 changed files with 167 additions and 47 deletions.
92 changes: 92 additions & 0 deletions cmd/containerd-shim-runhcs-v1/rootfs.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
package main

import (
"encoding/json"
"fmt"
"path/filepath"
"strings"

"github.com/Microsoft/hcsshim/internal/layers"
"github.com/containerd/containerd/api/types"
"github.com/containerd/containerd/errdefs"
"github.com/containerd/containerd/mount"
)

// validateRootfsAndLayers checks to ensure we have appropriate information
// for setting up the container's root filesystem. It ensures the following:
//   - One and only one of Rootfs or LayerFolders can be provided.
//   - If LayerFolders are provided, there are at least two entries.
//   - If Rootfs is provided, there is a single entry and it does not have a Target set.
//
// Every validation failure is returned as an error wrapping
// errdefs.ErrFailedPrecondition. A nil error means the inputs are acceptable.
func validateRootfsAndLayers(rootfs []*types.Mount, layerFolders []string) error {
	if len(rootfs) > 0 && len(layerFolders) > 0 {
		return fmt.Errorf("cannot pass both a rootfs mount and Windows.LayerFolders: %w", errdefs.ErrFailedPrecondition)
	}
	if len(rootfs) == 0 && len(layerFolders) == 0 {
		return fmt.Errorf("must pass either a rootfs mount or Windows.LayerFolders: %w", errdefs.ErrFailedPrecondition)
	}
	if len(layerFolders) == 1 {
		return fmt.Errorf("must pass at least two Windows.LayerFolders: %w", errdefs.ErrFailedPrecondition)
	}
	// Only inspect the rootfs mount when one was actually supplied. When the
	// caller passes LayerFolders instead, rootfs is empty and indexing
	// rootfs[0] would panic with an index-out-of-range error.
	if len(rootfs) > 0 && (len(rootfs) > 1 || rootfs[0].Target != "") {
		return fmt.Errorf("expected a single rootfs mount with no Target path: %w", errdefs.ErrFailedPrecondition)
	}
	return nil
}

// parseLegacyRootfsMount parses the rootfs mount format that we have traditionally
// used for both Linux and Windows containers.
// The mount format consists of:
//   - The scratch folder path in m.Source, which contains sandbox.vhdx.
//   - A mount option in the form parentLayerPaths=<JSON>, where JSON is an array of
//     string paths to read-only layer directories. The exact contents of these layer
//     directories are interpreted differently for Linux and Windows containers.
//
// It returns m.Source, the decoded parent layer paths, and an error wrapping
// errdefs.ErrFailedPrecondition if the option's JSON payload cannot be decoded.
func parseLegacyRootfsMount(m *types.Mount) (string, []string, error) {
	// The parent layer paths arrive ordered layerN, layerN-1, ..., layer0.
	//
	// The OCI spec expects:
	//   layerN, layerN-1, ..., layer0, scratch
	var parents []string
	for _, opt := range m.Options {
		// Options that do not carry the parent layer paths are ignored.
		if !strings.HasPrefix(opt, mount.ParentLayerPathsFlag) {
			continue
		}
		encoded := strings.TrimPrefix(opt, mount.ParentLayerPathsFlag)
		if err := json.Unmarshal([]byte(encoded), &parents); err != nil {
			return "", nil, fmt.Errorf("unmarshal parent layer paths from mount: %w: %w", err, errdefs.ErrFailedPrecondition)
		}
	}
	return m.Source, parents, nil
}

// getLCOWLayers builds the layers.LCOWLayers describing the rootfs that should be
// set up for an LCOW container.
//
// Its inputs are the set of rootfs mounts and the layer folders from the OCI spec.
// Callers are expected to have already run validateRootfsAndLayers on them, so that
// only one of the two is populated.
//
// An error is returned if the rootfs mount has an unrecognized type or cannot be parsed.
func getLCOWLayers(rootfs []*types.Mount, layerFolders []string) (*layers.LCOWLayers, error) {
	// fromLegacyDirs converts directory-based layer information into VHD paths.
	// Each read-only layer directory should hold a layer.vhd, and the scratch
	// directory should hold a sandbox.vhdx.
	fromLegacyDirs := func(scratchDir string, readOnlyDirs []string) *layers.LCOWLayers {
		result := &layers.LCOWLayers{
			Layers:         make([]*layers.LCOWLayer, 0, len(readOnlyDirs)),
			ScratchVHDPath: filepath.Join(scratchDir, "sandbox.vhdx"),
		}
		for _, dir := range readOnlyDirs {
			result.Layers = append(result.Layers, &layers.LCOWLayer{VHDPath: filepath.Join(dir, "layer.vhd")})
		}
		return result
	}

	// Thanks to the earlier validation, a Linux container has either LayerFolders
	// or a single rootfs mount. With LayerFolders, the last entry is the scratch
	// and everything before it is a read-only layer.
	if n := len(layerFolders); n > 0 {
		return fromLegacyDirs(layerFolders[n-1], layerFolders[:n-1]), nil
	}

	m := rootfs[0]
	if m.Type != "lcow-layer" {
		return nil, fmt.Errorf("unrecognized rootfs mount type: %s", m.Type)
	}
	scratchDir, readOnlyDirs, err := parseLegacyRootfsMount(m)
	if err != nil {
		return nil, err
	}
	return fromLegacyDirs(scratchDir, readOnlyDirs), nil
}
61 changes: 27 additions & 34 deletions cmd/containerd-shim-runhcs-v1/service_internal.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,13 @@ import (
"fmt"
"os"
"path/filepath"
"strings"

runhcsopts "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options"
"github.com/Microsoft/hcsshim/internal/extendedtask"
"github.com/Microsoft/hcsshim/internal/oci"
"github.com/Microsoft/hcsshim/internal/shimdiag"
containerd_v1_types "github.com/containerd/containerd/api/types/task"
"github.com/containerd/containerd/errdefs"
"github.com/containerd/containerd/mount"
"github.com/containerd/containerd/runtime/v2/task"
"github.com/containerd/typeurl"
google_protobuf1 "github.com/gogo/protobuf/types"
Expand Down Expand Up @@ -115,46 +113,41 @@ func (s *service) createInternal(ctx context.Context, req *task.CreateTaskReques
}
}

if len(req.Rootfs) == 0 {
// If no mounts are passed via the snapshotter its the callers full
// responsibility to manage the storage. Just move on without affecting
// the config.json at all.
if spec.Windows == nil || len(spec.Windows.LayerFolders) < 2 {
return nil, errors.Wrap(errdefs.ErrFailedPrecondition, "no Windows.LayerFolders found in oci spec")
}
} else if len(req.Rootfs) != 1 {
return nil, errors.Wrap(errdefs.ErrFailedPrecondition, "Rootfs does not contain exactly 1 mount for the root file system")
} else {
m := req.Rootfs[0]
if m.Type != "windows-layer" && m.Type != "lcow-layer" {
return nil, errors.Wrapf(errdefs.ErrFailedPrecondition, "unsupported mount type '%s'", m.Type)
}
var layerFolders []string
if spec.Windows != nil {
layerFolders = spec.Windows.LayerFolders
}
if err := validateRootfsAndLayers(req.Rootfs, layerFolders); err != nil {
return nil, err
}

// parentLayerPaths are passed in layerN, layerN-1, ..., layer 0
//
// The OCI spec expects:
// layerN, layerN-1, ..., layer0, scratch
var parentLayerPaths []string
for _, option := range m.Options {
if strings.HasPrefix(option, mount.ParentLayerPathsFlag) {
err := json.Unmarshal([]byte(option[len(mount.ParentLayerPathsFlag):]), &parentLayerPaths)
if err != nil {
return nil, errors.Wrapf(errdefs.ErrFailedPrecondition, "failed to unmarshal parent layer paths from mount: %v", err)
}
}
// Only work with Windows here.
// Parsing of the rootfs mount for Linux containers occurs later.
if spec.Linux == nil && len(req.Rootfs) > 0 {
// For Windows containers, we work with LayerFolders throughout
// much of the creation logic in the shim. If we were given a
// rootfs mount, convert it to LayerFolders here.
m := req.Rootfs[0]
if m.Type != "windows-layer" {
return nil, fmt.Errorf("unsupported Windows mount type: %s", m.Type)
}

// This is a Windows Argon make sure that we have a Root filled in.
if spec.Windows.HyperV == nil {
if spec.Root == nil {
spec.Root = &specs.Root{}
}
source, parentLayerPaths, err := parseLegacyRootfsMount(m)
if err != nil {
return nil, err
}

// Append the parents
spec.Windows.LayerFolders = append(spec.Windows.LayerFolders, parentLayerPaths...)
// Append the scratch
spec.Windows.LayerFolders = append(spec.Windows.LayerFolders, m.Source)
spec.Windows.LayerFolders = append(spec.Windows.LayerFolders, source)
}

// This is a Windows Argon make sure that we have a Root filled in.
if spec.Windows.HyperV == nil {
if spec.Root == nil {
spec.Root = &specs.Root{}
}
}

if req.Terminal && req.Stderr != "" {
Expand Down
25 changes: 23 additions & 2 deletions cmd/containerd-shim-runhcs-v1/task_hcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"time"

eventstypes "github.com/containerd/containerd/api/events"
"github.com/containerd/containerd/api/types"
"github.com/containerd/containerd/errdefs"
"github.com/containerd/containerd/runtime"
"github.com/containerd/containerd/runtime/v2/task"
Expand Down Expand Up @@ -118,7 +119,16 @@ func newHcsStandaloneTask(ctx context.Context, events publisher, req *task.Creat

// createContainer is a generic call to return either a process/hypervisor isolated container, or a job container
// based on what is set in the OCI spec.
func createContainer(ctx context.Context, id, owner, netNS string, s *specs.Spec, parent *uvm.UtilityVM, shimOpts *runhcsopts.Options) (cow.Container, *resources.Resources, error) {
func createContainer(
ctx context.Context,
id,
owner,
netNS string,
s *specs.Spec,
parent *uvm.UtilityVM,
shimOpts *runhcsopts.Options,
rootfs []*types.Mount,
) (cow.Container, *resources.Resources, error) {
var (
err error
container cow.Container
Expand All @@ -138,6 +148,17 @@ func createContainer(ctx context.Context, id, owner, netNS string, s *specs.Spec
HostingSystem: parent,
NetworkNamespace: netNS,
}
if s.Linux != nil {
var layerFolders []string
if s.Windows != nil {
layerFolders = s.Windows.LayerFolders
}
lcowLayers, err := getLCOWLayers(rootfs, layerFolders)
if err != nil {
return nil, nil, err
}
opts.LCOWLayers = lcowLayers
}
if shimOpts != nil {
opts.ScaleCPULimitsToSandbox = shimOpts.ScaleCpuLimitsToSandbox
}
Expand Down Expand Up @@ -192,7 +213,7 @@ func newHcsTask(
return nil, err
}

container, resources, err := createContainer(ctx, req.ID, owner, netNS, s, parent, shimOpts)
container, resources, err := createContainer(ctx, req.ID, owner, netNS, s, parent, shimOpts, req.Rootfs)
if err != nil {
return nil, err
}
Expand Down
2 changes: 2 additions & 0 deletions internal/hcsoci/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"github.com/Microsoft/hcsshim/internal/guestpath"
"github.com/Microsoft/hcsshim/internal/hcs"
hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2"
"github.com/Microsoft/hcsshim/internal/layers"
"github.com/Microsoft/hcsshim/internal/log"
"github.com/Microsoft/hcsshim/internal/oci"
"github.com/Microsoft/hcsshim/internal/resources"
Expand Down Expand Up @@ -43,6 +44,7 @@ type CreateOptions struct {
SchemaVersion *hcsschema.Version // Requested Schema Version. Defaults to v2 for RS5, v1 for RS1..RS4
HostingSystem *uvm.UtilityVM // Utility or service VM in which the container is to be created.
NetworkNamespace string // Host network namespace to use (overrides anything in the spec)
LCOWLayers *layers.LCOWLayers

// This is an advanced debugging parameter. It allows for diagnosability by leaving a containers
// resources allocated in case of a failure. Thus you would be able to use tools such as hcsdiag
Expand Down
4 changes: 2 additions & 2 deletions internal/hcsoci/resources_lcow.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@ func allocateLinuxResources(ctx context.Context, coi *createOptionsInternal, r *
coi.Spec.Root = &specs.Root{}
}
containerRootInUVM := r.ContainerRootInUVM()
if coi.Spec.Windows != nil && len(coi.Spec.Windows.LayerFolders) > 0 {
if coi.LCOWLayers != nil {
log.G(ctx).Debug("hcsshim::allocateLinuxResources mounting storage")
rootPath, scratchPath, closer, err := layers.MountLCOWLayers(ctx, coi.actualID, coi.Spec.Windows.LayerFolders, containerRootInUVM, coi.HostingSystem)
rootPath, scratchPath, closer, err := layers.MountLCOWLayers(ctx, coi.actualID, coi.LCOWLayers, containerRootInUVM, coi.HostingSystem)
if err != nil {
return errors.Wrap(err, "failed to mount container storage")
}
Expand Down
30 changes: 21 additions & 9 deletions internal/layers/layers.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,21 @@ import (
"github.com/Microsoft/hcsshim/internal/wclayer"
)

// LCOWLayer describes a single layer of an LCOW container's root filesystem.
type LCOWLayer struct {
// VHDPath is the path to the VHD file for this layer. MountLCOWLayers
// passes it to addLCOWLayer to attach the layer to the UVM.
VHDPath string
}

// LCOWLayers defines a set of LCOW layers.
// For future extensibility, the LCOWLayer type could be swapped for an interface,
// and we could either call some method on the interface to "apply" it directly to the UVM,
// or type cast it to the various types that we support, and use the one it matches.
// This would allow us to support different "types" of mounts, such as raw VHD, VHD+partition, etc.
type LCOWLayers struct {
// Layers holds the container's layers.
// Should be in order from top-most layer to bottom-most layer.
Layers []*LCOWLayer
// ScratchVHDPath is the path to the container's scratch VHD.
// MountLCOWLayers resolves symlinks on this path before using it.
ScratchVHDPath string
}

type lcowLayersCloser struct {
uvm *uvm.UtilityVM
guestCombinedLayersPath string
Expand Down Expand Up @@ -65,7 +80,7 @@ func (lc *lcowLayersCloser) Release(ctx context.Context) (retErr error) {
// Returns the path at which the `rootfs` of the container can be accessed. Also, returns the path inside the
// UVM at which container scratch directory is located. Usually, this path is the path at which the container
// scratch VHD is mounted. However, in case of scratch sharing this is a directory under the UVM scratch.
func MountLCOWLayers(ctx context.Context, containerID string, layerFolders []string, guestRoot string, vm *uvm.UtilityVM) (_, _ string, _ resources.ResourceCloser, err error) {
func MountLCOWLayers(ctx context.Context, containerID string, layers *LCOWLayers, guestRoot string, vm *uvm.UtilityVM) (_, _ string, _ resources.ResourceCloser, err error) {
if vm.OS() != "linux" {
return "", "", nil, errors.New("MountLCOWLayers should only be called for LCOW")
}
Expand All @@ -87,21 +102,18 @@ func MountLCOWLayers(ctx context.Context, containerID string, layerFolders []str
}
}()

for _, layerPath := range layerFolders[:len(layerFolders)-1] {
log.G(ctx).WithField("layerPath", layerPath).Debug("mounting layer")
var (
layerPath = filepath.Join(layerPath, "layer.vhd")
uvmPath string
)
uvmPath, closer, err := addLCOWLayer(ctx, vm, layerPath)
for _, layer := range layers.Layers {
log.G(ctx).WithField("layerPath", layer.VHDPath).Debug("mounting layer")
uvmPath, closer, err := addLCOWLayer(ctx, vm, layer.VHDPath)
if err != nil {
return "", "", nil, fmt.Errorf("failed to add LCOW layer: %s", err)
}
layerClosers = append(layerClosers, closer)
lcowUvmLayerPaths = append(lcowUvmLayerPaths, uvmPath)
}

hostPath, err := getScratchVHDPath(layerFolders)
hostPath := layers.ScratchVHDPath
hostPath, err = filepath.EvalSymlinks(hostPath)
if err != nil {
return "", "", nil, fmt.Errorf("failed to eval symlinks on scratch path: %w", err)
}
Expand Down

0 comments on commit f25b932

Please sign in to comment.