-
Notifications
You must be signed in to change notification settings - Fork 18.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add support for CDI devices under Linux #45134
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
package daemon | ||
|
||
import ( | ||
"fmt" | ||
|
||
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi" | ||
"github.com/docker/docker/errdefs" | ||
"github.com/hashicorp/go-multierror" | ||
specs "github.com/opencontainers/runtime-spec/specs-go" | ||
"github.com/pkg/errors" | ||
"github.com/sirupsen/logrus" | ||
) | ||
|
||
// cdiHandler bundles the CDI cache that injectCDIDevices uses to edit OCI specs | ||
// and that getErrors queries for errors recorded by the cache. | ||
type cdiHandler struct { | ||
// registry is the CDI cache created by cdi.NewCache in RegisterCDIDriver. | ||
registry *cdi.Cache | ||
} | ||
|
||
// RegisterCDIDriver registers the CDI device driver. | ||
// The driver injects CDI devices into an incoming OCI spec and is called for DeviceRequests associated with CDI devices. | ||
func RegisterCDIDriver(opts ...cdi.Option) { | ||
cache, err := cdi.NewCache(opts...) | ||
if err != nil { | ||
logrus.WithError(err).Error("CDI registry initialization failed") | ||
// We create a spec updater that always returns an error. | ||
// This error will be returned only when a CDI device is requested. | ||
// This ensures that daemon startup is not blocked by a CDI registry initialization failure. | ||
errorOnUpdateSpec := func(s *specs.Spec, dev *deviceInstance) error { | ||
return fmt.Errorf("CDI device injection failed due to registry initialization failure: %w", err) | ||
} | ||
driver := &deviceDriver{ | ||
updateSpec: errorOnUpdateSpec, | ||
} | ||
registerDeviceDriver("cdi", driver) | ||
return | ||
} | ||
|
||
// We construct a spec updates that injects CDI devices into the OCI spec using the initialized registry. | ||
c := &cdiHandler{ | ||
registry: cache, | ||
} | ||
|
||
driver := &deviceDriver{ | ||
updateSpec: c.injectCDIDevices, | ||
} | ||
|
||
registerDeviceDriver("cdi", driver) | ||
} | ||
|
||
// injectCDIDevices injects a set of CDI devices into the specified OCI specification. | ||
func (c *cdiHandler) injectCDIDevices(s *specs.Spec, dev *deviceInstance) error { | ||
if dev.req.Count != 0 { | ||
return errdefs.InvalidParameter(errors.New("unexpected count in CDI device request")) | ||
} | ||
if len(dev.req.Options) > 0 { | ||
return errdefs.InvalidParameter(errors.New("unexpected options in CDI device request")) | ||
} | ||
|
||
cdiDeviceNames := dev.req.DeviceIDs | ||
if len(cdiDeviceNames) == 0 { | ||
return nil | ||
} | ||
|
||
_, err := c.registry.InjectDevices(s, cdiDeviceNames...) | ||
if err != nil { | ||
if rerrs := c.getErrors(); rerrs != nil { | ||
// We log the errors that may have been generated while refreshing the CDI registry. | ||
// These may be due to malformed specifications or device name conflicts that could be | ||
// the cause of an injection failure. | ||
logrus.WithError(rerrs).Warning("Refreshing the CDI registry generated errors") | ||
} | ||
|
||
return fmt.Errorf("CDI device injection failed: %w", err) | ||
} | ||
|
||
return nil | ||
} | ||
|
||
// getErrors returns a single error representation of errors that may have occurred while refreshing the CDI registry. | ||
func (c *cdiHandler) getErrors() error { | ||
errors := c.registry.GetErrors() | ||
|
||
var err *multierror.Error | ||
for _, errs := range errors { | ||
err = multierror.Append(err, errs...) | ||
} | ||
return err.ErrorOrNil() | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
package container // import "github.com/docker/docker/integration/container" | ||
|
||
import ( | ||
"bytes" | ||
"context" | ||
"io" | ||
"os" | ||
"path/filepath" | ||
"strings" | ||
"testing" | ||
|
||
"github.com/docker/docker/api/types" | ||
containertypes "github.com/docker/docker/api/types/container" | ||
"github.com/docker/docker/integration/internal/container" | ||
"github.com/docker/docker/pkg/stdcopy" | ||
"github.com/docker/docker/testutil/daemon" | ||
"gotest.tools/v3/assert" | ||
is "gotest.tools/v3/assert/cmp" | ||
"gotest.tools/v3/skip" | ||
) | ||
|
||
func TestCreateWithCDIDevices(t *testing.T) { | ||
skip.If(t, testEnv.OSType != "linux", "CDI devices are only supported on Linux") | ||
skip.If(t, testEnv.IsRemoteDaemon, "cannot run cdi tests with a remote daemon") | ||
|
||
cwd, err := os.Getwd() | ||
assert.NilError(t, err) | ||
d := daemon.New(t, daemon.WithExperimental()) | ||
d.StartWithBusybox(t, "--cdi-spec-dir="+filepath.Join(cwd, "testdata", "cdi")) | ||
defer d.Stop(t) | ||
|
||
client := d.NewClientT(t) | ||
|
||
ctx := context.Background() | ||
id := container.Run(ctx, t, client, | ||
container.WithCmd("/bin/sh", "-c", "env"), | ||
container.WithCDIDevices("vendor1.com/device=foo"), | ||
) | ||
defer client.ContainerRemove(ctx, id, types.ContainerRemoveOptions{Force: true}) | ||
|
||
inspect, err := client.ContainerInspect(ctx, id) | ||
assert.NilError(t, err) | ||
|
||
expectedRequests := []containertypes.DeviceRequest{ | ||
{ | ||
Driver: "cdi", | ||
DeviceIDs: []string{"vendor1.com/device=foo"}, | ||
}, | ||
} | ||
assert.Check(t, is.DeepEqual(inspect.HostConfig.DeviceRequests, expectedRequests)) | ||
|
||
reader, err := client.ContainerLogs(ctx, id, types.ContainerLogsOptions{ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @corhere I initially tried to implement something that inspects the container here, but did not see the expected environment variable. This is probably because the container config is being based on the unmodified spec, so if the expectation is that the config shows the new envvar, then we would need to do some kind of reconciliation after making the modifications. How is this currently handled for other entities (e.g. devices or mounts that are requested)? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I had been wondering about that. I noticed that when this was raised before (docker/cli#3864 (comment)) it seemed the preference was to indeed have the spec shown by That said, it seems like this could be solved at the moby level by capturing the OCI spec after all the DeviceRequests are processed and returning that in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, at present the I agree that the goal should be to get these changes visible to daemon, but I would suggest tackling this in a follow-up if possible. Currently, I don't believe that any of the container engines that offer CDI support do this kind of reconciliation -- unless they use the on-disk There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The container's OCI spec is based on the container config, not the other way around. The container config is the source of truth. No reconciliation is performed for other entities. 
I really wish we'd thought to record the maintainers call which you joined where we hashed this all out, because I vaguely recall we concluded reporting the CDI device request as-is when inspecting the container, without back-propagating how it patches the OCI spec, would be acceptable as it is sufficient to flag that the container's OCI spec has been patched by CDI and therefore any issues reported with that particular container can be assumed to be the responsibility of the CDI spec's vendor until proven otherwise. Reaching a final decision on this aspect of CDI integration is one of the gates for the feature to graduate from experimental @TBBle. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would it be possible to capture the applied CDI spec into the container config or metadata in some way? One of the advantages of CDI is that the specs can be dynamic, e.g. generated just-in-time for that container, and then discarded afterwards, or even updated in-place, I believe. This could make There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was not aware that dynamic CDI specs were a thing. Given that, some mechanism for reporting the applied CDI spec when inspecting a container will be necessary. What affordances does cri-containerd provide for troubleshooting containers which have dynamic CDI specs applied to them? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. IIRC, in containerd-cri, CDI is applied to the OCI spec before it's passed to the runtime shim, so the config.json on-disk will have the results of applying the CDI annotations. 
I'm not sure what debugging options are available at the CRI layer though (e.g., can we dump the historical CRI requests that led to this container's state), and I haven't looked into CDI's support for this, beyond the mention in docker/cli#3864 (comment) that it has APIs to support inspection; so it's possible containerd-cri doesn't currently expose CDI to inspection, just the resulting OCI spec. Edit: Quick check, and CDI is applied entirely by the containerd-cri plugin, so what the containerd core service gets is with CDI already applied, and so containerd itself is totally agnostic to CDI. More edit, really going to bed after this: As it happens, right now, So anyway, I think for containerd-cri we get to see the CDI device name in the CRI API, and the OCI spec that has had the edits applied in containerd core, but the same risk of "cdi spec now isn't the cdi spec used" applies. For dynamic CDI specs, NVidia already has a non-production-ready Dynamic Resource Allocation setup for Multi-Instance GPUs using CDI, and creating a CDI spec for claims on-the-fly (see There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What you described for containerd-cri would also hold true verbatim for Moby if you substitute the analogous components:
You are correct that cri-dockerd is an adapter between the CRI API and Docker Engine API and so would implement CDI support by translating the I have no objections to the Moby+CDI troubleshooting experience being on par with cri-containerd+CDI's troubleshooting experience. I won't complain if you want to go above and beyond, either. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for the discussion here @TBBle and for diving into how things are handled in containerd (and similarly in cri-o). Note that assuming that
With regards to:
One thing to investigate would be to "inject" the requested devices into an empty OCI spec and then store this. This should give a reasonable representation of the edits required for a set of CDI devices. Note that the feasibility of this would have to be properly investigated. Now from a testing perspective, checking the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd suggest including a HostConfig inspection check in the test, simply because that's a publicly visible API we'll want to maintain, i.e. dockerd-cri's implementation of |
||
ShowStdout: true, | ||
}) | ||
assert.NilError(t, err) | ||
|
||
actualStdout := new(bytes.Buffer) | ||
actualStderr := io.Discard | ||
_, err = stdcopy.StdCopy(actualStdout, actualStderr, reader) | ||
assert.NilError(t, err) | ||
|
||
outlines := strings.Split(actualStdout.String(), "\n") | ||
assert.Assert(t, is.Contains(outlines, "FOO=injected")) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
cdiVersion: "0.3.0" | ||
kind: "vendor1.com/device" | ||
devices: | ||
- name: foo | ||
containerEdits: | ||
env: | ||
- FOO=injected |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In future we could expose this information in
docker info
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sure. I have added an item to #45192 where I'm tracking the minor follow-ups.