From 472097790b59c13ed696cb896a23e991d25885a1 Mon Sep 17 00:00:00 2001 From: Kathryn Baldauf Date: Tue, 9 Apr 2024 16:00:19 -0700 Subject: [PATCH] Support passing oci devices on pod boot Signed-off-by: Kathryn Baldauf --- internal/devices/assigned_devices.go | 23 +++++++++++++--- internal/hcsoci/devices.go | 21 +++----------- internal/oci/uvm.go | 27 ++++++++++++++++++ internal/uvm/create.go | 9 ++++++ internal/uvm/create_lcow.go | 41 +++++++++++++++++++++++++++- internal/uvm/create_wcow.go | 2 +- internal/uvm/types.go | 4 +-- internal/uvm/virtual_device.go | 33 +++++++++++++++------- 8 files changed, 125 insertions(+), 35 deletions(-) diff --git a/internal/devices/assigned_devices.go b/internal/devices/assigned_devices.go index 4750776403..50a840b46d 100644 --- a/internal/devices/assigned_devices.go +++ b/internal/devices/assigned_devices.go @@ -6,6 +6,8 @@ package devices import ( "context" "fmt" + "path/filepath" + "strconv" "github.com/Microsoft/hcsshim/internal/cmd" "github.com/Microsoft/hcsshim/internal/log" @@ -13,10 +15,9 @@ import ( "github.com/pkg/errors" ) -// AddDevice is the api exposed to hcsoci to handle assigning a device on a UVM +// AddDevice is the api exposed to oci/hcsoci to handle assigning a device on a WCOW UVM // -// `idType` refers to the specified device's type, supported types here are `VPCIDeviceIDType` -// and `VPCIDeviceIDTypeLegacy`. +// `idType` refers to the specified device's type. // // `deviceID` refers to the specified device's identifier. This must refer to a device instance id // for hyper-v isolated device assignment. @@ -38,7 +39,8 @@ func AddDevice(ctx context.Context, vm *uvm.UtilityVM, idType, deviceID string, vpci = nil } }() - if idType == uvm.VPCIDeviceIDType || idType == uvm.VPCIDeviceIDTypeLegacy { + + if uvm.IsValidDeviceType(idType) { vpci, err = vm.AssignDevice(ctx, deviceID, index, "") if err != nil { return vpci, nil, errors.Wrapf(err, "failed to assign device %s of type %s to pod %s", deviceID, idType, vm.ID()) @@ -105,3 +107,16 @@ func createDeviceUtilChildrenCommand(deviceUtilPath string, vmBusInstanceID stri args := []string{deviceUtilPath, "children", parentIDsFlag, "--property=location"} return args } + +// GetDeviceInfoFromPath takes a device path and parses it into the PCI ID and +// virtual function index if one is specified. +func GetDeviceInfoFromPath(rawDevicePath string) (string, uint16) { + indexString := filepath.Base(rawDevicePath) + index, err := strconv.ParseUint(indexString, 10, 16) + if err == nil { + // we have a vf index + return filepath.Dir(rawDevicePath), uint16(index) + } + // otherwise, just use default index and full device ID given + return rawDevicePath, 0 +} diff --git a/internal/hcsoci/devices.go b/internal/hcsoci/devices.go index 6da017f78b..cf6f45c273 100644 --- a/internal/hcsoci/devices.go +++ b/internal/hcsoci/devices.go @@ -8,7 +8,6 @@ import ( "fmt" "os" "path/filepath" - "strconv" specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/pkg/errors" @@ -131,7 +130,7 @@ func handleAssignedDevicesWindows( // assign device into UVM and create corresponding spec windows devices for _, d := range specDevs { - pciID, index := getDeviceInfoFromPath(d.ID) + pciID, index := devices.GetDeviceInfoFromPath(d.ID) vpciCloser, locationPaths, err := devices.AddDevice(ctx, vm, d.IDType, pciID, index, deviceUtilPath) if err != nil { return nil, nil, err @@ -175,9 +174,8 @@ func handleAssignedDevicesLCOW( // assign device into UVM and create corresponding spec windows devices for _, d := range specDevs { - switch d.IDType { - case uvm.VPCIDeviceIDType, uvm.VPCIDeviceIDTypeLegacy, uvm.GPUDeviceIDType: - pciID, index := getDeviceInfoFromPath(d.ID) + if uvm.IsValidDeviceType(d.IDType) { + pciID, index := devices.GetDeviceInfoFromPath(d.ID) vpci, err := vm.AssignDevice(ctx, pciID, index, "") if err != nil { return resultDevs, closers, errors.Wrapf(err, "failed to assign device %s, function %d to pod %s", pciID, index, vm.ID()) @@ -188,7 +186,7 @@ func handleAssignedDevicesLCOW( // map into the container d.ID = vpci.VMBusGUID resultDevs = append(resultDevs, d) - default: + } else { return resultDevs, closers, errors.Errorf("specified device %s has unsupported type %s", d.ID, d.IDType) } } @@ -225,14 +223,3 @@ func addSpecGuestDrivers(ctx context.Context, vm *uvm.UtilityVM, annotations map } return closers, err } - -func getDeviceInfoFromPath(rawDevicePath string) (string, uint16) { - indexString := filepath.Base(rawDevicePath) - index, err := strconv.ParseUint(indexString, 10, 16) - if err == nil { - // we have a vf index - return filepath.Dir(rawDevicePath), uint16(index) - } - // otherwise, just use default index and full device ID given - return rawDevicePath, 0 -} diff --git a/internal/oci/uvm.go b/internal/oci/uvm.go index ccf6506072..df7b0173fc 100644 --- a/internal/oci/uvm.go +++ b/internal/oci/uvm.go @@ -10,6 +10,7 @@ import ( runhcsopts "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options" iannotations "github.com/Microsoft/hcsshim/internal/annotations" + "github.com/Microsoft/hcsshim/internal/devices" "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/uvm" "github.com/Microsoft/hcsshim/pkg/annotations" @@ -227,6 +228,28 @@ func handleSecurityPolicy(ctx context.Context, a map[string]string, lopts *uvm.O } } +func parseDevices(ctx context.Context, specWindows *specs.Windows) []uvm.VPCIDeviceID { + if specWindows == nil || specWindows.Devices == nil { + return nil + } + extraDevices := make([]uvm.VPCIDeviceID, len(specWindows.Devices)) + for i, d := range specWindows.Devices { + pciID, index := devices.GetDeviceInfoFromPath(d.ID) + if uvm.IsValidDeviceType(d.IDType) { + key := uvm.NewVPCIDeviceID(pciID, index) + extraDevices[i] = key + } else { + log.G(ctx).WithFields(logrus.Fields{ + "device": d, + }).Warnf("device type %s invalid, skipping", d.IDType) + } + } + // nil out the devices on the spec so that they aren't re-added to the + // pause container. + specWindows.Devices = nil + return extraDevices +} + // sets options common to both WCOW and LCOW from annotations. func specToUVMCreateOptionsCommon(ctx context.Context, opts *uvm.Options, s *specs.Spec) { opts.MemorySizeInMB = ParseAnnotationsMemory(ctx, s, annotations.MemorySizeInMB, opts.MemorySizeInMB) @@ -298,6 +321,10 @@ func SpecToUVMCreateOpts(ctx context.Context, s *specs.Spec, id, owner string) ( lopts.DmVerityMode = ParseAnnotationsBool(ctx, s.Annotations, annotations.DmVerityMode, lopts.DmVerityMode) // Set HclEnabled if specified. Else default to a null pointer, which is omitted from the resulting JSON. lopts.HclEnabled = ParseAnnotationsNullableBool(ctx, s.Annotations, annotations.HclEnabled) + + // Add devices on the spec to the UVM's options + lopts.AssignedDevices = parseDevices(ctx, s.Windows) + return lopts, nil } else if IsWCOW(s) { wopts := uvm.NewDefaultOptionsWCOW(id, owner) diff --git a/internal/uvm/create.go b/internal/uvm/create.go index fa28857617..7d9ee8fa66 100644 --- a/internal/uvm/create.go +++ b/internal/uvm/create.go @@ -226,6 +226,15 @@ func (uvm *UtilityVM) CloseCtx(ctx context.Context) (err error) { windows.Close(uvm.vmmemProcess) if uvm.hcsSystem != nil { + for key, dev := range uvm.vpciDevices { + // Try to remove any devices that are still on the UVM, but do not + // fail the close if there is an error. + // This would be devices that were added on pod creation. + if err := dev.Release(ctx); err != nil { + log.G(ctx).Errorf("graceful removal of vpci device %v failed: %s", key, err) + } + } + _ = uvm.hcsSystem.Terminate(ctx) // uvm.Wait() waits on <-uvm.outputProcessingDone, which may not be closed until below // (for a Create -> Stop without a Start), or uvm.outputHandler may be blocked on IO and diff --git a/internal/uvm/create_lcow.go b/internal/uvm/create_lcow.go index c41f415f78..5fb2eb45f9 100644 --- a/internal/uvm/create_lcow.go +++ b/internal/uvm/create_lcow.go @@ -132,6 +132,7 @@ type OptionsLCOW struct { DisableTimeSyncService bool // Disables the time synchronization service HclEnabled *bool // Whether to enable the host compatibility layer ExtraVSockPorts []uint32 // Extra vsock ports to allow + AssignedDevices []VPCIDeviceID // AssignedDevices are devices to add on pod boot } // defaultLCOWOSBootFilesPath returns the default path used to locate the LCOW @@ -649,6 +650,44 @@ func makeLCOWDoc(ctx context.Context, opts *OptionsLCOW, uvm *UtilityVM) (_ *hcs }, } + // Add optional devices that were specified on the UVM spec + if len(opts.AssignedDevices) > 0 { + if doc.VirtualMachine.Devices.VirtualPci == nil { + doc.VirtualMachine.Devices.VirtualPci = make(map[string]hcsschema.VirtualPciDevice) + } + for _, d := range opts.AssignedDevices { + // we don't need to hold the modify lock here because the UVM has + // not yet been created. + existingDevice := uvm.vpciDevices[d] + if existingDevice != nil { + return nil, fmt.Errorf("device %s with index %d is specified multiple times", d.deviceInstanceID, d.virtualFunctionIndex) + } + + vmbusGUID, err := guid.NewV4() + if err != nil { + return nil, err + } + + doc.VirtualMachine.Devices.VirtualPci[vmbusGUID.String()] = hcsschema.VirtualPciDevice{ + Functions: []hcsschema.VirtualPciFunction{ + { + DeviceInstancePath: d.deviceInstanceID, + VirtualFunction: d.virtualFunctionIndex, + }, + }, + } + + device := &VPCIDevice{ + vm: uvm, + VMBusGUID: vmbusGUID.String(), + deviceInstanceID: d.deviceInstanceID, + virtualFunctionIndex: d.virtualFunctionIndex, + refCount: 1, + } + uvm.vpciDevices[d] = device + } + } + maps.Copy(doc.VirtualMachine.Devices.HvSocket.HvSocketConfig.ServiceTable, opts.AdditionalHyperVConfig) // Handle StorageQoS if set @@ -862,7 +901,7 @@ func CreateLCOW(ctx context.Context, opts *OptionsLCOW) (_ *UtilityVM, err error scsiControllerCount: opts.SCSIControllerCount, vpmemMaxCount: opts.VPMemDeviceCount, vpmemMaxSizeBytes: opts.VPMemSizeBytes, - vpciDevices: make(map[VPCIDeviceKey]*VPCIDevice), + vpciDevices: make(map[VPCIDeviceID]*VPCIDevice), physicallyBacked: !opts.AllowOvercommit, devicesPhysicallyBacked: opts.FullyPhysicallyBacked, createOpts: opts, diff --git a/internal/uvm/create_wcow.go b/internal/uvm/create_wcow.go index f413720b5c..6af8989157 100644 --- a/internal/uvm/create_wcow.go +++ b/internal/uvm/create_wcow.go @@ -263,7 +263,7 @@ func CreateWCOW(ctx context.Context, opts *OptionsWCOW) (_ *UtilityVM, err error scsiControllerCount: opts.SCSIControllerCount, vsmbDirShares: make(map[string]*VSMBShare), vsmbFileShares: make(map[string]*VSMBShare), - vpciDevices: make(map[VPCIDeviceKey]*VPCIDevice), + vpciDevices: make(map[VPCIDeviceID]*VPCIDevice), noInheritHostTimezone: opts.NoInheritHostTimezone, physicallyBacked: !opts.AllowOvercommit, devicesPhysicallyBacked: opts.FullyPhysicallyBacked, diff --git a/internal/uvm/types.go b/internal/uvm/types.go index 6d736f837e..2aeabd9636 100644 --- a/internal/uvm/types.go +++ b/internal/uvm/types.go @@ -93,8 +93,8 @@ type UtilityVM struct { scsiControllerCount uint32 // Number of SCSI controllers in the utility VM reservedSCSISlots []scsi.Slot - encryptScratch bool // Enable scratch encryption - vpciDevices map[VPCIDeviceKey]*VPCIDevice // map of device instance id to vpci device + encryptScratch bool // Enable scratch encryption + vpciDevices map[VPCIDeviceID]*VPCIDevice // map of device instance id to vpci device // Plan9 are directories mapped into a Linux utility VM plan9Counter uint64 // Each newly-added plan9 share has a counter used as its ID in the ResourceURI and for the name diff --git a/internal/uvm/virtual_device.go b/internal/uvm/virtual_device.go index 9fac786788..594bf0a541 100644 --- a/internal/uvm/virtual_device.go +++ b/internal/uvm/virtual_device.go @@ -27,11 +27,18 @@ const ( const vmbusChannelTypeGUIDFormatted = "{44c4f61d-4444-4400-9d52-802e27ede19f}" const assignedDeviceEnumerator = "VMBUS" -type VPCIDeviceKey struct { +type VPCIDeviceID struct { deviceInstanceID string virtualFunctionIndex uint16 } +func NewVPCIDeviceID(deviceInstanceID string, virtualFunctionIndex uint16) VPCIDeviceID { + return VPCIDeviceID{ + deviceInstanceID: deviceInstanceID, + virtualFunctionIndex: virtualFunctionIndex, + } +} + // VPCIDevice represents a vpci device. Holds its guid and a handle to the uvm it // belongs to. type VPCIDevice struct { @@ -72,8 +79,14 @@ func (vpci *VPCIDevice) Release(ctx context.Context) error { return nil } -// AssignDevice assigns a vpci device to the uvm -// if the device already exists, the stored VPCIDevice's ref count is increased +func IsValidDeviceType(deviceType string) bool { + return (deviceType == VPCIDeviceIDType) || + (deviceType == VPCIDeviceIDTypeLegacy) || + (deviceType == GPUDeviceIDType) +} + +// AssignDevice assigns a vpci device to a uvm. +// If the device already exists, the stored VPCIDevice's ref count is increased // and the VPCIDevice is returned. // Otherwise, a new request is made to assign the target device indicated by the deviceID // onto the UVM. A new VPCIDevice entry is made on the UVM and the VPCIDevice is returned @@ -88,7 +101,7 @@ func (uvm *UtilityVM) AssignDevice(ctx context.Context, deviceID string, index u vmBusGUID = guid.String() } - key := VPCIDeviceKey{ + key := VPCIDeviceID{ deviceInstanceID: deviceID, virtualFunctionIndex: index, } @@ -135,22 +148,22 @@ func (uvm *UtilityVM) AssignDevice(ctx context.Context, deviceID string, index u if err := uvm.modify(ctx, request); err != nil { return nil, err } - result := &VPCIDevice{ + device := &VPCIDevice{ vm: uvm, VMBusGUID: vmBusGUID, - deviceInstanceID: deviceID, - virtualFunctionIndex: index, + deviceInstanceID: key.deviceInstanceID, + virtualFunctionIndex: key.virtualFunctionIndex, refCount: 1, } - uvm.vpciDevices[key] = result - return result, nil + uvm.vpciDevices[key] = device + return device, nil } // RemoveDevice removes a vpci device from a uvm when there are // no more references to a given VPCIDevice. Otherwise, decrements // the reference count of the stored VPCIDevice and returns nil. func (uvm *UtilityVM) RemoveDevice(ctx context.Context, deviceInstanceID string, index uint16) error { - key := VPCIDeviceKey{ + key := VPCIDeviceID{ deviceInstanceID: deviceInstanceID, virtualFunctionIndex: index, }