Skip to content

Commit

Permalink
Merge pull request #559 from stgraber/main
Browse files Browse the repository at this point in the history
Improve GPU SR-IOV allocation
  • Loading branch information
hallyn authored Feb 29, 2024
2 parents 2718592 + b4e97da commit 9af3a02
Showing 1 changed file with 50 additions and 81 deletions.
131 changes: 50 additions & 81 deletions internal/server/device/gpu_sriov.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,7 @@ package device

import (
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
"sync"

"github.com/lxc/incus/internal/linux"
"github.com/lxc/incus/internal/revert"
Expand All @@ -17,6 +14,9 @@ import (
"github.com/lxc/incus/shared/util"
)

// sriovMu is used to lock concurrent GPU allocations.
var sriovMu sync.Mutex

type gpuSRIOV struct {
deviceCommon
}
Expand Down Expand Up @@ -81,53 +81,22 @@ func (d *gpuSRIOV) Start() (*deviceConfig.RunConfig, error) {
runConf := deviceConfig.RunConfig{}
saveData := make(map[string]string)

// Get SRIOV parent, i.e. the actual GPU.
parentPCIAddresses, err := d.getParentPCIAddresses()
if err != nil {
return nil, err
}

var parentPCIAddress string
var pciParentDev pcidev.Device
vfID := -1

// Make sure that vfio-pci is loaded.
err = linux.LoadModule("vfio-pci")
if err != nil {
return nil, fmt.Errorf("Error loading %q module: %w", "vfio-pci", err)
}

// Since there might be multiple GPUs, we iterate through them and get the first free
// virtual function.
for _, parentPCIAddress = range parentPCIAddresses {
// Get PCI information about the GPU device.
devicePath := filepath.Join("/sys/bus/pci/devices", parentPCIAddress)

pciParentDev, err = pcidev.ParseUeventFile(filepath.Join(devicePath, "uevent"))
if err != nil {
err = fmt.Errorf("Failed to get PCI device info for GPU %q: %w", parentPCIAddress, err)
continue
}

vfID, err = d.findFreeVirtualFunction(pciParentDev)
if err != nil {
err = fmt.Errorf("Failed to find free virtual function: %w", err)
continue
}

if vfID > -1 {
break
}
}
// Get global SR-IOV lock to prevent concurent allocations of the VF.
sriovMu.Lock()
defer sriovMu.Unlock()

// Get SRIOV VF.
parentPCIAddress, vfID, err := d.getVF()
if err != nil {
return nil, err
}

if vfID == -1 {
return nil, fmt.Errorf("All virtual functions on parent device seem to be in use")
}

vfPCIDev, err := d.setupSriovParent(parentPCIAddress, vfID, saveData)
if err != nil {
return nil, err
Expand All @@ -146,29 +115,62 @@ func (d *gpuSRIOV) Start() (*deviceConfig.RunConfig, error) {
return &runConf, nil
}

// getParentPCIAddresses returns the PCI addresses of parent GPUs.
func (d *gpuSRIOV) getParentPCIAddresses() ([]string, error) {
// getVF returns the parent PCI address and VF id for a matching GPU.
func (d *gpuSRIOV) getVF() (string, int, error) {
// List all the GPUs.
gpus, err := resources.GetGPU()
if err != nil {
return nil, err
return "", -1, err
}

var parentPCIAddresses []string
// Locate a suitable VF from the least loaded suitable card.
var pciAddress string
var vfID int
var cardTotal int
var cardAvailable int

for _, gpu := range gpus.Cards {
// Skip any cards that are not selected.
if !gpuSelected(d.Config(), gpu) {
continue
}

parentPCIAddresses = append(parentPCIAddresses, gpu.PCIAddress)
// Skip any card without SR-IOV.
if gpu.SRIOV == nil {
continue
}

// Find available VFs.
vfs := []int{}

for id, vf := range gpu.SRIOV.VFs {
if vf.Driver == "" {
vfs = append(vfs, id)
}
}

// Skip if no available VFs.
if len(vfs) == 0 {
continue
}

// Check if current card is less busy.
if (float64(len(vfs)) / float64(gpu.SRIOV.CurrentVFs)) <= (float64(cardAvailable) / float64(cardTotal)) {
continue
}

pciAddress = gpu.PCIAddress
vfID = vfs[0]
cardAvailable = len(vfs)
cardTotal = int(gpu.SRIOV.CurrentVFs)
}

if len(parentPCIAddresses) == 0 {
return nil, fmt.Errorf("Failed to detect requested GPU device")
// Check if any physical GPU was found to match.
if pciAddress == "" {
return "", -1, fmt.Errorf("Couldn't find a matching GPU with available VFs")
}

return parentPCIAddresses, nil
return pciAddress, vfID, nil
}

// setupSriovParent configures a SR-IOV virtual function (VF) device on parent and stores original properties of
Expand Down Expand Up @@ -220,39 +222,6 @@ func (d *gpuSRIOV) getVFDevicePCISlot(parentPCIAddress string, vfID string) (pci
return pciDev, nil
}

func (d *gpuSRIOV) findFreeVirtualFunction(parentDev pcidev.Device) (int, error) {
// Get number of currently enabled VFs.
sriovNumVFs := fmt.Sprintf("/sys/bus/pci/devices/%s/sriov_numvfs", parentDev.SlotName)

sriovNumVfsBuf, err := os.ReadFile(sriovNumVFs)
if err != nil {
return 0, err
}

sriovNumVfsStr := strings.TrimSpace(string(sriovNumVfsBuf))
sriovNum, err := strconv.Atoi(sriovNumVfsStr)
if err != nil {
return 0, err
}

vfID := -1

for i := 0; i < sriovNum; i++ {
pciDev, err := pcidev.ParseUeventFile(fmt.Sprintf("/sys/bus/pci/devices/%s/virtfn%d/uevent", parentDev.SlotName, i))
if err != nil {
return 0, err
}

// We assume the virtual function is free if there's no driver bound to it.
if pciDev.Driver == "" {
vfID = i
break
}
}

return vfID, nil
}

// Stop is run when the device is removed from the instance.
func (d *gpuSRIOV) Stop() (*deviceConfig.RunConfig, error) {
runConf := deviceConfig.RunConfig{
Expand Down

0 comments on commit 9af3a02

Please sign in to comment.