Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dragonball: add pci vfio passthrough, hot(un)plug support #8740

Merged
merged 4 commits into from
Dec 28, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
53 changes: 53 additions & 0 deletions src/dragonball/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions src/dragonball/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ dbs-utils = { path = "./src/dbs_utils" }
dbs-virtio-devices = { path = "./src/dbs_virtio_devices", optional = true, features = [
"virtio-mmio",
] }
dbs-pci = { path = "./src/dbs_pci", optional = true }
derivative = "2.2.0"
kvm-bindings = "0.6.0"
kvm-ioctls = "0.12.0"
Expand All @@ -48,6 +49,8 @@ virtio-queue = { version = "0.7.0", optional = true }
vm-memory = { version = "0.10.0", features = ["backend-mmap"] }
crossbeam-channel = "0.5.6"
fuse-backend-rs = "0.10.5"
vfio-bindings = { version = "0.3.0", optional = true }
vfio-ioctls = { version = "0.1.0", optional = true }

[dev-dependencies]
slog-async = "2.7.0"
Expand Down Expand Up @@ -77,3 +80,4 @@ vhost-net = ["dbs-virtio-devices/vhost-net"]
vhost-user-fs = ["dbs-virtio-devices/vhost-user-fs"]
vhost-user-net = ["dbs-virtio-devices/vhost-user-net"]
vhost-user-blk = ["dbs-virtio-devices/vhost-user-blk"]
host-device = ["dep:vfio-bindings", "dep:vfio-ioctls", "dep:dbs-pci"]
135 changes: 133 additions & 2 deletions src/dragonball/src/api/v1/vmm_action.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,14 @@
use std::fs::File;
use std::sync::{Arc, Mutex};

use crossbeam_channel::{Receiver, Sender, TryRecvError};
use crossbeam_channel::{unbounded, Receiver, Sender, TryRecvError};
use log::{debug, error, info, warn};
use tracing::instrument;

use crate::error::{Result, StartMicroVmError, StopMicrovmError};
use crate::event_manager::EventManager;
use crate::tracer::{DragonballTracer, TraceError, TraceInfo};
use crate::vcpu::VcpuManagerError;
use crate::vm::{CpuTopology, KernelConfigInfo, VmConfigInfo};
use crate::vmm::Vmm;

Expand All @@ -36,6 +37,8 @@ pub use crate::device_manager::fs_dev_mgr::{
};
#[cfg(feature = "virtio-mem")]
pub use crate::device_manager::mem_dev_mgr::{MemDeviceConfigInfo, MemDeviceError};
#[cfg(feature = "host-device")]
use crate::device_manager::vfio_dev_mgr::{HostDeviceConfig, VfioDeviceError, VfioDeviceHostInfo};
#[cfg(feature = "vhost-net")]
pub use crate::device_manager::vhost_net_dev_mgr::{
VhostNetDeviceConfigInfo, VhostNetDeviceError, VhostNetDeviceMgr,
Expand Down Expand Up @@ -148,11 +151,20 @@ pub enum VmmActionError {
/// End tracing Failed.
#[error("End tracing failed: {0}")]
EndTracingFailed(#[source] TraceError),

#[cfg(feature = "host-device")]
/// The action `InsertHostDevice` failed either because of bad user input or an internal error.
#[error("failed to add VFIO passthrough device: {0:?}")]
HostDeviceConfig(#[source] VfioDeviceError),
#[cfg(feature = "host-device")]
/// The action 'RemoveHostDevice' failed because of vcpu manager internal error.
#[error("remove host device error: {0}")]
RemoveHostDevice(#[source] VcpuManagerError),
}

/// This enum represents the public interface of the VMM. Each action contains various
/// bits of information (ids, paths, etc.).
#[derive(Clone, Debug, PartialEq, Eq)]
#[derive(Clone, Debug, PartialEq)]
pub enum VmmAction {
/// Configure the boot source of the microVM using `BootSourceConfig`.
/// This action can only be called before the microVM has booted.
Expand Down Expand Up @@ -245,6 +257,18 @@ pub enum VmmAction {
/// Add a new balloon device or update one that already exists using the `BalloonDeviceConfig`
/// as input.
InsertBalloonDevice(BalloonDeviceConfigInfo),

#[cfg(feature = "host-device")]
/// Add a VFIO assignment host device or update that already exists
InsertHostDevice(HostDeviceConfig),

#[cfg(feature = "host-device")]
/// Prepare to remove a VFIO assignment host device that already exists
PrepareRemoveHostDevice(String),

#[cfg(feature = "host-device")]
/// Add a VFIO assignment host device or update that already exists
RemoveHostDevice(String),
}

/// The enum represents the response sent by the VMM in case of success. The response is either
Expand All @@ -257,6 +281,8 @@ pub enum VmmData {
MachineConfiguration(Box<VmConfigInfo>),
/// Prometheus Metrics represented by String.
HypervisorMetrics(String),
/// Sync Hotplug
SyncHotplug((Sender<Option<i32>>, Receiver<Option<i32>>)),
}

/// Request data type used to communicate between the API and the VMM.
Expand Down Expand Up @@ -371,6 +397,14 @@ impl VmmService {
VmmAction::InsertBalloonDevice(balloon_cfg) => {
self.add_balloon_device(vmm, event_mgr, balloon_cfg)
}
#[cfg(feature = "host-device")]
VmmAction::InsertHostDevice(hostdev_cfg) => self.add_vfio_device(vmm, hostdev_cfg),
#[cfg(feature = "host-device")]
VmmAction::PrepareRemoveHostDevice(hostdev_id) => {
self.prepare_remove_vfio_device(vmm, &hostdev_id)
}
#[cfg(feature = "host-device")]
VmmAction::RemoveHostDevice(hostdev_cfg) => self.remove_vfio_device(vmm, &hostdev_cfg),
};

debug!("send vmm response: {:?}", response);
Expand Down Expand Up @@ -539,6 +573,8 @@ impl VmmService {
// - Some(path), legacy_manager will create_socket_console on that path.
config.serial_path = machine_config.serial_path;

config.pci_hotplug_enabled = machine_config.pci_hotplug_enabled;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this fine without feature attributes? e.g. #[cfg(all(hotplug, host-device))]?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yep, vm config will always have pci_hotplug_enabled parameter, but when that value pass down, if the #[cfg(all(feature = "hotplug", feature = "host-device"))] not enabled, Dragonball won't process pci_hotplug_enabled


vm.set_vm_config(config.clone());
self.machine_config = config;

Expand Down Expand Up @@ -813,6 +849,101 @@ impl VmmService {
.map_err(VmmActionError::FsDevice)
}

#[cfg(feature = "host-device")]
fn add_vfio_device(&self, vmm: &mut Vmm, config: HostDeviceConfig) -> VmmRequestResult {
let vm = vmm.get_vm_mut().ok_or(VmmActionError::HostDeviceConfig(
VfioDeviceError::InvalidVMID,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: InvalidVMID -> InvalidVmId

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll raise a separate PR to fix this.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#8747 is raised for this problem.

))?;
info!("add_vfio_device: {:?}", config);

let mut ctx = vm.create_device_op_context(None).map_err(|e| {
info!("create device op context error: {:?}", e);
if let StartMicroVmError::MicroVMAlreadyRunning = e {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: MicroVMAlreadyRunning -> MicroVmAlreadyRunning

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto

VmmActionError::HostDeviceConfig(VfioDeviceError::UpdateNotAllowedPostBoot)
} else if let StartMicroVmError::UpcallServerNotReady = e {
VmmActionError::UpcallServerNotReady
} else {
VmmActionError::StartMicroVm(e)
}
})?;

vm.device_manager()
.vfio_manager
.lock()
.unwrap()
.insert_device(&mut ctx, config.into())
.map_err(VmmActionError::HostDeviceConfig)?;
Ok(VmmData::Empty)
}

// using upcall to unplug the pci device in the guest
#[cfg(feature = "host-device")]
fn prepare_remove_vfio_device(&mut self, vmm: &mut Vmm, hostdev_id: &str) -> VmmRequestResult {
let vm = vmm.get_vm_mut().ok_or(VmmActionError::HostDeviceConfig(
VfioDeviceError::InvalidVMID,
))?;

info!("prepare_remove_vfio_device: {:?}", hostdev_id);
let ctx = vm.create_device_op_context(None).map_err(|e| {
info!("create device op context error: {:?}", e);
if let StartMicroVmError::MicroVMAlreadyRunning = e {
VmmActionError::HostDeviceConfig(VfioDeviceError::UpdateNotAllowedPostBoot)
} else if let StartMicroVmError::UpcallServerNotReady = e {
VmmActionError::UpcallServerNotReady
} else {
VmmActionError::StartMicroVm(e)
}
})?;

let (sender, receiver) = unbounded();

// It is safe because we don't expect poison lock.
let vfio_manager = vm.device_manager.vfio_manager.lock().unwrap();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Give some comments to describe why it is safe?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done


vfio_manager
.prepare_remove_device(&ctx, hostdev_id, sender.clone())
.map(|_| VmmData::SyncHotplug((sender, receiver)))
.map_err(VmmActionError::HostDeviceConfig)
}

#[cfg(feature = "host-device")]
fn remove_vfio_device(&self, vmm: &mut Vmm, hostdev_id: &str) -> VmmRequestResult {
let vm = vmm.get_vm_mut().ok_or(VmmActionError::HostDeviceConfig(
VfioDeviceError::InvalidVMID,
))?;

info!("remove_vfio_device: {:?}", hostdev_id);
let mut ctx = vm.create_device_op_context(None).map_err(|e| {
info!("create device op context error: {:?}", e);
if let StartMicroVmError::MicroVMAlreadyRunning = e {
VmmActionError::HostDeviceConfig(VfioDeviceError::UpdateNotAllowedPostBoot)
} else if let StartMicroVmError::UpcallServerNotReady = e {
VmmActionError::UpcallServerNotReady
} else {
VmmActionError::StartMicroVm(e)
}
})?;

// It is safe because we don't expect poison lock.
let mut vfio_manager = vm.device_manager.vfio_manager.lock().unwrap();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done


vfio_manager
.remove_device(&mut ctx, hostdev_id)
.map_err(VmmActionError::HostDeviceConfig)?;

// we need to revalidate io_manager cache in all vcpus
// in order to drop old io_manager and close device's fd
vm.vcpu_manager()
.map_err(VmmActionError::RemoveHostDevice)?
.revalidate_all_vcpus_cache()
.map_err(VmmActionError::RemoveHostDevice)?;

// FIXME: we should clear corresponding information because vfio module in
// host kernel will clear iommu table in this scenario.

Ok(VmmData::Empty)
}

#[cfg(feature = "hotplug")]
#[instrument(skip(self))]
fn resize_vcpu(&mut self, vmm: &mut Vmm, config: VcpuResizeInfo) -> VmmRequestResult {
Expand Down
4 changes: 2 additions & 2 deletions src/dragonball/src/dbs_device/src/resources.rs
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ pub enum Resource {
size: u32,
},
/// Network Interface Card MAC address.
MacAddresss(String),
MacAddress(String),
/// KVM memslot index.
KvmMemSlot(u32),
}
Expand Down Expand Up @@ -310,7 +310,7 @@ impl DeviceResources {
/// Get the first resource information for NIC MAC address.
pub fn get_mac_address(&self) -> Option<String> {
for entry in self.0.iter().as_ref() {
if let Resource::MacAddresss(addr) = entry {
if let Resource::MacAddress(addr) = entry {
return Some(addr.clone());
}
}
Expand Down
1 change: 1 addition & 0 deletions src/dragonball/src/dbs_pci/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ mod msix;
pub use msix::{MsixCap, MsixState, MSIX_TABLE_ENTRY_SIZE};

mod vfio;
pub use vfio::{VfioPciDevice, VfioPciError, VENDOR_NVIDIA};

/// Error codes related to PCI root/bus/device operations.
#[derive(Debug, thiserror::Error)]
Expand Down