Skip to content

Commit

Permalink
feat: Add ability to resume snapshots and write back changes to the b…
Browse files Browse the repository at this point in the history
…acking file continuously, add API endpoint to `msync` the files backing snapshots and to optionally serialize the VM state
  • Loading branch information
pojntfx committed Apr 5, 2024
1 parent a3b0f09 commit c76df56
Show file tree
Hide file tree
Showing 11 changed files with 192 additions and 61 deletions.
4 changes: 4 additions & 0 deletions resources/seccomp/aarch64-unknown-linux-musl.json
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@
{
"syscall": "fsync"
},
{
"syscall": "msync",
"comment": "Used for live migration to sync dirty pages"
},
{
"syscall": "close"
},
Expand Down
4 changes: 4 additions & 0 deletions resources/seccomp/x86_64-unknown-linux-musl.json
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@
{
"syscall": "fsync"
},
{
"syscall": "msync",
"comment": "Used for live migration to sync dirty pages"
},
{
"syscall": "close"
},
Expand Down
8 changes: 8 additions & 0 deletions src/firecracker/src/api_server/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,14 @@ impl ApiServer {
&METRICS.latencies_us.diff_create_snapshot,
"create diff snapshot",
)),
SnapshotType::Msync => Some((
&METRICS.latencies_us.diff_create_snapshot,
"memory synchronization snapshot",
)),
SnapshotType::MsyncAndState => Some((
&METRICS.latencies_us.diff_create_snapshot,
"memory synchronization and state snapshot",
)),
},
VmmAction::LoadSnapshot(_) => {
Some((&METRICS.latencies_us.load_snapshot, "load snapshot"))
Expand Down
5 changes: 5 additions & 0 deletions src/firecracker/src/api_server/request/snapshot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ fn parse_put_snapshot_load(body: &Body) -> Result<ParsedRequest, Error> {
mem_backend,
enable_diff_snapshots: snapshot_config.enable_diff_snapshots,
resume_vm: snapshot_config.resume_vm,
shared: snapshot_config.shared,
};

// Construct the `ParsedRequest` object.
Expand Down Expand Up @@ -175,6 +176,7 @@ mod tests {
},
enable_diff_snapshots: false,
resume_vm: false,
shared: false,
};
let mut parsed_request = parse_put_snapshot(&Body::new(body), Some("load")).unwrap();
assert!(parsed_request
Expand Down Expand Up @@ -202,6 +204,7 @@ mod tests {
},
enable_diff_snapshots: true,
resume_vm: false,
shared: false,
};
let mut parsed_request = parse_put_snapshot(&Body::new(body), Some("load")).unwrap();
assert!(parsed_request
Expand Down Expand Up @@ -229,6 +232,7 @@ mod tests {
},
enable_diff_snapshots: false,
resume_vm: true,
shared: false,
};
let mut parsed_request = parse_put_snapshot(&Body::new(body), Some("load")).unwrap();
assert!(parsed_request
Expand All @@ -253,6 +257,7 @@ mod tests {
},
enable_diff_snapshots: false,
resume_vm: true,
shared: false,
};
let parsed_request = parse_put_snapshot(&Body::new(body), Some("load")).unwrap();
assert_eq!(
Expand Down
7 changes: 7 additions & 0 deletions src/firecracker/swagger/firecracker.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1199,6 +1199,8 @@ definitions:
enum:
- Full
- Diff
- Msync
- MsyncAndState
description:
Type of snapshot to create. It is optional and by default, a full
snapshot is created.
Expand Down Expand Up @@ -1234,6 +1236,11 @@ definitions:
type: boolean
description:
When set to true, the vm is also resumed if the snapshot load is successful.
shared:
type: boolean
description: When set to true and the guest memory backend is a file,
changes to the memory are asynchronously written back to the
backend as the VM is running.

TokenBucket:
type: object
Expand Down
6 changes: 6 additions & 0 deletions src/vmm/src/logger/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -616,6 +616,10 @@ pub struct PerformanceMetrics {
pub vmm_full_create_snapshot: SharedStoreMetric,
/// Measures the snapshot diff create time, at the VMM level, in microseconds.
pub vmm_diff_create_snapshot: SharedStoreMetric,
/// Measures the snapshot memory synchronization time, at the VMM level, in microseconds.
pub vmm_msync_create_snapshot: SharedStoreMetric,
/// Measures the snapshot memory synchronization and state time, at the VMM level, in microseconds.
pub vmm_msync_and_state_create_snapshot: SharedStoreMetric,
/// Measures the snapshot load time, at the VMM level, in microseconds.
pub vmm_load_snapshot: SharedStoreMetric,
/// Measures the microVM pausing duration, at the VMM level, in microseconds.
Expand All @@ -634,6 +638,8 @@ impl PerformanceMetrics {
resume_vm: SharedStoreMetric::new(),
vmm_full_create_snapshot: SharedStoreMetric::new(),
vmm_diff_create_snapshot: SharedStoreMetric::new(),
vmm_msync_create_snapshot: SharedStoreMetric::new(),
vmm_msync_and_state_create_snapshot: SharedStoreMetric::new(),
vmm_load_snapshot: SharedStoreMetric::new(),
vmm_pause_vm: SharedStoreMetric::new(),
vmm_resume_vm: SharedStoreMetric::new(),
Expand Down
139 changes: 85 additions & 54 deletions src/vmm/src/persist.rs
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,8 @@ pub enum CreateSnapshotError {
UnsupportedVersion,
/// Cannot write memory file: {0}
Memory(MemoryError),
/// Cannot msync memory file: {0}
MemoryMsync(MemoryError),
/// Cannot perform {0} on the memory backing file: {1}
MemoryBackingFile(&'static str, io::Error),
/// Cannot save the microVM state: {0}
Expand All @@ -163,11 +165,16 @@ pub fn create_snapshot(
vm_info: &VmInfo,
params: &CreateSnapshotParams,
) -> Result<(), CreateSnapshotError> {
let microvm_state = vmm
.save_state(vm_info)
.map_err(CreateSnapshotError::MicrovmState)?;
match params.snapshot_type {
SnapshotType::Diff | SnapshotType::Full | SnapshotType::MsyncAndState => {
let microvm_state = vmm
.save_state(vm_info)
.map_err(CreateSnapshotError::MicrovmState)?;

snapshot_state_to_file(&microvm_state, &params.snapshot_path)?;
snapshot_state_to_file(&microvm_state, &params.snapshot_path)?;
}
SnapshotType::Msync => (),
}

snapshot_memory_to_file(vmm, &params.mem_file_path, params.snapshot_type)?;

Expand Down Expand Up @@ -211,55 +218,63 @@ fn snapshot_memory_to_file(
) -> Result<(), CreateSnapshotError> {
use self::CreateSnapshotError::*;

// Need to check this here, as we create the file in the line below
let file_existed = mem_file_path.exists();
match snapshot_type {
SnapshotType::Diff | SnapshotType::Full => {
// Need to check this here, as we create the file in the line below
let file_existed = mem_file_path.exists();

let mut file = OpenOptions::new()
.write(true)
.create(true)
.open(mem_file_path)
.map_err(|err| MemoryBackingFile("open", err))?;

// Determine what size our total memory area is.
let mem_size_mib = mem_size_mib(vmm.guest_memory());
let expected_size = mem_size_mib * 1024 * 1024;

if file_existed {
let file_size = file
.metadata()
.map_err(|e| MemoryBackingFile("get_metadata", e))?
.len();

// Here we only truncate the file if the size mismatches.
// - For full snapshots, the entire file's contents will be overwritten anyway. We have to
// avoid truncating here to deal with the edge case where it represents the snapshot file
// from which this very microVM was loaded (as modifying the memory file would be
// reflected in the mmap of the file, meaning a truncate operation would zero out guest
// memory, and thus corrupt the VM).
// - For diff snapshots, we want to merge the diff layer directly into the file.
if file_size != expected_size {
file.set_len(0)
.map_err(|err| MemoryBackingFile("truncate", err))?;
}
}

let mut file = OpenOptions::new()
.write(true)
.create(true)
.open(mem_file_path)
.map_err(|err| MemoryBackingFile("open", err))?;

// Determine what size our total memory area is.
let mem_size_mib = mem_size_mib(vmm.guest_memory());
let expected_size = mem_size_mib * 1024 * 1024;

if file_existed {
let file_size = file
.metadata()
.map_err(|e| MemoryBackingFile("get_metadata", e))?
.len();

// Here we only truncate the file if the size mismatches.
// - For full snapshots, the entire file's contents will be overwritten anyway. We have to
// avoid truncating here to deal with the edge case where it represents the snapshot file
// from which this very microVM was loaded (as modifying the memory file would be
// reflected in the mmap of the file, meaning a truncate operation would zero out guest
// memory, and thus corrupt the VM).
// - For diff snapshots, we want to merge the diff layer directly into the file.
if file_size != expected_size {
file.set_len(0)
.map_err(|err| MemoryBackingFile("truncate", err))?;
// Set the length of the file to the full size of the memory area.
file.set_len(expected_size)
.map_err(|e| MemoryBackingFile("set_length", e))?;

match snapshot_type {
SnapshotType::Diff => {
let dirty_bitmap = vmm.get_dirty_bitmap().map_err(DirtyBitmap)?;
vmm.guest_memory()
.dump_dirty(&mut file, &dirty_bitmap)
.map_err(Memory)
}
SnapshotType::Full => vmm.guest_memory().dump(&mut file).map_err(Memory),
_ => Ok(()),
}?;
file.flush()
.map_err(|err| MemoryBackingFile("flush", err))?;
file.sync_all()
.map_err(|err| MemoryBackingFile("sync_all", err))
}
}

// Set the length of the file to the full size of the memory area.
file.set_len(expected_size)
.map_err(|e| MemoryBackingFile("set_length", e))?;

match snapshot_type {
SnapshotType::Diff => {
let dirty_bitmap = vmm.get_dirty_bitmap().map_err(DirtyBitmap)?;
vmm.guest_memory()
.dump_dirty(&mut file, &dirty_bitmap)
.map_err(Memory)
SnapshotType::Msync | SnapshotType::MsyncAndState => {
vmm.guest_memory().msync().map_err(MemoryMsync)
}
SnapshotType::Full => vmm.guest_memory().dump(&mut file).map_err(Memory),
}?;
file.flush()
.map_err(|err| MemoryBackingFile("flush", err))?;
file.sync_all()
.map_err(|err| MemoryBackingFile("sync_all", err))
}
}

/// Validates that snapshot CPU vendor matches the host CPU vendor.
Expand Down Expand Up @@ -421,6 +436,7 @@ pub fn restore_from_snapshot(
mem_state,
track_dirty_pages,
vm_resources.vm_config.huge_pages,
params.shared,
)
.map_err(RestoreFromSnapshotGuestMemoryError::File)?,
None,
Expand Down Expand Up @@ -488,10 +504,24 @@ fn guest_memory_from_file(
mem_state: &GuestMemoryState,
track_dirty_pages: bool,
huge_pages: HugePageConfig,
shared: bool,
) -> Result<GuestMemoryMmap, GuestMemoryFromFileError> {
let mem_file = File::open(mem_file_path)?;
let guest_mem =
GuestMemoryMmap::from_state(Some(&mem_file), mem_state, track_dirty_pages, huge_pages)?;
let mem_file = if shared {
OpenOptions::new()
.read(true)
.write(true)
.open(mem_file_path)?
} else {
File::open(mem_file_path)?
};

let guest_mem = GuestMemoryMmap::from_state(
Some(&mem_file),
mem_state,
track_dirty_pages,
huge_pages,
shared,
)?;
Ok(guest_mem)
}

Expand Down Expand Up @@ -550,7 +580,8 @@ fn create_guest_memory(
track_dirty_pages: bool,
huge_pages: HugePageConfig,
) -> Result<(GuestMemoryMmap, Vec<GuestRegionUffdMapping>), GuestMemoryFromUffdError> {
let guest_memory = GuestMemoryMmap::from_state(None, mem_state, track_dirty_pages, huge_pages)?;
let guest_memory =
GuestMemoryMmap::from_state(None, mem_state, track_dirty_pages, huge_pages, false)?;
let mut backend_mappings = Vec::with_capacity(guest_memory.num_regions());
for (mem_region, state_region) in guest_memory.iter().zip(mem_state.regions.iter()) {
backend_mappings.push(GuestRegionUffdMapping {
Expand Down
24 changes: 24 additions & 0 deletions src/vmm/src/rpc_interface.rs
Original file line number Diff line number Diff line change
Expand Up @@ -791,6 +791,26 @@ impl RuntimeApiController {
elapsed_time_us
);
}
SnapshotType::Msync => {
let elapsed_time_us = update_metric_with_elapsed_time(
&METRICS.latencies_us.vmm_msync_create_snapshot,
create_start_us,
);
info!(
"'create memory synchronization snapshot' VMM action took {} us.",
elapsed_time_us
);
}
SnapshotType::MsyncAndState => {
let elapsed_time_us = update_metric_with_elapsed_time(
&METRICS.latencies_us.vmm_msync_and_state_create_snapshot,
create_start_us,
);
info!(
"'create memory synchronization and state snapshot' VMM action took {} us.",
elapsed_time_us
);
}
}
Ok(VmmData::Empty)
}
Expand Down Expand Up @@ -1733,6 +1753,7 @@ mod tests {
},
enable_diff_snapshots: false,
resume_vm: false,
shared: false,
});
// Request should succeed.
preboot.handle_preboot_request(req).unwrap();
Expand All @@ -1749,6 +1770,7 @@ mod tests {
},
enable_diff_snapshots: false,
resume_vm: true,
shared: false,
});
// Request should succeed.
preboot.handle_preboot_request(req).unwrap();
Expand Down Expand Up @@ -2130,6 +2152,7 @@ mod tests {
},
enable_diff_snapshots: false,
resume_vm: false,
shared: false,
}),
VmmActionError::OperationNotSupportedPostBoot,
);
Expand All @@ -2156,6 +2179,7 @@ mod tests {
},
enable_diff_snapshots: false,
resume_vm: false,
shared: false,
});
let err = preboot.handle_preboot_request(req);
assert_eq!(
Expand Down
11 changes: 11 additions & 0 deletions src/vmm/src/vmm_config/snapshot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ pub enum SnapshotType {
/// Full snapshot.
#[default]
Full,
/// Memory synchronization snapshot.
Msync,
/// Memory synchronization and state snapshot.
MsyncAndState,
}

/// Specifies the method through which guest memory will get populated when
Expand Down Expand Up @@ -60,6 +64,10 @@ pub struct LoadSnapshotParams {
/// When set to true, the vm is also resumed if the snapshot load
/// is successful.
pub resume_vm: bool,
/// When set to true and the guest memory backend is a file,
/// changes to the memory are asynchronously written back to the
/// backend as the VM is running.
pub shared: bool,
}

/// Stores the configuration for loading a snapshot that is provided by the user.
Expand All @@ -82,6 +90,9 @@ pub struct LoadSnapshotConfig {
/// Whether or not to resume the vm post snapshot load.
#[serde(default)]
pub resume_vm: bool,
/// Whether or not to asynchronously write back memory changes to the backing file.
#[serde(default)]
pub shared: bool,
}

/// Stores the configuration used for managing snapshot memory.
Expand Down
Loading

0 comments on commit c76df56

Please sign in to comment.