From 3a23596d02431453538cd967bd174889429c8038 Mon Sep 17 00:00:00 2001 From: Harsh Rawat Date: Fri, 17 Apr 2026 00:19:08 +0530 Subject: [PATCH] adds HCS live migration APIs Introduces the HCS live migration APIs including destination start, source-side initialize/start/transfer, finalization, and event notifications. Add supporting computecore types and HCS schema definitions for migration options and settings. Signed-off-by: Harsh Rawat --- internal/computecore/computecore.go | 28 -- internal/computecore/types.go | 103 +++++++ internal/hcs/migration.go | 372 ++++++++++++++++++++++++ internal/hcs/schema2/migration.go | 118 ++++++++ internal/hcs/schema2/virtual_machine.go | 2 + internal/hcs/system.go | 61 ++-- 6 files changed, 639 insertions(+), 45 deletions(-) create mode 100644 internal/computecore/types.go create mode 100644 internal/hcs/migration.go create mode 100644 internal/hcs/schema2/migration.go diff --git a/internal/computecore/computecore.go b/internal/computecore/computecore.go index ea1071cb48..e00a287d15 100644 --- a/internal/computecore/computecore.go +++ b/internal/computecore/computecore.go @@ -94,34 +94,6 @@ import ( // errVmcomputeOperationPending is an error encountered when the operation is being completed asynchronously const errVmcomputeOperationPending = syscall.Errno(0xC0370103) -// HcsSystem is the handle associated with a created compute system. -type HcsSystem syscall.Handle - -// HcsProcess is the handle associated with a created process in a compute -// system. -type HcsProcess syscall.Handle - -// HcsOperation is the handle associated with an operation on a compute system. -type HcsOperation syscall.Handle - -// HcsCallback is the handle associated with the function to call when events -// occur. -type HcsCallback syscall.Handle - -// HcsProcessInformation is the structure used when creating or getting process -// info. -type HcsProcessInformation struct { - // ProcessID is the pid of the created process. 
- ProcessID uint32 - _ uint32 // reserved padding - // StdInput is the handle associated with the stdin of the process. - StdInput syscall.Handle - // StdOutput is the handle associated with the stdout of the process. - StdOutput syscall.Handle - // StdError is the handle associated with the stderr of the process. - StdError syscall.Handle -} - func execute(ctx gcontext.Context, timeout time.Duration, f func() error) error { now := time.Now() if timeout > 0 { diff --git a/internal/computecore/types.go b/internal/computecore/types.go new file mode 100644 index 0000000000..19b5167157 --- /dev/null +++ b/internal/computecore/types.go @@ -0,0 +1,103 @@ +//go:build windows + +package computecore + +import ( + "fmt" + "syscall" +) + +// HcsSystem is the handle associated with a created compute system. +type HcsSystem syscall.Handle + +// HcsProcess is the handle associated with a created process in a compute system. +type HcsProcess syscall.Handle + +// HcsOperation is the handle associated with an operation on a compute system. +type HcsOperation syscall.Handle + +// HcsCallback is the handle associated with the function to call when events occur. +type HcsCallback syscall.Handle + +// HcsProcessInformation is the structure used when creating or getting process info. +type HcsProcessInformation struct { + ProcessID uint32 + _ uint32 // reserved padding + StdInput syscall.Handle + StdOutput syscall.Handle + StdError syscall.Handle +} + +// HcsResourceType specifies the type of resource to add to an operation. +const ( + HcsResourceTypeNone uint32 = 0 + HcsResourceTypeFile uint32 = 1 + HcsResourceTypeJob uint32 = 2 + HcsResourceTypeComObject uint32 = 3 + HcsResourceTypeSocket uint32 = 4 +) + +// HcsEventType represents the type of event received from HCS. 
+type HcsEventType uint32
+
+// Known event type values carried in HcsEvent.Type.
+const (
+	HcsEventTypeInvalid                           HcsEventType = 0x00000000
+	HcsEventTypeSystemExited                      HcsEventType = 0x00000001
+	HcsEventTypeSystemCrashInitiated              HcsEventType = 0x00000002
+	HcsEventTypeSystemCrashReport                 HcsEventType = 0x00000003
+	HcsEventTypeSystemRdpEnhancedModeStateChanged HcsEventType = 0x00000004
+	HcsEventTypeSystemSiloJobCreated              HcsEventType = 0x00000005
+	HcsEventTypeSystemGuestConnectionClosed       HcsEventType = 0x00000006
+	HcsEventTypeProcessExited                     HcsEventType = 0x00010000
+	HcsEventTypeOperationCallback                 HcsEventType = 0x01000000
+	HcsEventTypeServiceDisconnect                 HcsEventType = 0x02000000
+	HcsEventTypeGroupVMLifecycle                  HcsEventType = 0x80000002
+	HcsEventTypeGroupLiveMigration                HcsEventType = 0x80000003
+	HcsEventTypeGroupOperationInfo                HcsEventType = 0xC0000001
+)
+
+// hcsEventTypeNames holds the name String reports for every known event type.
+var hcsEventTypeNames = map[HcsEventType]string{
+	HcsEventTypeInvalid:                           "Invalid",
+	HcsEventTypeSystemExited:                      "SystemExited",
+	HcsEventTypeSystemCrashInitiated:              "SystemCrashInitiated",
+	HcsEventTypeSystemCrashReport:                 "SystemCrashReport",
+	HcsEventTypeSystemRdpEnhancedModeStateChanged: "SystemRdpEnhancedModeStateChanged",
+	HcsEventTypeSystemSiloJobCreated:              "SystemSiloJobCreated",
+	HcsEventTypeSystemGuestConnectionClosed:       "SystemGuestConnectionClosed",
+	HcsEventTypeProcessExited:                     "ProcessExited",
+	HcsEventTypeOperationCallback:                 "OperationCallback",
+	HcsEventTypeServiceDisconnect:                 "ServiceDisconnect",
+	HcsEventTypeGroupVMLifecycle:                  "GroupVmLifecycle",
+	HcsEventTypeGroupLiveMigration:                "GroupLiveMigration",
+	HcsEventTypeGroupOperationInfo:                "GroupOperationInfo",
+}
+
+// String returns the name of a known event type. Any other value is rendered
+// as "Unknown: 0x" followed by the value as eight upper-case hex digits.
+func (t HcsEventType) String() string {
+	if name, known := hcsEventTypeNames[t]; known {
+		return name
+	}
+	return fmt.Sprintf("Unknown: 0x%08X", uint32(t))
+}
+
+// HcsEventOptions controls which event groups are enabled for a callback.
+const (
+	// HcsEventOptionNone is passed, together with a zero context and callback, when
+	// a registered callback is cleared; HcsEventOptionEnableLiveMigrationEvents is
+	// passed when the live migration callback is registered (see internal/hcs).
+	// NOTE(review): HcsEventOptionEnableOperationCallbacks is not referenced by the
+	// migration code; presumably it enables operation-completion events — confirm.
+	HcsEventOptionNone                      uint32 = 0
+	HcsEventOptionEnableOperationCallbacks  uint32 = 1
+	HcsEventOptionEnableLiveMigrationEvents uint32 = 4
+)
+
+// HcsEvent is the event structure passed to HCS_EVENT_CALLBACK.
+//
+// Type identifies the event. EventData, when non-nil, points to UTF-16 text that
+// the migration callback decodes with windows.UTF16PtrToString.
+// NOTE(review): confirm the field layout against the native HCS_EVENT definition;
+// only these two fields are read by this package's consumers.
+type HcsEvent struct {
+	Type      HcsEventType
+	EventData *uint16
+}
diff --git a/internal/hcs/migration.go b/internal/hcs/migration.go
new file mode 100644
index 0000000000..198a4e618c
--- /dev/null
+++ b/internal/hcs/migration.go
@@ -0,0 +1,372 @@
+//go:build windows
+
+package hcs
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"syscall"
+	"unsafe"
+
+	"github.com/Microsoft/hcsshim/internal/computecore"
+	hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2"
+	"github.com/Microsoft/hcsshim/internal/oc"
+
+	"github.com/sirupsen/logrus"
+	"go.opencensus.io/trace"
+	"golang.org/x/sys/windows"
+)
+
+// liveMigrationSocketURI is the HCS resource URI under which the live migration
+// transport socket is attached to an operation.
+const liveMigrationSocketURI = "hcs:/VirtualMachine/LiveMigrationSocket"
+
+// migrationNotificationBufferSize is the capacity of the LM notification channel.
+// Events that arrive while the channel is full are dropped by the callback.
+const migrationNotificationBufferSize = 16
+
+// MigrationConfig holds parameters for starting a compute system as a live migration
+// destination, or for initiating the source side of a live migration.
+type MigrationConfig struct {
+	// Socket is the handle to the live migration transport socket.
+	Socket syscall.Handle
+	// SessionID identifies the migration session.
+	SessionID uint32
+}
+
+// migrationCallback is the syscall callback registered with HcsSetComputeSystemCallback
+// for live migration events. It receives events and dispatches them to the channel
+// stored in the System via the callbackContext pointer.
+//
+// It is created once, at package initialization, and shared by every System.
+var migrationCallback = syscall.NewCallback(migrationCallbackHandler)
+
+// migrationCallbackHandler is invoked by computecore.dll for live migration events.
+// eventPtr is read as a *computecore.HcsEvent and ctx as a pointer to the
+// chan string that receives the event data. It always returns 0.
+func migrationCallbackHandler(eventPtr uintptr, ctx uintptr) uintptr {
+	// Without an event or a context there is nothing to decode or deliver.
+	if eventPtr == 0 || ctx == 0 {
+		return 0
+	}
+
+	// eventPtr is the native event; ctx is the context registered by
+	// openMigrationHandle, i.e. the address of System.migrationNotifyCh.
+	//
+	// NOTE(review): the channel field is read here without handleLock while
+	// closeMigrationHandle closes and clears it. This assumes HCS never runs the
+	// callback once it has been unregistered — confirm, because a send on a
+	// closed channel panics.
+	e := (*computecore.HcsEvent)(unsafe.Pointer(eventPtr))
+	ch := *(*chan string)(unsafe.Pointer(ctx))
+
+	// A nil EventData is delivered to consumers as the empty string.
+	eventData := ""
+	if e.EventData != nil {
+		eventData = windows.UTF16PtrToString(e.EventData)
+	}
+
+	logrus.WithFields(logrus.Fields{
+		"event-type": e.Type.String(),
+		"event-data": eventData,
+	}).Debug("HCS migration notification")
+
+	// Non-blocking send to avoid blocking the HCS callback thread.
+	select {
+	case ch <- eventData:
+	default:
+		logrus.WithField("event-type", e.Type.String()).Warn("migration notification channel full, dropping event")
+	}
+
+	return 0
+}
+
+// openMigrationHandle opens a second computecore handle to the same system and
+// registers a callback for live migration events. It populates
+// computeSystem.migrationHandle and computeSystem.migrationNotifyCh.
+//
+// It is idempotent: if a migration handle is already open it returns nil without
+// touching the existing handle or channel. It returns ErrAlreadyClosed when the
+// primary handle is zero, and otherwise the error from opening the handle or
+// registering the callback; on any error both fields are left zero/nil.
+//
+// The caller MUST hold computeSystem.handleLock.
+func (computeSystem *System) openMigrationHandle(ctx context.Context) error {
+	if computeSystem.migrationHandle != 0 {
+		// Already open — idempotent.
+		return nil
+	}
+
+	// Sanity check: the primary handle must be valid.
+	if computeSystem.handle == 0 {
+		return ErrAlreadyClosed
+	}
+
+	// Open a second handle via computecore for LM operations and events.
+	handle, err := computecore.HcsOpenComputeSystem(ctx, computeSystem.id, syscall.GENERIC_ALL)
+	if err != nil {
+		return err
+	}
+
+	// Store the handle and a fresh notification channel on the struct before
+	// registering: the callback context is the address of the channel field, so
+	// the field must be populated by the time the first event can arrive.
+	computeSystem.migrationHandle = handle
+	computeSystem.migrationNotifyCh = make(chan string, migrationNotificationBufferSize)
+	// Register the callback; on failure undo everything done above.
+	if err := computecore.HcsSetComputeSystemCallback(ctx, handle, computecore.HcsEventOptionEnableLiveMigrationEvents, uintptr(unsafe.Pointer(&computeSystem.migrationNotifyCh)), migrationCallback); err != nil {
+		computeSystem.migrationNotifyCh = nil
+		computeSystem.migrationHandle = 0
+		computecore.HcsCloseComputeSystem(ctx, handle)
+		return err
+	}
+	return nil
+}
+
+// closeMigrationHandle unregisters the LM callback, closes the migration handle,
+// and closes the notification channel. Events still buffered in the channel
+// remain readable by receivers; the channel is not drained. It is a no-op when
+// no migration handle is open.
+//
+// The caller MUST hold computeSystem.handleLock.
+func (computeSystem *System) closeMigrationHandle(ctx context.Context) {
+	if computeSystem.migrationHandle == 0 {
+		return
+	}
+
+	// Unregister callback by passing zeros. Best effort: the handle is closed
+	// below regardless, so the error is deliberately ignored.
+	_ = computecore.HcsSetComputeSystemCallback(ctx, computeSystem.migrationHandle, computecore.HcsEventOptionNone, 0, 0)
+
+	// Close the migration handle and mark it as no longer open.
+	computecore.HcsCloseComputeSystem(ctx, computeSystem.migrationHandle)
+	computeSystem.migrationHandle = 0
+
+	// Close the notification channel so receivers see the end of the stream,
+	// then drop the reference.
+	if computeSystem.migrationNotifyCh != nil {
+		close(computeSystem.migrationNotifyCh)
+		computeSystem.migrationNotifyCh = nil
+	}
+}
+
+// StartWithMigrationOptions synchronously starts the compute system as a live
+// migration destination using the provided configuration.
+//
+// config supplies the transport socket, which is attached to the start operation,
+// and the session ID, which is sent in the JSON start options. A nil config is
+// rejected with a plain error; all other failures are wrapped as system errors
+// under the "hcs::System::Start" operation. On success the migration handle stays
+// open so that notifications and later migration calls can use it. On failure the
+// migration handle is closed again only if this call opened it.
+func (computeSystem *System) StartWithMigrationOptions(ctx context.Context, config *MigrationConfig) (err error) {
+	if config == nil {
+		return errors.New("live migration config must not be nil")
+	}
+
+	operation := "hcs::System::Start"
+
+	computeSystem.handleLock.Lock()
+	defer computeSystem.handleLock.Unlock()
+
+	if computeSystem.handle == 0 {
+		return makeSystemError(computeSystem, operation, ErrAlreadyClosed, nil)
+	}
+
+	// Open the migration handle for LM events and operations. openMigrationHandle
+	// is idempotent, so remember whether this call is the one that opens it: a
+	// failure below must not tear down a handle (and close the notification
+	// channel) that an earlier successful call established.
+	openedHere := computeSystem.migrationHandle == 0
+	if err := computeSystem.openMigrationHandle(ctx); err != nil {
+		return makeSystemError(computeSystem, operation, err, nil)
+	}
+	defer func() {
+		if err != nil && openedHere {
+			computeSystem.closeMigrationHandle(ctx)
+		}
+	}()
+
+	// Create a computecore operation to track the start request.
+	op, err := computecore.HcsCreateOperation(ctx, 0, 0)
+	if err != nil {
+		return makeSystemError(computeSystem, operation, err, nil)
+	}
+	defer computecore.HcsCloseOperation(ctx, op)
+
+	// Attach the live migration socket to the operation.
+	if err := computecore.HcsAddResourceToOperation(ctx, op, computecore.HcsResourceTypeSocket, liveMigrationSocketURI, config.Socket); err != nil {
+		return makeSystemError(computeSystem, operation, err, nil)
+	}
+
+	// Build start options with destination migration settings.
+	options := hcsschema.StartOptions{
+		DestinationMigrationOptions: &hcsschema.MigrationStartOptions{
+			NetworkSettings: &hcsschema.MigrationNetworkSettings{SessionID: config.SessionID},
+		},
+	}
+	raw, err := json.Marshal(options)
+	if err != nil {
+		return makeSystemError(computeSystem, operation, err, nil)
+	}
+
+	return computeSystem.start(ctx, op, string(raw))
+}
+
+// InitializeLiveMigrationOnSource initializes a live migration on the source side with the given options.
+//
+// A nil options value is replaced by empty MigrationInitializeOptions. The call
+// opens the migration handle if it is not open yet, issues the initialize request
+// on it and waits for the operation result. Failures are wrapped as system errors.
+// On failure the migration handle is closed again only if this call opened it.
+func (computeSystem *System) InitializeLiveMigrationOnSource(ctx context.Context, options *hcsschema.MigrationInitializeOptions) (err error) {
+	operation := "hcs::System::InitializeLiveMigrationOnSource"
+
+	ctx, span := oc.StartSpan(ctx, operation)
+	defer span.End()
+	defer func() { oc.SetSpanStatus(span, err) }()
+	span.AddAttributes(trace.StringAttribute("cid", computeSystem.id))
+
+	computeSystem.handleLock.Lock()
+	defer computeSystem.handleLock.Unlock()
+
+	// Open the migration handle for LM events and operations. As in
+	// StartWithMigrationOptions, only roll it back on failure when this call
+	// opened it, so a failed repeat call cannot disrupt an existing session.
+	openedHere := computeSystem.migrationHandle == 0
+	if err = computeSystem.openMigrationHandle(ctx); err != nil {
+		return makeSystemError(computeSystem, operation, err, nil)
+	}
+	defer func() {
+		if err != nil && openedHere {
+			computeSystem.closeMigrationHandle(ctx)
+		}
+	}()
+
+	if options == nil {
+		options = &hcsschema.MigrationInitializeOptions{}
+	}
+	optionsJSON, err := json.Marshal(options)
+	if err != nil {
+		return makeSystemError(computeSystem, operation, err, nil)
+	}
+
+	op, err := computecore.HcsCreateOperation(ctx, 0, 0)
+	if err != nil {
+		return makeSystemError(computeSystem, operation, err, nil)
+	}
+	defer computecore.HcsCloseOperation(ctx, op)
+
+	// Issue the initialize call and wait for completion.
+	if err = computecore.HcsInitializeLiveMigrationOnSource(ctx, computeSystem.migrationHandle, op, string(optionsJSON)); err != nil {
+		return makeSystemError(computeSystem, operation, err, nil)
+	}
+	if _, err = computecore.HcsWaitForOperationResult(ctx, op, 0xFFFFFFFF); err != nil {
+		return makeSystemError(computeSystem, operation, err, nil)
+	}
+	return nil
+}
+
+// StartLiveMigrationOnSource starts the live migration on the source side using the provided
+// transport socket and session ID.
+func (computeSystem *System) StartLiveMigrationOnSource(ctx context.Context, config *MigrationConfig) (err error) {
+	// A missing config is rejected before any tracing or locking happens.
+	if config == nil {
+		return errors.New("migration config must not be nil")
+	}
+
+	const operation = "hcs::System::StartLiveMigrationOnSource"
+
+	ctx, span := oc.StartSpan(ctx, operation)
+	// Record the final error on the span, then end it.
+	defer func() {
+		oc.SetSpanStatus(span, err)
+		span.End()
+	}()
+	span.AddAttributes(trace.StringAttribute("cid", computeSystem.id))
+
+	computeSystem.handleLock.Lock()
+	defer computeSystem.handleLock.Unlock()
+
+	// A zero migration handle means no migration handle is currently open.
+	if computeSystem.migrationHandle == 0 {
+		return makeSystemError(computeSystem, operation, ErrAlreadyClosed, nil)
+	}
+
+	hcsOp, err := computecore.HcsCreateOperation(ctx, 0, 0)
+	if err != nil {
+		return makeSystemError(computeSystem, operation, err, nil)
+	}
+	defer computecore.HcsCloseOperation(ctx, hcsOp)
+
+	// The transport socket travels with the operation as a socket resource.
+	if addErr := computecore.HcsAddResourceToOperation(ctx, hcsOp, computecore.HcsResourceTypeSocket, liveMigrationSocketURI, config.Socket); addErr != nil {
+		return makeSystemError(computeSystem, operation, addErr, nil)
+	}
+
+	// The session ID is sent as JSON-encoded start options.
+	payload, err := json.Marshal(hcsschema.MigrationStartOptions{
+		NetworkSettings: &hcsschema.MigrationNetworkSettings{SessionID: config.SessionID},
+	})
+	if err != nil {
+		return makeSystemError(computeSystem, operation, err, nil)
+	}
+
+	// Submit the start request on the migration handle, then wait for its result.
+	if startErr := computecore.HcsStartLiveMigrationOnSource(ctx, computeSystem.migrationHandle, hcsOp, string(payload)); startErr != nil {
+		return makeSystemError(computeSystem, operation, startErr, nil)
+	}
+	if _, waitErr := computecore.HcsWaitForOperationResult(ctx, hcsOp, 0xFFFFFFFF); waitErr != nil {
+		return makeSystemError(computeSystem, operation, waitErr, nil)
+	}
+	return nil
+}
+
+// StartLiveMigrationTransfer starts the memory transfer phase of a live migration
+// and waits for the operation result. It requires an open migration handle.
+func (computeSystem *System) StartLiveMigrationTransfer(ctx context.Context, options *hcsschema.MigrationTransferOptions) (err error) {
+	const operation = "hcs::System::StartLiveMigrationTransfer"
+
+	ctx, span := oc.StartSpan(ctx, operation)
+	// Record the final error on the span, then end it.
+	defer func() {
+		oc.SetSpanStatus(span, err)
+		span.End()
+	}()
+	span.AddAttributes(trace.StringAttribute("cid", computeSystem.id))
+
+	computeSystem.handleLock.Lock()
+	defer computeSystem.handleLock.Unlock()
+
+	// A zero migration handle means no migration handle is currently open.
+	if computeSystem.migrationHandle == 0 {
+		return makeSystemError(computeSystem, operation, ErrAlreadyClosed, nil)
+	}
+
+	// Callers may pass nil; empty transfer options are sent in that case.
+	effective := options
+	if effective == nil {
+		effective = &hcsschema.MigrationTransferOptions{}
+	}
+	payload, err := json.Marshal(effective)
+	if err != nil {
+		return makeSystemError(computeSystem, operation, err, nil)
+	}
+
+	hcsOp, err := computecore.HcsCreateOperation(ctx, 0, 0)
+	if err != nil {
+		return makeSystemError(computeSystem, operation, err, nil)
+	}
+	defer computecore.HcsCloseOperation(ctx, hcsOp)
+
+	// Submit the transfer request on the migration handle, then wait for its result.
+	if transferErr := computecore.HcsStartLiveMigrationTransfer(ctx, computeSystem.migrationHandle, hcsOp, string(payload)); transferErr != nil {
+		return makeSystemError(computeSystem, operation, transferErr, nil)
+	}
+	if _, waitErr := computecore.HcsWaitForOperationResult(ctx, hcsOp, 0xFFFFFFFF); waitErr != nil {
+		return makeSystemError(computeSystem, operation, waitErr, nil)
+	}
+	return nil
+}
+
+// FinalizeLiveMigration completes the live migration workflow. The requested final
+// operation for the VM is Resume when resume is true and Stop otherwise.
+func (computeSystem *System) FinalizeLiveMigration(ctx context.Context, resume bool) (err error) {
+	const operation = "hcs::System::FinalizeLiveMigration"
+
+	ctx, span := oc.StartSpan(ctx, operation)
+	// Record the final error on the span, then end it.
+	defer func() {
+		oc.SetSpanStatus(span, err)
+		span.End()
+	}()
+	span.AddAttributes(trace.StringAttribute("cid", computeSystem.id))
+
+	computeSystem.handleLock.Lock()
+	defer computeSystem.handleLock.Unlock()
+
+	// A zero migration handle means no migration handle is currently open.
+	if computeSystem.migrationHandle == 0 {
+		return makeSystemError(computeSystem, operation, ErrAlreadyClosed, nil)
+	}
+
+	// Translate the resume flag into the final operation requested for the VM.
+	var finalOp hcsschema.MigrationFinalOperation
+	if resume {
+		finalOp = hcsschema.MigrationFinalOperationResume
+	} else {
+		finalOp = hcsschema.MigrationFinalOperationStop
+	}
+	payload, err := json.Marshal(hcsschema.MigrationFinalizedOptions{FinalizedOperation: finalOp})
+	if err != nil {
+		return makeSystemError(computeSystem, operation, err, nil)
+	}
+
+	hcsOp, err := computecore.HcsCreateOperation(ctx, 0, 0)
+	if err != nil {
+		return makeSystemError(computeSystem, operation, err, nil)
+	}
+	defer computecore.HcsCloseOperation(ctx, hcsOp)
+
+	// Submit the finalize request on the migration handle, then wait for its result.
+	if finalizeErr := computecore.HcsFinalizeLiveMigration(ctx, computeSystem.migrationHandle, hcsOp, string(payload)); finalizeErr != nil {
+		return makeSystemError(computeSystem, operation, finalizeErr, nil)
+	}
+	if _, waitErr := computecore.HcsWaitForOperationResult(ctx, hcsOp, 0xFFFFFFFF); waitErr != nil {
+		return makeSystemError(computeSystem, operation, waitErr, nil)
+	}
+
+	// Only a successful finalize releases the migration handle, its callback and
+	// the notification channel; on the error paths above they stay open.
+	computeSystem.closeMigrationHandle(ctx)
+	return nil
+}
+
+// MigrationNotifications returns the receive-only channel on which live migration
+// event data strings are delivered, or an error if no migration handle is open.
+func (computeSystem *System) MigrationNotifications() (<-chan string, error) { + computeSystem.handleLock.RLock() + defer computeSystem.handleLock.RUnlock() + + if computeSystem.migrationHandle == 0 { + return nil, errors.New("migration handle not open; call StartWithMigrationOptions or InitializeLiveMigrationOnSource first") + } + return computeSystem.migrationNotifyCh, nil +} diff --git a/internal/hcs/schema2/migration.go b/internal/hcs/schema2/migration.go new file mode 100644 index 0000000000..e459ae1c18 --- /dev/null +++ b/internal/hcs/schema2/migration.go @@ -0,0 +1,118 @@ +package hcsschema + +// MigrationInitializeOptions is a set of options for the migration workflow. +type MigrationInitializeOptions struct { + // Origin is the side of migration the workflow is performed on. + Origin MigrationOrigin `json:"Origin,omitempty"` + // MemoryTransport specifies the settings for memory transfer during migration. On source, this + // setting is required when migration is started. On destination, this setting is required when + // migration is initiated. + MemoryTransport MigrationMemoryTransport `json:"MemoryTransport,omitempty"` + // MemoryTransferThrottleParams specifies settings for throttling during memory transfer. + MemoryTransferThrottleParams *MemoryMigrationTransferThrottleParams `json:"MemoryTransferThrottleParams,omitempty"` + // CompressionSettings specifies additional settings when compression is enabled. + CompressionSettings *MigrationCompressionSettings `json:"CompressionSettings,omitempty"` + // ChecksumVerification enables memory checksum verification. + ChecksumVerification bool `json:"ChecksumVerification,omitempty"` + // PerfTracingEnabled enables performance tracing during migration. + PerfTracingEnabled bool `json:"PerfTracingEnabled,omitempty"` + // CancelIfBlackoutThresholdExceeds cancels the operation if the blackout threshold is exceeded. 
+ CancelIfBlackoutThresholdExceeds bool `json:"CancelIfBlackoutThresholdExceeds,omitempty"` + // PrepareMemoryTransferMode extends timeout for cross-version live migration. + PrepareMemoryTransferMode bool `json:"PrepareMemoryTransferMode,omitempty"` + // CompatibilityData is the compatibility information required for the destination VM. + CompatibilityData *CompatibilityInfo `json:"CompatibilityData,omitempty"` +} + +// MigrationFinalizedOptions is a set of additional options used for HcsLiveMigrationFinalization. +type MigrationFinalizedOptions struct { + // Origin is the side of migration the workflow is performed on. + Origin MigrationOrigin `json:"Origin,omitempty"` + // FinalizedOperation is the final state transition for the VM as part of concluding the LM workflow. + FinalizedOperation MigrationFinalOperation `json:"FinalizedOperation,omitempty"` +} + +// MigrationStartOptions specifies options for starting a migration. +type MigrationStartOptions struct { + // NetworkSettings specifies network settings for the socket provided. + NetworkSettings *MigrationNetworkSettings `json:"NetworkSettings,omitempty"` +} + +// MigrationTransferOptions specifies options for the migration transfer phase. +type MigrationTransferOptions struct { + // Origin is the side of migration the workflow is performed on. + Origin MigrationOrigin `json:"Origin,omitempty"` +} + +// StartOptions specifies options for starting a compute system. +type StartOptions struct { + // DestinationMigrationOptions specifies settings to use when starting a migration on the destination side. + DestinationMigrationOptions *MigrationStartOptions `json:"DestinationMigrationOptions,omitempty"` +} + +// MigrationOrigin indicates where migration is initiated from. +type MigrationOrigin string + +const ( + // MigrationOriginSource indicates the source side of migration. + MigrationOriginSource MigrationOrigin = "Source" + // MigrationOriginDestination indicates the destination side of migration. 
+ MigrationOriginDestination MigrationOrigin = "Destination" +) + +// MigrationMemoryTransport is the transport protocol used for memory transfer during migration. +type MigrationMemoryTransport string + +const ( + // MigrationMemoryTransportTCP indicates the VM memory is copied over a TCP/IP connection. + MigrationMemoryTransportTCP MigrationMemoryTransport = "TCP" +) + +// MemoryMigrationTransferThrottleParams specifies settings for migration memory transfer throttling. +type MemoryMigrationTransferThrottleParams struct { + // SkipThrottling indicates whether throttling should be skipped. + SkipThrottling *bool `json:"SkipThrottling,omitempty"` + // ThrottlingScale is the scale of the throttling as a percentage (1-100). + ThrottlingScale *float64 `json:"ThrottlingScale,omitempty"` + // MinimumThrottlePercentage is the minimum percentage to which memory transfer can be throttled. + MinimumThrottlePercentage *uint8 `json:"MinimumThrottlePercentage,omitempty"` + // TargetNumberOfBrownoutTransferPasses is the number of passes targeted before the VM enters blackout. + TargetNumberOfBrownoutTransferPasses *uint32 `json:"TargetNumberOfBrownoutTransferPasses,omitempty"` + // StartingBrownoutPassNumberForThrottling is the transfer pass where throttling begins. + StartingBrownoutPassNumberForThrottling *uint32 `json:"StartingBrownoutPassNumberForThrottling,omitempty"` + // MaximumNumberOfBrownoutTransferPasses is the maximum number of passes before forcing blackout. + MaximumNumberOfBrownoutTransferPasses *uint32 `json:"MaximumNumberOfBrownoutTransferPasses,omitempty"` + // TargetBlackoutTransferTime is the expected duration for blackout transfer time. + TargetBlackoutTransferTime *uint32 `json:"TargetBlackoutTransferTime,omitempty"` + // BlackoutTimeThresholdForCancellingMigration is the blackout duration threshold for cancelling migration. 
+ BlackoutTimeThresholdForCancellingMigration *uint32 `json:"BlackoutTimeThresholdForCancellingMigration,omitempty"` +} + +// MigrationCompressionSettings specifies compression settings for migration. +type MigrationCompressionSettings struct { + // ThrottleWorkerCount is the [de]compression thread count. Values higher than what the host + // and VM configuration can support will be adjusted. The value should be non-zero. + ThrottleWorkerCount *uint32 `json:"ThrottleWorkerCount,omitempty"` +} + +// CompatibilityInfo is opaque VM compatibility data, primarily used in migration. +type CompatibilityInfo struct { + // Data is the raw compatibility information. + Data []byte `json:"Data,omitempty"` +} + +// MigrationFinalOperation is the final operation performed on the compute system to finalize the live migration workflow. +type MigrationFinalOperation string + +const ( + // MigrationFinalOperationResume resumes the VM. + MigrationFinalOperationResume MigrationFinalOperation = "Resume" + // MigrationFinalOperationStop stops the VM. + MigrationFinalOperationStop MigrationFinalOperation = "Stop" +) + +// MigrationNetworkSettings specifies the transport protocol for network connection provided by client. +type MigrationNetworkSettings struct { + // SessionID is the session ID associated with the socket connection between source and destination. + SessionID uint32 `json:"SessionId,omitempty"` +} diff --git a/internal/hcs/schema2/virtual_machine.go b/internal/hcs/schema2/virtual_machine.go index 0b66870ec6..630d1b7820 100644 --- a/internal/hcs/schema2/virtual_machine.go +++ b/internal/hcs/schema2/virtual_machine.go @@ -26,4 +26,6 @@ type VirtualMachine struct { DebugOptions *DebugOptions `json:"DebugOptions,omitempty"` GuestConnection *GuestConnection `json:"GuestConnection,omitempty"` SecuritySettings *SecuritySettings `json:"SecuritySettings,omitempty"` + // Live migration options to be used on destination. 
+ MigrationOptions *MigrationInitializeOptions `json:"MigrationOptions,omitempty"` } diff --git a/internal/hcs/system.go b/internal/hcs/system.go index 823e27b0b7..dc678467b3 100644 --- a/internal/hcs/system.go +++ b/internal/hcs/system.go @@ -12,6 +12,7 @@ import ( "syscall" "time" + "github.com/Microsoft/hcsshim/internal/computecore" "github.com/Microsoft/hcsshim/internal/cow" "github.com/Microsoft/hcsshim/internal/hcs/schema1" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" @@ -26,10 +27,12 @@ import ( ) type System struct { - handleLock sync.RWMutex - handle vmcompute.HcsSystem - id string - callbackNumber uintptr + handleLock sync.RWMutex + handle vmcompute.HcsSystem + migrationHandle computecore.HcsSystem + migrationNotifyCh chan string + id string + callbackNumber uintptr closedWaitOnce sync.Once waitBlock chan struct{} @@ -192,7 +195,30 @@ func GetComputeSystems(ctx context.Context, q schema1.ComputeSystemQuery) ([]sch } // Start synchronously starts the computeSystem. -func (computeSystem *System) Start(ctx context.Context) (err error) { +func (computeSystem *System) Start(ctx context.Context) error { + computeSystem.handleLock.RLock() + defer computeSystem.handleLock.RUnlock() + + if computeSystem.handle == 0 { + return makeSystemError(computeSystem, "hcs::System::Start", ErrAlreadyClosed, nil) + } + + op, err := computecore.HcsCreateOperation(ctx, 0, 0) + if err != nil { + return makeSystemError(computeSystem, "hcs::System::Start", err, nil) + } + defer computecore.HcsCloseOperation(ctx, op) + + return computeSystem.start(ctx, op, "") +} + +// start is the shared implementation used by Start and StartWithMigrationOptions. +// The caller provides a pre-created computecore operation (with any resources already +// attached) and the JSON-encoded options string to pass to HcsStartComputeSystem. +// +// The caller MUST hold computeSystem.handleLock and verify the handle is valid +// before calling this method. 
+func (computeSystem *System) start(ctx context.Context, op computecore.HcsOperation, opts string) (err error) { operation := "hcs::System::Start" // hcsStartComputeSystemContext is an async operation. Start the outer span @@ -202,21 +228,19 @@ func (computeSystem *System) Start(ctx context.Context) (err error) { defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes(trace.StringAttribute("cid", computeSystem.id)) - computeSystem.handleLock.RLock() - defer computeSystem.handleLock.RUnlock() - - // prevent starting an exited system because waitblock we do not recreate waitBlock - // or rerun waitBackground, so we have no way to be notified of it closing again - if computeSystem.handle == 0 { - return makeSystemError(computeSystem, operation, ErrAlreadyClosed, nil) + if err := computecore.HcsStartComputeSystem( + ctx, + computecore.HcsSystem(computeSystem.handle), + op, + opts, + ); err != nil { + return makeSystemError(computeSystem, operation, err, nil) } - resultJSON, err := vmcompute.HcsStartComputeSystem(ctx, computeSystem.handle, "") - events, err := processAsyncHcsResult(ctx, err, resultJSON, computeSystem.callbackNumber, - hcsNotificationSystemStartCompleted, &timeout.SystemStart) - if err != nil { - return makeSystemError(computeSystem, operation, err, events) + if _, err := computecore.HcsWaitForOperationResult(ctx, op, 0xFFFFFFFF); err != nil { + return makeSystemError(computeSystem, operation, err, nil) } + computeSystem.startTime = time.Now() return nil } @@ -787,6 +811,9 @@ func (computeSystem *System) CloseCtx(ctx context.Context) (err error) { close(computeSystem.waitBlock) }) + // Clean up migration handle if it was opened. + computeSystem.closeMigrationHandle(ctx) + return nil }