Skip to content

Commit

Permalink
runtime: Add option "enable_guest_swap" to config hypervisor.qemu
Browse files Browse the repository at this point in the history
This commit adds the option "enable_guest_swap" to config hypervisor.qemu.
It will enable swap in the guest. Default false.
When enable_guest_swap is enabled, a raw file is inserted into the guest as the
swap device if the swappiness of a container (set by annotation
"io.katacontainers.container.resource.swappiness") is greater than 0.
The size of the swap device should be
swap_in_bytes (set by annotation
"io.katacontainers.container.resource.swap_in_bytes") - memory_limit_in_bytes.
If swap_in_bytes is not set, the size should be memory_limit_in_bytes.
If neither swap_in_bytes nor memory_limit_in_bytes is set, the size should be
default_memory.

Fixes: #2201

Signed-off-by: Hui Zhu <teawater@antfin.com>
  • Loading branch information
teawater committed Jul 19, 2021
1 parent a733f53 commit cb6b766
Show file tree
Hide file tree
Showing 8 changed files with 113 additions and 10 deletions.
11 changes: 11 additions & 0 deletions src/runtime/cli/config/configuration-qemu.toml.in
Expand Up @@ -356,6 +356,17 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
# See: https://www.qemu.org/docs/master/qemu-qmp-ref.html#Dump-guest-memory for details
#guest_memory_dump_paging=false

# Enable swap in the guest. Default false.
# When enable_guest_swap is enabled, a raw file is inserted into the guest as the swap device
# if the swappiness of a container (set by annotation "io.katacontainers.container.resource.swappiness")
# is greater than 0.
# The size of the swap device should be
# swap_in_bytes (set by annotation "io.katacontainers.container.resource.swap_in_bytes") - memory_limit_in_bytes.
# If swap_in_bytes is not set, the size should be memory_limit_in_bytes.
# If neither swap_in_bytes nor memory_limit_in_bytes is set, the size should
# be default_memory.
#enable_guest_swap = true

[factory]
# VM templating support. Once enabled, new VMs are created from template
# using vm cloning. They will share the same initial kernel, initramfs and
Expand Down
1 change: 1 addition & 0 deletions src/runtime/pkg/katautils/config-settings.go.in
Expand Up @@ -55,6 +55,7 @@ const defaultVhostUserStorePath string = "/var/run/kata-containers/vhost-user/"
const defaultRxRateLimiterMaxRate = uint64(0)
const defaultTxRateLimiterMaxRate = uint64(0)
const defaultConfidentialGuest = false
const defaultGuestSwap = false

var defaultSGXEPCSize = int64(0)

Expand Down
3 changes: 3 additions & 0 deletions src/runtime/pkg/katautils/config.go
Expand Up @@ -133,6 +133,7 @@ type hypervisor struct {
DisableVhostNet bool `toml:"disable_vhost_net"`
GuestMemoryDumpPaging bool `toml:"guest_memory_dump_paging"`
ConfidentialGuest bool `toml:"confidential_guest"`
GuestSwap bool `toml:"enable_guest_swap"`
}

type runtime struct {
Expand Down Expand Up @@ -711,6 +712,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
GuestMemoryDumpPath: h.GuestMemoryDumpPath,
GuestMemoryDumpPaging: h.GuestMemoryDumpPaging,
ConfidentialGuest: h.ConfidentialGuest,
GuestSwap: h.GuestSwap,
}, nil
}

Expand Down Expand Up @@ -1066,6 +1068,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig {
TxRateLimiterMaxRate: defaultTxRateLimiterMaxRate,
SGXEPCSize: defaultSGXEPCSize,
ConfidentialGuest: defaultConfidentialGuest,
GuestSwap: defaultGuestSwap,
}
}

Expand Down
3 changes: 3 additions & 0 deletions src/runtime/virtcontainers/hypervisor.go
Expand Up @@ -458,6 +458,9 @@ type HypervisorConfig struct {

// MemOffset specifies memory space for nvdimm device
MemOffset uint64

// GuestSwap is used to enable/disable swap in the guest
GuestSwap bool
}

// vcpu mapping from vcpu number to thread number
Expand Down
3 changes: 3 additions & 0 deletions src/runtime/virtcontainers/pkg/annotations/annotations.go
Expand Up @@ -220,6 +220,9 @@ const (

// TxRateLimiterMaxRate is a sandbox annotation that specifies max rate on network I/O outbound bandwidth
TxRateLimiterMaxRate = kataAnnotHypervisorPrefix + "tx_rate_limiter_max_rate"

// EnableGuestSwap is a sandbox annotation to enable swap in the guest.
EnableGuestSwap = kataAnnotHypervisorPrefix + "enable_guest_swap"
)

// Runtime related annotations
Expand Down
7 changes: 7 additions & 0 deletions src/runtime/virtcontainers/pkg/oci/utils.go
Expand Up @@ -539,6 +539,7 @@ func addHypervisorPathOverrides(ocispec specs.Spec, config *vc.SandboxConfig, ru
}
}
}

return nil
}

Expand Down Expand Up @@ -616,6 +617,12 @@ func addHypervisorMemoryOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig
return err
}

if err := newAnnotationConfiguration(ocispec, vcAnnotations.EnableGuestSwap).setBool(func(enableGuestSwap bool) {
sbConfig.HypervisorConfig.GuestSwap = enableGuestSwap
}); err != nil {
return err
}

return nil
}

Expand Down
89 changes: 81 additions & 8 deletions src/runtime/virtcontainers/sandbox.go
Expand Up @@ -8,6 +8,7 @@ package virtcontainers

import (
"bufio"
"bytes"
"context"
"fmt"
"io"
Expand Down Expand Up @@ -65,6 +66,8 @@ const (

// DirMode is the permission bits used for creating a directory
DirMode = os.FileMode(0750) | os.ModeDir

mkswapPath = "/sbin/mkswap"
)

var (
Expand Down Expand Up @@ -200,6 +203,10 @@ type Sandbox struct {
ctx context.Context

cw *consoleWatcher

swapDeviceNum uint
swapSizeBytes int64
swapDevices []*config.BlockDrive
}

// ID returns the sandbox identifier string.
Expand Down Expand Up @@ -519,6 +526,9 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor
sharePidNs: sandboxConfig.SharePidNs,
networkNS: NetworkNamespace{NetNsPath: sandboxConfig.NetworkConfig.NetNSPath},
ctx: ctx,
swapDeviceNum: 0,
swapSizeBytes: 0,
swapDevices: []*config.BlockDrive{},
}

hypervisor.setSandbox(s)
Expand Down Expand Up @@ -1028,9 +1038,13 @@ func (s *Sandbox) addSwap(ctx context.Context, swapID string, size int64) (*conf
return nil, err
}

err = exec.CommandContext(ctx, "/sbin/mkswap", swapFile).Run()
var outbuf, errbuf bytes.Buffer
cmd := exec.CommandContext(ctx, mkswapPath, swapFile)
cmd.Stdout = &outbuf
cmd.Stderr = &errbuf
err = cmd.Run()
if err != nil {
err = fmt.Errorf("mkswap swapfile %s fail %s", swapFile, err.Error())
err = fmt.Errorf("mkswap swapfile %s fail %s stdout %s stderr %s", swapFile, err.Error(), outbuf.String(), errbuf.String())
s.Logger().WithError(err).Error("addSwap")
return nil, err
}
Expand Down Expand Up @@ -1079,6 +1093,30 @@ func (s *Sandbox) removeSwap(ctx context.Context, blockDevice *config.BlockDrive
return err
}

// setupSwap makes sure the sandbox has at least sizeBytes of swap recorded.
//
// When sizeBytes exceeds the amount already tracked in s.swapSizeBytes, one
// additional swap device covering only the missing bytes is created through
// addSwap and appended to s.swapDevices. A request that is less than or equal
// to the tracked size is a no-op: swap is never shrunk here.
//
// It returns the error from addSwap, in which case none of the bookkeeping
// fields are modified.
func (s *Sandbox) setupSwap(ctx context.Context, sizeBytes int64) error {
	// Nothing to add when the tracked swap already covers the request.
	if sizeBytes <= s.swapSizeBytes {
		return nil
	}

	swapID := fmt.Sprintf("swap%d", s.swapDeviceNum)
	missingBytes := sizeBytes - s.swapSizeBytes

	drive, err := s.addSwap(ctx, swapID, missingBytes)
	if err != nil {
		return err
	}

	// Record the new device only after it has been added successfully.
	s.swapDevices = append(s.swapDevices, drive)
	s.swapSizeBytes = sizeBytes
	s.swapDeviceNum++

	return nil
}

// cleanSwap calls removeSwap for every swap device recorded in s.swapDevices.
// A failed removal is only logged as a warning, so the remaining devices are
// still processed.
func (s *Sandbox) cleanSwap(ctx context.Context) {
	// Take the slice once so the iteration bounds are fixed up front.
	drives := s.swapDevices
	for i := 0; i < len(drives); i++ {
		drive := drives[i]
		if err := s.removeSwap(ctx, drive); err != nil {
			s.Logger().Warnf("remove swap device %+v got error %s", drive, err)
		}
	}
}

// startVM starts the VM.
func (s *Sandbox) startVM(ctx context.Context) (err error) {
span, ctx := katatrace.Trace(ctx, s.Logger(), "startVM", s.tracingTags())
Expand Down Expand Up @@ -1641,6 +1679,8 @@ func (s *Sandbox) Stop(ctx context.Context, force bool) error {
return err
}

s.cleanSwap(ctx)

return nil
}

Expand Down Expand Up @@ -1894,9 +1934,21 @@ func (s *Sandbox) updateResources(ctx context.Context) error {
// Add default vcpus for sandbox
sandboxVCPUs += s.hypervisor.hypervisorConfig().NumVCPUs

sandboxMemoryByte := s.calculateSandboxMemory()
sandboxMemoryByte, sandboxneedPodSwap, sandboxSwapByte := s.calculateSandboxMemory()
// Add default / rsvd memory for sandbox.
sandboxMemoryByte += int64(s.hypervisor.hypervisorConfig().MemorySize) << utils.MibToBytesShift
hypervisorMemoryByte := int64(s.hypervisor.hypervisorConfig().MemorySize) << utils.MibToBytesShift
sandboxMemoryByte += hypervisorMemoryByte
if sandboxneedPodSwap {
sandboxSwapByte += hypervisorMemoryByte
}

// Setup the SWAP in the guest
if sandboxSwapByte > 0 {
err = s.setupSwap(ctx, sandboxSwapByte)
if err != nil {
return err
}
}

// Update VCPUs
s.Logger().WithField("cpus-sandbox", sandboxVCPUs).Debugf("Request to hypervisor to update vCPUs")
Expand Down Expand Up @@ -1941,20 +1993,41 @@ func (s *Sandbox) updateResources(ctx context.Context) error {
return nil
}

func (s *Sandbox) calculateSandboxMemory() int64 {
func (s *Sandbox) calculateSandboxMemory() (int64, bool, int64) {
memorySandbox := int64(0)
needPodSwap := false
swapSandbox := int64(0)
for _, c := range s.config.Containers {
// Do not hot add again non-running containers resources
if cont, ok := s.containers[c.ID]; ok && cont.state.State == types.StateStopped {
s.Logger().WithField("container-id", c.ID).Debug("Do not taking into account memory resources of not running containers")
continue
}

if m := c.Resources.Memory; m != nil && m.Limit != nil {
memorySandbox += *m.Limit
if m := c.Resources.Memory; m != nil {
currentLimit := int64(0)
if m.Limit != nil {
currentLimit = *m.Limit
memorySandbox += currentLimit
}
if s.config.HypervisorConfig.GuestSwap && m.Swappiness != nil && *m.Swappiness > 0 {
currentSwap := int64(0)
if m.Swap != nil {
currentSwap = *m.Swap
}
if currentSwap == 0 {
if currentLimit == 0 {
needPodSwap = true
} else {
swapSandbox += currentLimit
}
} else if currentSwap > currentLimit {
swapSandbox = currentSwap - currentLimit
}
}
}
}
return memorySandbox
return memorySandbox, needPodSwap, swapSandbox
}

func (s *Sandbox) calculateSandboxCPUs() (uint32, error) {
Expand Down
6 changes: 4 additions & 2 deletions src/runtime/virtcontainers/sandbox_test.go
Expand Up @@ -168,8 +168,10 @@ func TestCalculateSandboxMem(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
sandbox.config.Containers = tt.containers
got := sandbox.calculateSandboxMemory()
assert.Equal(t, got, tt.want)
mem, needSwap, swap := sandbox.calculateSandboxMemory()
assert.Equal(t, mem, tt.want)
assert.Equal(t, needSwap, false)
assert.Equal(t, swap, int64(0))
})
}
}
Expand Down

0 comments on commit cb6b766

Please sign in to comment.