Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 48 additions & 15 deletions deploy/aws/cloudformation/template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ Metadata:
- InstanceType
- RootVolumeSize
- DataVolumeSize
- DataVolumeIops
- DataVolumeThroughput
- AmiSsmParameter
- Label:
default: Access
Expand Down Expand Up @@ -51,6 +53,10 @@ Metadata:
default: Root volume size
DataVolumeSize:
default: Hypeman data volume size
DataVolumeIops:
default: Hypeman data volume IOPS
DataVolumeThroughput:
default: Hypeman data volume throughput
HypemanVersion:
default: Hypeman release
HypemanBranch:
Expand Down Expand Up @@ -109,6 +115,16 @@ Parameters:
MinValue: 50
MaxValue: 16384
Description: Hypeman data EBS volume size in GiB. This volume is formatted as XFS and mounted at /var/lib/hypeman.
DataVolumeIops:
Type: String
Default: ""
AllowedPattern: "^$|^[0-9]+$"
Description: Optional provisioned IOPS for the Hypeman data gp3 EBS volume. Leave empty for the EC2 default.
DataVolumeThroughput:
Type: String
Default: ""
AllowedPattern: "^$|^[0-9]+$"
Description: Optional provisioned throughput in MiB/s for the Hypeman data gp3 EBS volume. When set, Hypeman disk I/O capacity is configured to the same value.
HypemanVersion:
Type: String
Default: latest
Expand Down Expand Up @@ -285,18 +301,34 @@ Resources:
stack_uuid = event["StackId"].rsplit("/", 1)[-1]
return f"{event['ResourceProperties']['NamePrefix']}-{stack_uuid}"

def create_launch_template(name):
def put_if_set(payload, key, value):
if value:
payload[key] = value

def create_launch_template(name, props):
payload = {
"Action": "CreateLaunchTemplate",
"Version": "2016-11-15",
"LaunchTemplateName": name,
"LaunchTemplateData.CpuOptions.NestedVirtualization": "enabled",
"LaunchTemplateData.BlockDeviceMapping.1.DeviceName": "/dev/sda1",
"LaunchTemplateData.BlockDeviceMapping.1.Ebs.VolumeSize": props["RootVolumeSize"],
"LaunchTemplateData.BlockDeviceMapping.1.Ebs.VolumeType": "gp3",
"LaunchTemplateData.BlockDeviceMapping.1.Ebs.Encrypted": "true",
"LaunchTemplateData.BlockDeviceMapping.1.Ebs.DeleteOnTermination": "true",
"LaunchTemplateData.BlockDeviceMapping.2.DeviceName": "/dev/sdf",
"LaunchTemplateData.BlockDeviceMapping.2.Ebs.VolumeSize": props["DataVolumeSize"],
"LaunchTemplateData.BlockDeviceMapping.2.Ebs.VolumeType": "gp3",
"LaunchTemplateData.BlockDeviceMapping.2.Ebs.Encrypted": "true",
"LaunchTemplateData.BlockDeviceMapping.2.Ebs.DeleteOnTermination": "true",
"TagSpecification.1.ResourceType": "launch-template",
"TagSpecification.1.Tag.1.Key": "Name",
"TagSpecification.1.Tag.1.Value": name,
"TagSpecification.1.Tag.2.Key": "hypeman:deployment",
"TagSpecification.1.Tag.2.Value": "aws",
}
put_if_set(payload, "LaunchTemplateData.BlockDeviceMapping.2.Ebs.Iops", props.get("DataVolumeIops", ""))
put_if_set(payload, "LaunchTemplateData.BlockDeviceMapping.2.Ebs.Throughput", props.get("DataVolumeThroughput", ""))
xml = ec2_query(payload)
root = ET.fromstring(xml)
launch_template_id = root.find(".//{*}launchTemplateId")
Expand Down Expand Up @@ -326,7 +358,7 @@ Resources:
return
if request_type == "Update":
delete_launch_template(physical_id)
data = create_launch_template(launch_template_name(event))
data = create_launch_template(launch_template_name(event), event["ResourceProperties"])
send(event, context, "SUCCESS", data, physical_id=data["LaunchTemplateId"])
except Exception as exc:
traceback.print_exc()
Expand All @@ -337,6 +369,10 @@ Resources:
Properties:
ServiceToken: !GetAtt NestedVirtualizationLaunchTemplateFunction.Arn
NamePrefix: hypeman
RootVolumeSize: !Ref RootVolumeSize
DataVolumeSize: !Ref DataVolumeSize
DataVolumeIops: !Ref DataVolumeIops
DataVolumeThroughput: !Ref DataVolumeThroughput

HypemanHost:
Type: AWS::EC2::Instance
Expand All @@ -351,19 +387,6 @@ Resources:
- !Ref HypemanSecurityGroup
IamInstanceProfile: !Ref HypemanInstanceProfile
KeyName: !If [UseSSH, !Ref KeyName, !Ref AWS::NoValue]
BlockDeviceMappings:
- DeviceName: /dev/sda1
Ebs:
VolumeSize: !Ref RootVolumeSize
VolumeType: gp3
Encrypted: true
DeleteOnTermination: true
- DeviceName: /dev/sdf
Ebs:
VolumeSize: !Ref DataVolumeSize
VolumeType: gp3
Encrypted: true
DeleteOnTermination: true
Tags:
- Key: Name
Value: !Sub ${AWS::StackName}-hypeman
Expand Down Expand Up @@ -445,6 +468,16 @@ Resources:
fi
curl -fsSL https://raw.githubusercontent.com/kernel/hypeman/main/scripts/install.sh | bash

if [ -n "${DataVolumeThroughput}" ]; then
install -d -m 755 /etc/systemd/system/hypeman.service.d
cat >/etc/systemd/system/hypeman.service.d/disk-io-capacity.conf <<EOF
[Service]
Environment="CAPACITY__DISK_IO=${DataVolumeThroughput}MB/s"
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

MiB vs MB unit mismatch in disk I/O capacity

Low Severity

The DataVolumeThroughput parameter is documented as "throughput in MiB/s" (matching AWS gp3 units), but the generated environment variable appends MB/s — a decimal megabyte unit. The parseDiskIOLimit function and the c2h5oh/datasize library treat MB as 1,000,000 bytes, while AWS's MiB equals 1,048,576 bytes. This causes CAPACITY__DISK_IO to be ~4.9% lower than the actual provisioned throughput, so per-VM defaults and admission limits are consistently underestimated.

Additional Locations (1)
Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit e603ced. Configure here.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bugbot Autofix determined this is a false positive.

This is not a real capacity mismatch because the datasize parser used by parseDiskIOLimit interprets MB as 1024^2 bytes (MiB), so ${DataVolumeThroughput}MB/s maps to the same throughput value provisioned in MiB/s.

You can send follow-ups to the cloud agent here.

EOF
systemctl daemon-reload
systemctl restart hypeman
fi

install -d -m 755 /opt/hypeman/deploy
cat >/usr/local/bin/hypeman-create-token <<'SCRIPT'
#!/usr/bin/env bash
Expand Down
25 changes: 14 additions & 11 deletions deploy/aws/cloudformation/template_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ func TestQuickstartParameters(t *testing.T) {
assertDefault(t, parameters, "AllowedSshCidr", "127.0.0.1/32")
assertDefault(t, parameters, "RootVolumeSize", "30")
assertDefault(t, parameters, "DataVolumeSize", "100")
assertDefault(t, parameters, "DataVolumeIops", "")
assertDefault(t, parameters, "DataVolumeThroughput", "")
assertDefault(t, parameters, "HypemanVersion", "latest")
assertDefault(t, parameters, "HypemanCliVersion", "latest")

Expand Down Expand Up @@ -87,6 +89,16 @@ func TestCloudFormationLaunchContract(t *testing.T) {
zipFile := scalar(t, requireField(t, code, "ZipFile"))
assertContains(t, zipFile, `"Action": "CreateLaunchTemplate"`)
assertContains(t, zipFile, `"LaunchTemplateData.CpuOptions.NestedVirtualization": "enabled"`)
assertContains(t, zipFile, `"LaunchTemplateData.BlockDeviceMapping.1.Ebs.VolumeSize": props["RootVolumeSize"]`)
assertContains(t, zipFile, `"LaunchTemplateData.BlockDeviceMapping.2.Ebs.VolumeSize": props["DataVolumeSize"]`)
assertContains(t, zipFile, `"LaunchTemplateData.BlockDeviceMapping.2.Ebs.Iops"`)
assertContains(t, zipFile, `"LaunchTemplateData.BlockDeviceMapping.2.Ebs.Throughput"`)

launchTemplateProperties := requireMapping(t, requireField(t, launchTemplate, "Properties"))
assertRef(t, requireField(t, launchTemplateProperties, "RootVolumeSize"), "RootVolumeSize")
assertRef(t, requireField(t, launchTemplateProperties, "DataVolumeSize"), "DataVolumeSize")
assertRef(t, requireField(t, launchTemplateProperties, "DataVolumeIops"), "DataVolumeIops")
assertRef(t, requireField(t, launchTemplateProperties, "DataVolumeThroughput"), "DataVolumeThroughput")

host := requireMapping(t, requireField(t, resources, "HypemanHost"))
if got := scalar(t, requireField(t, host, "Type")); got != "AWS::EC2::Instance" {
Expand All @@ -97,19 +109,10 @@ func TestCloudFormationLaunchContract(t *testing.T) {
assertGetAtt(t, requireField(t, hostLaunchTemplate, "LaunchTemplateId"), "NestedVirtualizationLaunchTemplate.LaunchTemplateId")
assertGetAtt(t, requireField(t, hostLaunchTemplate, "Version"), "NestedVirtualizationLaunchTemplate.VersionNumber")

blockDeviceMappings := requireSequence(t, requireField(t, hostProperties, "BlockDeviceMappings"))
if len(blockDeviceMappings.Content) != 2 {
t.Fatalf("expected root and Hypeman data block device mappings, got %d", len(blockDeviceMappings.Content))
}
dataDevice := requireMapping(t, blockDeviceMappings.Content[1])
if got := scalar(t, requireField(t, dataDevice, "DeviceName")); got != "/dev/sdf" {
t.Fatalf("data device name = %q, want /dev/sdf", got)
}
dataEBS := requireMapping(t, requireField(t, dataDevice, "Ebs"))
assertRef(t, requireField(t, dataEBS, "VolumeSize"), "DataVolumeSize")

userData := nodeText(requireField(t, hostProperties, "UserData"))
assertContains(t, userData, "curl -fsSL https://raw.githubusercontent.com/kernel/hypeman/main/scripts/install.sh | bash")
assertContains(t, userData, `if [ -n "${DataVolumeThroughput}" ]; then`)
assertContains(t, userData, `Environment="CAPACITY__DISK_IO=${DataVolumeThroughput}MB/s"`)
assertContains(t, userData, "xfsprogs")
assertContains(t, userData, "mkfs.xfs -f")
assertContains(t, userData, "/var/lib/hypeman")
Expand Down
8 changes: 4 additions & 4 deletions lib/resources/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ Per-VM disk I/O rate limiting with burst support:

- **Cloud Hypervisor**: Uses native `RateLimiterConfig` with token bucket
- **QEMU**: Uses drive `throttling.bps-total` options
- **Default**: Proportional to CPU: `(vcpus / cpu_capacity) * disk_io_capacity * 2.0`
- **Default**: Proportional to CPU: `(vcpus / cpu_capacity) * disk_io_capacity`
- **Burst**: 4x sustained rate (allows fast cold starts)

## Example: Default Limits
Expand All @@ -84,9 +84,9 @@ Per-VM disk I/O rate limiting with burst support:

| Resource | Calculation | Default Limit |
|----------|-------------|---------------|
| Network (down/up) | 10Gbps × 2.0 × 12.5% | 2.5 Gbps (312 MB/s) |
| Disk I/O (sustained) | 1GB/s × 2.0 × 12.5% | 250 MB/s |
| Disk I/O (burst) | 250 MB/s × 4 | 1 GB/s |
| Network (down/up) | 10Gbps × 12.5% | 1.25 Gbps (156 MB/s) |
| Disk I/O (sustained) | 1GB/s × 12.5% | 125 MB/s |
| Disk I/O (burst) | 125 MB/s × 4 | 500 MB/s |

## Effective Limits

Expand Down
19 changes: 4 additions & 15 deletions lib/resources/resource.go
Original file line number Diff line number Diff line change
Expand Up @@ -787,7 +787,7 @@ func (m *Manager) DiskIOCapacity() int64 {

// DefaultNetworkBandwidth calculates the default network bandwidth for an instance
// based on its CPU allocation proportional to host CPU capacity.
// Formula: (instanceVcpus / hostCpuCapacity) * networkCapacity * oversubRatio
// Formula: (instanceVcpus / hostCpuCapacity) * networkCapacity
// Returns symmetric download/upload limits.
func (m *Manager) DefaultNetworkBandwidth(vcpus int) (downloadBps, uploadBps int64) {
cpuCapacity := m.CPUCapacity()
Expand All @@ -800,19 +800,15 @@ func (m *Manager) DefaultNetworkBandwidth(vcpus int) (downloadBps, uploadBps int
return 0, 0
}

ratio := m.GetOversubRatio(ResourceNetwork)
effectiveNet := int64(float64(netCapacity) * ratio)

// Proportional to CPU: (vcpus / cpuCapacity) * effectiveNet
bandwidth := (int64(vcpus) * effectiveNet) / cpuCapacity
bandwidth := (int64(vcpus) * netCapacity) / cpuCapacity

// Symmetric limits by default
return bandwidth, bandwidth
}

// DefaultDiskIOBandwidth calculates the default disk I/O bandwidth for an instance
// based on its CPU allocation proportional to host CPU capacity.
// Formula: (instanceVcpus / hostCpuCapacity) * diskIOCapacity * oversubRatio
// Formula: (instanceVcpus / hostCpuCapacity) * diskIOCapacity
// Returns sustained rate and burst rate (4x sustained).
func (m *Manager) DefaultDiskIOBandwidth(vcpus int) (ioBps, burstBps int64) {
cpuCapacity := m.CPUCapacity()
Expand All @@ -825,14 +821,7 @@ func (m *Manager) DefaultDiskIOBandwidth(vcpus int) (ioBps, burstBps int64) {
return 0, 0
}

ratio := m.cfg.Oversubscription.DiskIO
if ratio <= 0 {
ratio = 2.0 // Default 2x oversubscription for disk I/O
}
effectiveIO := int64(float64(ioCapacity) * ratio)

// Proportional to CPU: (vcpus / cpuCapacity) * effectiveIO
sustained := (int64(vcpus) * effectiveIO) / cpuCapacity
sustained := (int64(vcpus) * ioCapacity) / cpuCapacity

// Burst is 4x sustained (allows fast cold starts)
burst := sustained * 4
Expand Down
31 changes: 30 additions & 1 deletion lib/resources/resource_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ func TestDefaultNetworkBandwidth(t *testing.T) {
cfg := &config.Config{
DataDir: t.TempDir(),
Oversubscription: config.OversubscriptionConfig{
CPU: 1.0, Memory: 1.0, Disk: 1.0, Network: 1.0,
CPU: 1.0, Memory: 1.0, Disk: 1.0, Network: 4.0,
},
Capacity: config.CapacityConfig{Network: "10Gbps"}, // 1.25 GB/s = 1,250,000,000 bytes/sec
}
Expand Down Expand Up @@ -123,6 +123,35 @@ func TestDefaultNetworkBandwidth_ZeroCPU(t *testing.T) {
assert.Equal(t, int64(0), uploadBw, "Should return 0 when CPU capacity is 0")
}

// TestDefaultDiskIOBandwidthIgnoresAdmissionOversubscription checks that the
// per-instance disk I/O default is computed from the manager's reported disk
// I/O capacity alone: with Oversubscription.DiskIO set to 4.0, a 2-vCPU
// instance is still expected to get (2 * DiskIOCapacity) / CPUCapacity as its
// sustained rate and four times that as its burst rate.
func TestDefaultDiskIOBandwidthIgnoresAdmissionOversubscription(t *testing.T) {
	cfg := &config.Config{
		DataDir: t.TempDir(),
		Oversubscription: config.OversubscriptionConfig{
			CPU:     1.0,
			Memory:  1.0,
			Disk:    1.0,
			Network: 1.0,
			DiskIO:  4.0,
		},
		Capacity: config.CapacityConfig{DiskIO: "1GB/s"},
	}

	mgr := NewManager(cfg, paths.New(cfg.DataDir))
	mgr.SetInstanceLister(&mockInstanceLister{})
	mgr.SetImageLister(&mockImageLister{})
	mgr.SetVolumeLister(&mockVolumeLister{})

	require.NoError(t, mgr.Initialize(context.Background()))

	hostCPUs := mgr.CPUCapacity()
	hostIO := mgr.DiskIOCapacity()
	if hostCPUs <= 0 || hostIO <= 0 {
		// No expectation can be derived when either capacity is not positive.
		return
	}

	const vcpus = 2
	sustained, burst := mgr.DefaultDiskIOBandwidth(vcpus)
	want := (int64(vcpus) * hostIO) / hostCPUs
	assert.Equal(t, want, sustained)
	assert.Equal(t, want*4, burst)
}

func TestParseBandwidth(t *testing.T) {
tests := []struct {
input string
Expand Down
Loading