Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,13 @@ tests:
LVM_OPERATOR_SUB_SOURCE: lvm-catalogsource
TECH_PREVIEW_NO_UPGRADE: "true"
workflow: hypershift-openstack-nested-conformance
- as: e2e-aws-spot
cron: 0 6 * * 1
steps:
cluster_profile: hypershift-aws
env:
CI_TESTS_RUN: ^TestNodePool$/HostedCluster0/TestSpotTerminationHandler$
workflow: hypershift-aws-e2e-spot
zz_generated_metadata:
branch: release-4.22
org: openshift
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -830,6 +830,89 @@ periodics:
- name: result-aggregator
secret:
secretName: result-aggregator
- agent: kubernetes
cluster: build01
cron: 0 6 * * 1
decorate: true
decoration_config:
skip_cloning: true
extra_refs:
- base_ref: release-4.22
org: openshift
repo: hypershift
labels:
ci-operator.openshift.io/cloud: hypershift-aws
ci-operator.openshift.io/cloud-cluster-profile: hypershift-aws
ci-operator.openshift.io/variant: periodics
ci.openshift.io/generator: prowgen
job-release: "4.22"
pj-rehearse.openshift.io/can-be-rehearsed: "true"
name: periodic-ci-openshift-hypershift-release-4.22-periodics-e2e-aws-spot
spec:
containers:
- args:
- --gcs-upload-secret=/secrets/gcs/service-account.json
- --image-import-pull-secret=/etc/pull-secret/.dockerconfigjson
- --lease-server-credentials-file=/etc/boskos/credentials
- --report-credentials-file=/etc/report/credentials
- --secret-dir=/secrets/ci-pull-credentials
- --target=e2e-aws-spot
- --variant=periodics
command:
- ci-operator
env:
- name: HTTP_SERVER_IP
valueFrom:
fieldRef:
fieldPath: status.podIP
image: quay-proxy.ci.openshift.org/openshift/ci:ci_ci-operator_latest
imagePullPolicy: Always
name: ""
ports:
- containerPort: 8080
name: http
resources:
requests:
cpu: 10m
volumeMounts:
- mountPath: /etc/boskos
name: boskos
readOnly: true
- mountPath: /secrets/ci-pull-credentials
name: ci-pull-credentials
readOnly: true
- mountPath: /secrets/gcs
name: gcs-credentials
readOnly: true
- mountPath: /secrets/manifest-tool
name: manifest-tool-local-pusher
readOnly: true
- mountPath: /etc/pull-secret
name: pull-secret
readOnly: true
- mountPath: /etc/report
name: result-aggregator
readOnly: true
serviceAccountName: ci-operator
volumes:
- name: boskos
secret:
items:
- key: credentials
path: credentials
secretName: boskos-credentials
- name: ci-pull-credentials
secret:
secretName: ci-pull-credentials
- name: manifest-tool-local-pusher
secret:
secretName: manifest-tool-local-pusher
- name: pull-secret
secret:
secretName: registry-pull-credentials
- name: result-aggregator
secret:
secretName: result-aggregator
- agent: kubernetes
cluster: build01
cron: 0 */12 * * *
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"path": "hypershift/aws/e2e/spot/hypershift-aws-e2e-spot-workflow.yaml",
"owners": {
"approvers": [
"csrwng",
"enxebre",
"sjenning"
],
"reviewers": [
"csrwng",
"enxebre",
"sjenning"
]
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Workflow for the HyperShift AWS spot-instance e2e job. It wraps the e2e run
# and spot verification with steps that create and later remove the SQS queue
# and EventBridge rules used by the spot termination handler test.
workflow:
  as: hypershift-aws-e2e-spot
  documentation: |-
    The HyperShift AWS spot instance e2e workflow creates an SQS queue and
    EventBridge rules, then executes spot instance tests against a new
    ephemeral HyperShift cluster.

    This workflow extends hypershift-aws-e2e-external with SQS infrastructure
    setup/cleanup for the AWS Node Termination Handler spot instance test.
  steps:
    pre:
    - ref: ipi-install-rbac
    - chain: hypershift-setup-nested-management-cluster
    - ref: hypershift-install
    # Runs last in pre so the queue and rules exist before the tests start.
    - ref: hypershift-aws-spot-sqs-setup
    test:
    - ref: hypershift-aws-run-e2e-external
    - ref: hypershift-aws-spot-verify
    post:
    # The SQS/EventBridge cleanup is listed before the management cluster
    # teardown chain.
    - ref: hypershift-aws-spot-sqs-cleanup
    - chain: hypershift-destroy-nested-management-cluster
8 changes: 8 additions & 0 deletions ci-operator/step-registry/hypershift/aws/spot/OWNERS
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
approvers:
- csrwng
- enxebre
- sjenning
reviewers:
- csrwng
- enxebre
- sjenning
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#!/bin/bash

# errexit is intentionally NOT enabled: every cleanup action must be attempted
# even when an earlier one fails.
set -o nounset
set -o pipefail
set -o xtrace

# This step cleans up the SQS queue and EventBridge rules created by
# hypershift-aws-spot-sqs-setup. Best-effort cleanup; errors are logged
# (as WARNING lines on stderr, together with the AWS CLI error output)
# but do not fail the job.
#
# Inputs:
#   AWS_REGION                                  region of the resources (default us-east-1)
#   ${SHARED_DIR}/spot_sqs_queue_url            queue URL written by the setup step
#   ${SHARED_DIR}/spot_eventbridge_rule_prefix  rule name prefix written by the setup step
# A missing or empty state file means there is nothing to clean up for that
# resource type, and the corresponding section is skipped.

AWS_CREDS_FILE="/etc/hypershift-pool-aws-credentials/credentials"
AWS_REGION="${AWS_REGION:-us-east-1}"

export AWS_SHARED_CREDENTIALS_FILE="${AWS_CREDS_FILE}"
export AWS_DEFAULT_REGION="${AWS_REGION}"

# Number of cleanup actions that failed; used only for the final summary.
FAILURES=0

# Log a failed cleanup action without aborting the script.
warn() {
  echo "WARNING: $*" >&2
  FAILURES=$((FAILURES + 1))
}

# Read state from setup step
QUEUE_URL=""
RULE_PREFIX=""

if [[ -f "${SHARED_DIR}/spot_sqs_queue_url" ]]; then
  QUEUE_URL=$(cat "${SHARED_DIR}/spot_sqs_queue_url")
fi

if [[ -f "${SHARED_DIR}/spot_eventbridge_rule_prefix" ]]; then
  RULE_PREFIX=$(cat "${SHARED_DIR}/spot_eventbridge_rule_prefix")
fi

# Clean up EventBridge rules
if [[ -n "${RULE_PREFIX}" ]]; then
  echo "Cleaning up EventBridge rules with prefix: ${RULE_PREFIX}"

  for SUFFIX in interruption rebalance; do
    RULE_NAME="${RULE_PREFIX}-${SUFFIX}"
    echo "Removing targets and deleting rule: ${RULE_NAME}"
    # Targets are removed before the rule itself is deleted. Target id "1"
    # matches the id used by the setup step.
    if ! aws events remove-targets --rule "${RULE_NAME}" --ids 1 --region "${AWS_REGION}"; then
      warn "failed to remove targets from EventBridge rule ${RULE_NAME}"
    fi
    if ! aws events delete-rule --name "${RULE_NAME}" --region "${AWS_REGION}"; then
      warn "failed to delete EventBridge rule ${RULE_NAME}"
    fi
  done

  echo "EventBridge rule cleanup finished"
fi

# Clean up SQS queue
if [[ -n "${QUEUE_URL}" ]]; then
  echo "Deleting SQS queue: ${QUEUE_URL}"
  if aws sqs delete-queue --queue-url "${QUEUE_URL}" --region "${AWS_REGION}"; then
    echo "SQS queue deleted"
  else
    warn "failed to delete SQS queue ${QUEUE_URL}"
  fi
fi

if [[ "${FAILURES}" -gt 0 ]]; then
  echo "Spot SQS cleanup complete with ${FAILURES} failed action(s); see warnings above"
else
  echo "Spot SQS cleanup complete"
fi

# Best-effort step: never propagate cleanup failures to the job result.
exit 0
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"path": "hypershift/aws/spot/sqs-cleanup/hypershift-aws-spot-sqs-cleanup-ref.yaml",
"owners": {
"approvers": [
"csrwng",
"enxebre",
"sjenning"
],
"reviewers": [
"csrwng",
"enxebre",
"sjenning"
]
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Step that removes the SQS queue and EventBridge rules created by the
# hypershift-aws-spot-sqs-setup step.
ref:
  as: hypershift-aws-spot-sqs-cleanup
  commands: hypershift-aws-spot-sqs-cleanup-commands.sh
  credentials:
  # The commands script reads
  # /etc/hypershift-pool-aws-credentials/credentials from this mount.
  - mount_path: /etc/hypershift-pool-aws-credentials
    name: hypershift-pool-aws-credentials
    namespace: test-credentials
  env:
  - default: "us-east-1"
    name: AWS_REGION
    documentation: "AWS region for the SQS queue"
  # NOTE(review): presumably this keeps a failed cleanup from failing the
  # job - confirm against the ci-operator step documentation.
  best_effort: true
  from_image:
    namespace: ocp
    name: "4.22"
    tag: upi-installer
  grace_period: 5m0s
  resources:
    requests:
      cpu: 100m
      memory: 100Mi
  timeout: 10m0s
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
#!/bin/bash

set -o nounset
set -o errexit
set -o pipefail
set -o xtrace

# This step creates the SQS queue and EventBridge rules needed for the
# AWS Node Termination Handler spot instance e2e test.
#
# The queue name matches the hardcoded value in the hypershift e2e test:
#   test/e2e/nodepool_spot_termination_handler_test.go
#
# Uses hypershift-pool-aws-credentials which is the same credential
# the e2e test binary uses to discover the queue.
#
# Inputs:
#   AWS_REGION      region to create the resources in (default us-east-1)
#   SQS_QUEUE_NAME  queue name (default agarcial-nth-queue)
#   PROW_JOB_ID     its first 10 characters make the rule names job-specific
#
# State written to SHARED_DIR for the cleanup step:
#   spot_sqs_queue_url, spot_sqs_queue_name, spot_eventbridge_rule_prefix
# Each state file is written as soon as its value is known, so that a failure
# later in this script (errexit is on) still leaves enough information behind
# for the cleanup step to delete what was already created.
#
# Queue creation and the queue policy are mandatory (a failure aborts the
# step). EventBridge rule creation stays best-effort, but failures are
# reported as warnings instead of being silently ignored.

AWS_CREDS_FILE="/etc/hypershift-pool-aws-credentials/credentials"
AWS_REGION="${AWS_REGION:-us-east-1}"
QUEUE_NAME="${SQS_QUEUE_NAME:-agarcial-nth-queue}"
RULE_PREFIX="hypershift-ci-spot-${PROW_JOB_ID:0:10}"

export AWS_SHARED_CREDENTIALS_FILE="${AWS_CREDS_FILE}"
export AWS_DEFAULT_REGION="${AWS_REGION}"

# Number of EventBridge operations that failed; used for the summary message.
EVENTBRIDGE_FAILURES=0

# Report a non-fatal EventBridge problem.
warn() {
  echo "WARNING: $*" >&2
  EVENTBRIDGE_FAILURES=$((EVENTBRIDGE_FAILURES + 1))
}

# Create one EventBridge rule matching the given aws.ec2 detail-type and
# attach the SQS queue (QUEUE_ARN) as target id 1. Best-effort: always
# returns 0 and reports problems through warn.
#   $1  rule name
#   $2  EventBridge detail-type to match
create_event_rule() {
  local rule_name="$1"
  local detail_type="$2"
  local event_pattern="{\"source\":[\"aws.ec2\"],\"detail-type\":[\"${detail_type}\"]}"
  local failed_entries

  if ! aws events put-rule \
    --name "${rule_name}" \
    --event-pattern "${event_pattern}" \
    --region "${AWS_REGION}"; then
    warn "failed to create EventBridge rule ${rule_name}"
    return 0
  fi

  # put-targets can exit 0 while rejecting individual targets; the rejected
  # count is reported in FailedEntryCount, so check it explicitly.
  if ! failed_entries=$(aws events put-targets \
    --rule "${rule_name}" \
    --targets "Id=1,Arn=${QUEUE_ARN}" \
    --region "${AWS_REGION}" \
    --query 'FailedEntryCount' \
    --output text); then
    warn "failed to attach SQS target to EventBridge rule ${rule_name}"
    return 0
  fi

  if [[ "${failed_entries}" != "0" ]]; then
    warn "EventBridge rejected ${failed_entries} target(s) for rule ${rule_name}"
  fi
}

# Save the names that are known up front before creating anything.
echo "${QUEUE_NAME}" > "${SHARED_DIR}/spot_sqs_queue_name"
echo "${RULE_PREFIX}" > "${SHARED_DIR}/spot_eventbridge_rule_prefix"

echo "Creating SQS queue: ${QUEUE_NAME} in region ${AWS_REGION}"

# Create the queue (idempotent - returns existing queue if it already exists)
QUEUE_URL=$(aws sqs create-queue --queue-name "${QUEUE_NAME}" --region "${AWS_REGION}" --query 'QueueUrl' --output text)
echo "Queue URL: ${QUEUE_URL}"

# Save the queue URL immediately so cleanup can delete the queue even if a
# later command in this script fails.
echo "${QUEUE_URL}" > "${SHARED_DIR}/spot_sqs_queue_url"

# Get the queue ARN
QUEUE_ARN=$(aws sqs get-queue-attributes --queue-url "${QUEUE_URL}" --attribute-names QueueArn --region "${AWS_REGION}" --query 'Attributes.QueueArn' --output text)
echo "Queue ARN: ${QUEUE_ARN}"

# Set queue policy to allow EventBridge to send messages.
# The policy document is built with jq so the ARN is always correctly
# JSON-escaped. SQS expects the policy as a JSON *string* inside the
# attributes map, hence the second jq invocation.
# NOTE(review): the statement has no Condition, so it is not restricted to
# the rules created below - consider adding an aws:SourceArn condition.
POLICY=$(jq -cn --arg queue_arn "${QUEUE_ARN}" '{
  Version: "2012-10-17",
  Statement: [
    {
      Effect: "Allow",
      Principal: {Service: "events.amazonaws.com"},
      Action: "sqs:SendMessage",
      Resource: $queue_arn
    }
  ]
}')
QUEUE_ATTRIBUTES=$(jq -cn --arg policy "${POLICY}" '{Policy: $policy}')

aws sqs set-queue-attributes \
  --queue-url "${QUEUE_URL}" \
  --attributes "${QUEUE_ATTRIBUTES}" \
  --region "${AWS_REGION}"

echo "SQS queue policy updated"

# Create EventBridge rule for Spot Instance Interruption Warning
create_event_rule "${RULE_PREFIX}-interruption" "EC2 Spot Instance Interruption Warning"

# Create EventBridge rule for EC2 Instance Rebalance Recommendation
create_event_rule "${RULE_PREFIX}-rebalance" "EC2 Instance Rebalance Recommendation"

if [[ "${EVENTBRIDGE_FAILURES}" -gt 0 ]]; then
  echo "EventBridge rule setup finished with ${EVENTBRIDGE_FAILURES} failure(s); see warnings above"
else
  echo "EventBridge rules created"
fi

echo "SQS queue and EventBridge rules setup complete"
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"path": "hypershift/aws/spot/sqs-setup/hypershift-aws-spot-sqs-setup-ref.yaml",
"owners": {
"approvers": [
"csrwng",
"enxebre",
"sjenning"
],
"reviewers": [
"csrwng",
"enxebre",
"sjenning"
]
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Step that creates the SQS queue and EventBridge rules used by the spot
# termination handler e2e test.
ref:
  as: hypershift-aws-spot-sqs-setup
  commands: hypershift-aws-spot-sqs-setup-commands.sh
  credentials:
  # The commands script reads
  # /etc/hypershift-pool-aws-credentials/credentials from this mount.
  - mount_path: /etc/hypershift-pool-aws-credentials
    name: hypershift-pool-aws-credentials
    namespace: test-credentials
  env:
  - default: "us-east-1"
    name: AWS_REGION
    documentation: "AWS region for the SQS queue"
  # The commands script notes that this default matches a queue name
  # hardcoded in the hypershift e2e test.
  - default: "agarcial-nth-queue"
    name: SQS_QUEUE_NAME
    documentation: "Name of the SQS queue for spot termination handler testing"
  from_image:
    namespace: ocp
    name: "4.22"
    tag: upi-installer
  grace_period: 5m0s
  resources:
    requests:
      cpu: 100m
      memory: 100Mi
  timeout: 10m0s
Loading