Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
274 changes: 274 additions & 0 deletions test/extended/node/node_e2e/node.go
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
package node

import (
"context"
"path/filepath"
"strconv"
"strings"
"time"

g "github.com/onsi/ginkgo/v2"
o "github.com/onsi/gomega"
ote "github.com/openshift-eng/openshift-tests-extension/pkg/ginkgo"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/apimachinery/pkg/util/wait"
e2e "k8s.io/kubernetes/test/e2e/framework"

Expand Down Expand Up @@ -157,4 +163,272 @@ var _ = g.Describe("[sig-node] [Jira:Node/Kubelet] Kubelet, CRI-O, CPU manager",
e2e.Logf("/dev/fuse mount output: %s", output)
o.Expect(output).To(o.ContainSubstring("fuse"), "dev fuse is not mounted inside pod")
})

//author: minmli@redhat.com
//migrated from openshift-tests-private
//automates: https://issues.redhat.com/browse/OCPBUGS-44493
g.It("[OTP] add configurable terminationGracePeriod to liveness and startup probes [OCP-44493]", ote.Informing(), func() {
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

Test name doesn't match the run command in the PR description.

Line 170 uses terminationGracePeriod (no “Seconds”), but the PR description's test command targets terminationGracePeriodSeconds. The Kubernetes API field is terminationGracePeriodSeconds; align the title to match (and ensure the Polarion title matches as well).

📝 Proposed fix
-	g.It("[OTP] add configurable terminationGracePeriod to liveness and startup probes [OCP-44493]", ote.Informing(), func() {
+	g.It("[OTP] add configurable terminationGracePeriodSeconds to liveness and startup probes [OCP-44493]", ote.Informing(), func() {
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
g.It("[OTP] add configurable terminationGracePeriod to liveness and startup probes [OCP-44493]", ote.Informing(), func() {
g.It("[OTP] add configurable terminationGracePeriodSeconds to liveness and startup probes [OCP-44493]", ote.Informing(), func() {
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@test/extended/node/node_e2e/node.go` at line 170, Update the test title
string used in g.It to reference the correct Kubernetes field name
"terminationGracePeriodSeconds" (currently "terminationGracePeriod") and ensure
any Polarion title/identifier used in the same test block is updated to match;
locate the g.It invocation (the test named "[OTP] add configurable
terminationGracePeriod to liveness and startup probes [OCP-44493]") and change
the human-readable title and Polarion metadata to use
"terminationGracePeriodSeconds" so the test name matches the PR description and
API field.

ctx := context.Background()

g.By("Check if featureSet is empty in cluster")
featureSet, err := oc.AsAdmin().WithoutNamespace().Run("get").Args("featuregate", "cluster", "-o=jsonpath={.spec.featureSet}").Output()
o.Expect(err).NotTo(o.HaveOccurred(), "failed to get featuregate")
e2e.Logf("featureSet is: %s", featureSet)
if featureSet != "" {
g.Skip("featureSet is not empty, skipping test")
}

oc.SetupProject()
namespace := oc.Namespace()

// parseDurationToSeconds converts a compact age string made of
// <integer><unit> pairs (for example "45s", "1m30s", "2h5m" or "1d2h") into a
// number of seconds. Supported units are d (days), h (hours), m (minutes) and
// s (seconds).
//
// It returns an error for anything that is not a complete sequence of such
// pairs: an empty string, a number with no unit ("1m30"), a unit with no
// number, or an unknown unit. Callers therefore never mistake an unparseable
// age for "0 seconds".
var parseDurationToSeconds = func(durationStr string) (int, error) {
	// invalid builds the error for malformed input. strconv.NumError is reused
	// so the message carries the offending string while relying only on the
	// packages this helper already uses.
	invalid := func() error {
		return &strconv.NumError{Func: "parseDurationToSeconds", Num: durationStr, Err: strconv.ErrSyntax}
	}
	if durationStr == "" {
		return 0, invalid()
	}

	totalSeconds := 0
	for remaining := durationStr; remaining != ""; {
		// Each pair is "<digits><unit>"; the unit must exist and must be
		// preceded by at least one character to parse as the amount.
		unitIdx := strings.IndexAny(remaining, "dhms")
		if unitIdx <= 0 {
			return 0, invalid()
		}
		amount, err := strconv.Atoi(remaining[:unitIdx])
		if err != nil {
			return 0, err
		}
		switch remaining[unitIdx] {
		case 'd':
			totalSeconds += amount * 24 * 60 * 60
		case 'h':
			totalSeconds += amount * 60 * 60
		case 'm':
			totalSeconds += amount * 60
		case 's':
			totalSeconds += amount
		}
		remaining = remaining[unitIdx+1:]
	}
	return totalSeconds, nil
}

// Helper to verify probe termination period
verifyProbeTermination := func(podName string, expectedTerminationSec int) error {
return wait.PollUntilContextTimeout(ctx, 10*time.Second, 4*time.Minute, true, func(ctx context.Context) (bool, error) {
podDesc, err := oc.AsAdmin().WithoutNamespace().Run("describe").Args("pod", podName, "-n", namespace).Output()
if err != nil {
e2e.Logf("Error describing pod: %v", err)
return false, nil
}

// Look for probe failure and container start events
probeFailLine := ""
containerStartLine := ""
for _, line := range strings.Split(podDesc, "\n") {
if strings.Contains(line, "Container") && strings.Contains(line, "failed") && strings.Contains(line, "probe") && strings.Contains(line, "will be restarted") {
probeFailLine = line
}
if strings.Contains(line, "Started container") {
containerStartLine = line
}
}

if probeFailLine == "" || containerStartLine == "" {
e2e.Logf("Waiting for probe failure and container start events")
return false, nil
}

e2e.Logf("Probe failure event: %s", probeFailLine)
e2e.Logf("Container start event: %s", containerStartLine)

// Extract timestamps (format: "1m30s" or "45s")
probeFailFields := strings.Fields(probeFailLine)
containerStartFields := strings.Fields(containerStartLine)
if len(probeFailFields) < 3 || len(containerStartFields) < 3 {
e2e.Logf("Unable to parse event timestamps")
return false, nil
}

probeFailTime := probeFailFields[2]
containerStartTime := containerStartFields[2]

probeFailSec, err := parseDurationToSeconds(probeFailTime)
if err != nil {
e2e.Logf("Error parsing probe fail time: %v", err)
return false, nil
}

containerStartSec, err := parseDurationToSeconds(containerStartTime)
if err != nil {
e2e.Logf("Error parsing container start time: %v", err)
return false, nil
}

timeDiff := probeFailSec - containerStartSec
e2e.Logf("Time difference: %d seconds (expected: %d ±10 seconds)", timeDiff, expectedTerminationSec)

// Allow range: [expectedTerminationSec-3, expectedTerminationSec+10]
if timeDiff >= (expectedTerminationSec-3) && timeDiff <= (expectedTerminationSec+10) {
e2e.Logf("Termination grace period check passed")
return true, nil
}

e2e.Logf("Time difference %d is outside expected range [%d, %d]", timeDiff, expectedTerminationSec-3, expectedTerminationSec+10)
return false, nil
})
}

g.By("Test liveness probe with probe-level terminationGracePeriodSeconds")
livenessPod := &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "liveness-probe",
Namespace: namespace,
},
Spec: corev1.PodSpec{
TerminationGracePeriodSeconds: &[]int64{60}[0],
SecurityContext: &corev1.PodSecurityContext{
RunAsNonRoot: &[]bool{true}[0],
SeccompProfile: &corev1.SeccompProfile{
Type: corev1.SeccompProfileTypeRuntimeDefault,
},
},
Containers: []corev1.Container{
{
Name: "test",
Image: "quay.io/openshifttest/nginx-alpine@sha256:04f316442d48ba60e3ea0b5a67eb89b0b667abf1c198a3d0056ca748736336a0",
SecurityContext: &corev1.SecurityContext{
AllowPrivilegeEscalation: &[]bool{false}[0],
Capabilities: &corev1.Capabilities{
Drop: []corev1.Capability{"ALL"},
},
},
Command: []string{"bash", "-c", "sleep 100000000"},
Ports: []corev1.ContainerPort{
{ContainerPort: 8080},
},
LivenessProbe: &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{
Path: "/healthz",
Port: intstr.FromInt(8080),
},
},
FailureThreshold: 1,
PeriodSeconds: 60,
TerminationGracePeriodSeconds: &[]int64{10}[0],
},
},
},
},
}

_, err = oc.KubeClient().CoreV1().Pods(namespace).Create(ctx, livenessPod, metav1.CreateOptions{})
o.Expect(err).NotTo(o.HaveOccurred(), "failed to create liveness probe pod")
g.DeferCleanup(oc.KubeClient().CoreV1().Pods(namespace).Delete, ctx, "liveness-probe", metav1.DeleteOptions{})

err = verifyProbeTermination("liveness-probe", 10)
o.Expect(err).NotTo(o.HaveOccurred(), "liveness probe termination grace period not honored")

g.By("Test startup probe with probe-level terminationGracePeriodSeconds")
startupPod := &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "startup-probe",
Namespace: namespace,
},
Spec: corev1.PodSpec{
TerminationGracePeriodSeconds: &[]int64{60}[0],
SecurityContext: &corev1.PodSecurityContext{
RunAsNonRoot: &[]bool{true}[0],
SeccompProfile: &corev1.SeccompProfile{
Type: corev1.SeccompProfileTypeRuntimeDefault,
},
},
Containers: []corev1.Container{
{
Name: "teststartup",
Image: "quay.io/openshifttest/nginx-alpine@sha256:04f316442d48ba60e3ea0b5a67eb89b0b667abf1c198a3d0056ca748736336a0",
SecurityContext: &corev1.SecurityContext{
AllowPrivilegeEscalation: &[]bool{false}[0],
Capabilities: &corev1.Capabilities{
Drop: []corev1.Capability{"ALL"},
},
},
Command: []string{"bash", "-c", "sleep 100000000"},
Ports: []corev1.ContainerPort{
{ContainerPort: 8080},
},
StartupProbe: &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{
Path: "/healthz",
Port: intstr.FromInt(8080),
},
},
FailureThreshold: 1,
PeriodSeconds: 60,
TerminationGracePeriodSeconds: &[]int64{10}[0],
},
},
},
},
}

_, err = oc.KubeClient().CoreV1().Pods(namespace).Create(ctx, startupPod, metav1.CreateOptions{})
o.Expect(err).NotTo(o.HaveOccurred(), "failed to create startup probe pod")
g.DeferCleanup(oc.KubeClient().CoreV1().Pods(namespace).Delete, ctx, "startup-probe", metav1.DeleteOptions{})

err = verifyProbeTermination("startup-probe", 10)
o.Expect(err).NotTo(o.HaveOccurred(), "startup probe termination grace period not honored")

g.By("Test liveness probe without probe-level terminationGracePeriodSeconds (should use pod-level)")
livenessPodNoProbeTerm := &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "liveness-probe-no-term",
Namespace: namespace,
},
Spec: corev1.PodSpec{
TerminationGracePeriodSeconds: &[]int64{60}[0],
SecurityContext: &corev1.PodSecurityContext{
RunAsNonRoot: &[]bool{true}[0],
SeccompProfile: &corev1.SeccompProfile{
Type: corev1.SeccompProfileTypeRuntimeDefault,
},
},
Containers: []corev1.Container{
{
Name: "test",
Image: "quay.io/openshifttest/nginx-alpine@sha256:04f316442d48ba60e3ea0b5a67eb89b0b667abf1c198a3d0056ca748736336a0",
SecurityContext: &corev1.SecurityContext{
AllowPrivilegeEscalation: &[]bool{false}[0],
Capabilities: &corev1.Capabilities{
Drop: []corev1.Capability{"ALL"},
},
},
Command: []string{"bash", "-c", "sleep 100000000"},
Ports: []corev1.ContainerPort{
{ContainerPort: 8080},
},
LivenessProbe: &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{
Path: "/healthz",
Port: intstr.FromInt(8080),
},
},
FailureThreshold: 1,
PeriodSeconds: 60,
// No TerminationGracePeriodSeconds - should use pod-level (60s)
},
},
},
},
}

_, err = oc.KubeClient().CoreV1().Pods(namespace).Create(ctx, livenessPodNoProbeTerm, metav1.CreateOptions{})
o.Expect(err).NotTo(o.HaveOccurred(), "failed to create liveness probe pod without probe termination")
g.DeferCleanup(oc.KubeClient().CoreV1().Pods(namespace).Delete, ctx, "liveness-probe-no-term", metav1.DeleteOptions{})

err = verifyProbeTermination("liveness-probe-no-term", 60)
o.Expect(err).NotTo(o.HaveOccurred(), "liveness probe should use pod-level termination grace period when probe-level not set")
})
})