Skip to content

Commit

Permalink
feat: add constructed event for kube api startup window
Browse files Browse the repository at this point in the history
During single-node deployments, knowing the window between the end of graceful shutdown and the start of readyz helps determine when connection-refused events are expected.

Signed-off-by: ehila <ehila@redhat.com>
  • Loading branch information
eggfoobar committed Apr 16, 2024
1 parent 7379790 commit 9a62f62
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 8 deletions.
10 changes: 7 additions & 3 deletions e2echart/e2e-chart-template.html
Expand Up @@ -145,7 +145,7 @@ <h5 class="modal-title">Resource</h5>
}

// Reports whether eventInterval's tempSource marks it as API server
// graceful-shutdown activity or API server startup activity.
//
// eventInterval: object with a `tempSource` string property.
// Returns: true for "APIServerGracefulShutdown" or "APIServerStartup", false otherwise.
function isGracefulShutdownActivity(eventInterval) {
    // Both sources must be checked in one expression: an earlier return that
    // tests only the shutdown source would make the startup check unreachable.
    const source = eventInterval.tempSource
    return (source === "APIServerGracefulShutdown" || source === "APIServerStartup")
}

function isEndpointConnectivity(eventInterval) {
Expand Down Expand Up @@ -278,6 +278,10 @@ <h5 class="modal-title">Resource</h5>
}

// Builds the three-element array [locator, "", category] for an interval.
//
// item: object with a `locator` value and a `message` that supports indexOf.
// Returns: category is "StartupInterval" when the message contains
// "startup-analyzer", and "GracefulShutdownInterval" otherwise.
function apiserverShutdownValue(item) {
    const fromStartupAnalyzer = item.message.indexOf("startup-analyzer") !== -1
    // TODO: isolate DNS error into CIClusterDisruption
    const category = fromStartupAnalyzer ? "StartupInterval" : "GracefulShutdownInterval"
    return [item.locator, "", category]
}
Expand Down Expand Up @@ -484,7 +488,7 @@ <h5 class="modal-title">Resource</h5>
'Update', 'Drain', 'Reboot', 'OperatingSystemUpdate', 'NodeNotReady', // nodes
'Passed', 'Skipped', 'Flaked', 'Failed', // tests
'PodCreated', 'PodScheduled', 'PodTerminating','ContainerWait', 'ContainerStart', 'ContainerNotReady', 'ContainerReady', 'ContainerReadinessFailed', 'ContainerReadinessErrored', 'StartupProbeFailed', // pods
'CIClusterDisruption', 'Disruption', // disruption
'CIClusterDisruption', 'Disruption', 'GracefulShutdownInterval', 'StartupInterval', // disruption
'Degraded', 'Upgradeable', 'False', 'Unknown',
'PodLogInfo', 'PodLogWarning', 'PodLogError',
'EtcdOther', 'EtcdLeaderFound', 'EtcdLeaderLost', 'EtcdLeaderElected', 'EtcdLeaderMissing'])
Expand All @@ -495,7 +499,7 @@ <h5 class="modal-title">Resource</h5>
'#1e7bd9', '#4294e6', '#6aaef2', '#96cbff', '#fada5e', // nodes
'#3cb043', '#ceba76', '#ffa500', '#d0312d', // tests
'#96cbff', '#1e7bd9', '#ffa500', '#ca8dfd', '#9300ff', '#fada5e','#3cb043', '#d0312d', '#d0312d', '#c90076', // pods
'#96cbff', '#d0312d', // disruption
'#96cbff', '#d0312d', '#6a6b6b','#373838', // disruption
'#b65049', '#32b8b6', '#ffffff', '#bbbbbb',
'#96cbff', '#fada5e', '#d0312d',
'#d3d3de', '#03fc62', '#fc0303', '#fada5e', '#8c5efa']); // EtcdLeadership
Expand Down
2 changes: 2 additions & 0 deletions pkg/monitor/monitorapi/types.go
Expand Up @@ -163,6 +163,7 @@ const (
DisruptionEndedEventReason IntervalReason = "DisruptionEnded"
DisruptionSamplerOutageBeganEventReason IntervalReason = "DisruptionSamplerOutageBegan"
GracefulAPIServerShutdown IntervalReason = "GracefulShutdownWindow"
APIServerStartupWindow IntervalReason = "StartupWindow"

HttpClientConnectionLost IntervalReason = "HttpClientConnectionLost"

Expand Down Expand Up @@ -272,6 +273,7 @@ type IntervalSource string
const (
SourceAlert IntervalSource = "Alert"
SourceAPIServerShutdown IntervalSource = "APIServerShutdown"
SourceAPIServerStartup IntervalSource = "APIServerStartup"
SourceDisruption IntervalSource = "Disruption"
SourceE2ETest IntervalSource = "E2ETest"
SourceKubeEvent IntervalSource = "KubeEvent"
Expand Down
Expand Up @@ -901,10 +901,13 @@ func newSingleNodeConnectionRefusedEventMatcher(finalIntervals monitorapi.Interv
)
snoTopology := v1.SingleReplicaTopologyMode
ocpAPISeverTargetDownIntervals := finalIntervals.Filter(func(eventInterval monitorapi.Interval) bool {
return eventInterval.Source == monitorapi.SourceAlert &&
isTargetDown := eventInterval.Source == monitorapi.SourceAlert &&
eventInterval.StructuredLocator.Keys[monitorapi.LocatorAlertKey] == "TargetDown" &&
(eventInterval.StructuredLocator.Keys[monitorapi.LocatorNamespaceKey] == ocpAPINamespace ||
eventInterval.StructuredLocator.Keys[monitorapi.LocatorNamespaceKey] == ocpOAuthAPINamespace)

isKubeAPIStartupWindow := eventInterval.Source == monitorapi.SourceAPIServerStartup
return isTargetDown || isKubeAPIStartupWindow
})
if len(ocpAPISeverTargetDownIntervals) > 0 {
logrus.Infof("found %d OCP APIServer TargetDown intervals", len(ocpAPISeverTargetDownIntervals))
Expand Down
Expand Up @@ -3,6 +3,7 @@ package apiservergracefulrestart
import (
"context"
"fmt"
"strings"
"time"

"github.com/openshift/origin/pkg/monitortestframework"
Expand Down Expand Up @@ -86,6 +87,10 @@ func (*apiserverGracefulShutdownAnalyzer) ConstructComputedIntervals(ctx context
delete(startedIntervals, key)
}

if strings.Contains(podRef.Name, "kube-apiserver") {
startedIntervals[key] = currInterval.To
}

computedIntervals = append(computedIntervals,
monitorapi.NewInterval(monitorapi.APIServerGracefulShutdown, monitorapi.Info).
Locator(monitorapi.NewLocator().
Expand All @@ -98,6 +103,25 @@ func (*apiserverGracefulShutdownAnalyzer) ConstructComputedIntervals(ctx context
Display().
Build(startTime, currInterval.To),
)
case "HasBeenReady":
startTime := beginning
if prevStart, ok := startedIntervals[key]; ok {
startTime = prevStart
delete(startedIntervals, key)
}

computedIntervals = append(computedIntervals,
monitorapi.NewInterval(monitorapi.SourceAPIServerStartup, monitorapi.Info).
Locator(monitorapi.NewLocator().
LocateServer(namespaceToServer[podRef.Namespace], nodeName, podRef.Namespace, podRef.Name, true),
).
Message(monitorapi.NewMessage().
Constructed("startup-analyzer").
Reason(monitorapi.APIServerStartupWindow),
).
Display().
Build(startTime, currInterval.To),
)
}
}

Expand Down Expand Up @@ -161,7 +185,7 @@ func interesting(interval monitorapi.Interval) (monitorapi.IntervalReason, bool)
reason := monitorapi.ReasonFrom(interval.Message)
switch reason {
// openshift-apiserver still is using the old event name TerminationStart
case "ShutdownInitiated", "TerminationStart", "TerminationGracefulTerminationFinished":
case "ShutdownInitiated", "TerminationStart", "TerminationGracefulTerminationFinished", "HasBeenReady":
return reason, true
default:
return "", false
Expand Down
10 changes: 7 additions & 3 deletions test/extended/testdata/bindata.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 9a62f62

Please sign in to comment.