Skip to content

Commit

Permalink
feat: add constructed event for kube api startup window
Browse files Browse the repository at this point in the history
During single-node deployments, knowing the window between the end of graceful shutdown and the start of readyz is helpful for determining when connection-refused events are expected.

Signed-off-by: ehila <ehila@redhat.com>
  • Loading branch information
eggfoobar committed Apr 19, 2024
1 parent 34cefb1 commit cdbed6f
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 8 deletions.
10 changes: 7 additions & 3 deletions e2echart/e2e-chart-template.html
Expand Up @@ -135,7 +135,7 @@ <h5 class="modal-title">Resource</h5>
}

/**
 * Reports whether an interval belongs on the API server shutdown/startup row.
 *
 * An interval qualifies when its tempSource is either
 * "APIServerGracefulShutdown" or "APIServerStartup". Only a single return is
 * kept: an earlier return that checked the shutdown source alone would make
 * the startup check unreachable.
 *
 * @param {Object} eventInterval - interval record; only `tempSource` is read.
 * @returns {boolean} true when the interval's source is one of the two above.
 */
function isGracefulShutdownActivity(eventInterval) {
    return (eventInterval.tempSource === "APIServerGracefulShutdown" || eventInterval.tempSource === "APIServerStartup")
}

function isEndpointConnectivity(eventInterval) {
Expand Down Expand Up @@ -260,6 +260,10 @@ <h5 class="modal-title">Resource</h5>
}

/**
 * Builds the [label, sub-label, category] triple for an API server
 * shutdown/startup interval.
 *
 * Intervals whose message contains "startup-analyzer" are categorised as
 * "StartupInterval" and labelled with the raw `item.locator`; every other
 * interval is categorised as "GracefulShutdownInterval" and labelled with the
 * display string built from `item.tempStructuredLocator`.
 *
 * @param {Object} item - interval record; reads `message`, `locator` and
 *     `tempStructuredLocator`.
 * @returns {Array} three-element array: label, empty string, category name.
 */
function apiserverShutdownValue(item) {
    const isStartupInterval = item.message.includes("startup-analyzer")
    if (isStartupInterval) {
        return [item.locator, "", "StartupInterval"]
    }

    // TODO: isolate DNS error into CIClusterDisruption
    const shutdownLabel = buildLocatorDisplayString(item.tempStructuredLocator)
    return [shutdownLabel, "", "GracefulShutdownInterval"]
}
Expand Down Expand Up @@ -534,7 +538,7 @@ <h5 class="modal-title">Resource</h5>
'Update', 'Drain', 'Reboot', 'OperatingSystemUpdate', 'NodeNotReady', // nodes
'Passed', 'Skipped', 'Flaked', 'Failed', // tests
'PodCreated', 'PodScheduled', 'PodTerminating','ContainerWait', 'ContainerStart', 'ContainerNotReady', 'ContainerReady', 'ContainerReadinessFailed', 'ContainerReadinessErrored', 'StartupProbeFailed', // pods
'CIClusterDisruption', 'Disruption', // disruption
'CIClusterDisruption', 'Disruption', 'GracefulShutdownInterval', 'StartupInterval', // disruption
'Degraded', 'Upgradeable', 'False', 'Unknown',
'PodLogInfo', 'PodLogWarning', 'PodLogError',
'EtcdOther', 'EtcdLeaderFound', 'EtcdLeaderLost', 'EtcdLeaderElected', 'EtcdLeaderMissing'])
Expand All @@ -545,7 +549,7 @@ <h5 class="modal-title">Resource</h5>
'#1e7bd9', '#4294e6', '#6aaef2', '#96cbff', '#fada5e', // nodes
'#3cb043', '#ceba76', '#ffa500', '#d0312d', // tests
'#96cbff', '#1e7bd9', '#ffa500', '#ca8dfd', '#9300ff', '#fada5e','#3cb043', '#d0312d', '#d0312d', '#c90076', // pods
'#96cbff', '#d0312d', // disruption
'#96cbff', '#d0312d', '#6a6b6b','#373838', // disruption
'#b65049', '#32b8b6', '#ffffff', '#bbbbbb',
'#96cbff', '#fada5e', '#d0312d',
'#d3d3de', '#03fc62', '#fc0303', '#fada5e', '#8c5efa']); // EtcdLeadership
Expand Down
2 changes: 2 additions & 0 deletions pkg/monitor/monitorapi/types.go
Expand Up @@ -159,6 +159,7 @@ const (
DisruptionSamplerOutageBeganEventReason IntervalReason = "DisruptionSamplerOutageBegan"
GracefulAPIServerShutdown IntervalReason = "GracefulAPIServerShutdown"
IncompleteAPIServerShutdown IntervalReason = "IncompleteAPIServerShutdown"
APIServerStartupWindow IntervalReason = "StartupWindow"

HttpClientConnectionLost IntervalReason = "HttpClientConnectionLost"

Expand Down Expand Up @@ -269,6 +270,7 @@ type IntervalSource string
const (
SourceAlert IntervalSource = "Alert"
SourceAPIServerShutdown IntervalSource = "APIServerShutdown"
SourceAPIServerStartup IntervalSource = "APIServerStartup"
SourceDisruption IntervalSource = "Disruption"
SourceE2ETest IntervalSource = "E2ETest"
SourceKubeEvent IntervalSource = "KubeEvent"
Expand Down
Expand Up @@ -901,10 +901,13 @@ func newSingleNodeConnectionRefusedEventMatcher(finalIntervals monitorapi.Interv
)
snoTopology := v1.SingleReplicaTopologyMode
ocpAPISeverTargetDownIntervals := finalIntervals.Filter(func(eventInterval monitorapi.Interval) bool {
return eventInterval.Source == monitorapi.SourceAlert &&
isTargetDown := eventInterval.Source == monitorapi.SourceAlert &&
eventInterval.StructuredLocator.Keys[monitorapi.LocatorAlertKey] == "TargetDown" &&
(eventInterval.StructuredLocator.Keys[monitorapi.LocatorNamespaceKey] == ocpAPINamespace ||
eventInterval.StructuredLocator.Keys[monitorapi.LocatorNamespaceKey] == ocpOAuthAPINamespace)

isKubeAPIStartupWindow := eventInterval.Source == monitorapi.SourceAPIServerStartup
return isTargetDown || isKubeAPIStartupWindow
})
if len(ocpAPISeverTargetDownIntervals) > 0 {
logrus.Infof("found %d OCP APIServer TargetDown intervals", len(ocpAPISeverTargetDownIntervals))
Expand Down
Expand Up @@ -3,6 +3,7 @@ package apiservergracefulrestart
import (
"context"
"fmt"
"strings"
"time"

"github.com/openshift/origin/pkg/monitortestframework"
Expand Down Expand Up @@ -86,6 +87,10 @@ func (*apiserverGracefulShutdownAnalyzer) ConstructComputedIntervals(ctx context
delete(startedIntervals, key)
}

if strings.Contains(podRef.Name, "kube-apiserver") {
startedIntervals[key] = currInterval.To
}

computedIntervals = append(computedIntervals,
monitorapi.NewInterval(monitorapi.APIServerGracefulShutdown, monitorapi.Info).
Locator(monitorapi.NewLocator().
Expand All @@ -98,6 +103,25 @@ func (*apiserverGracefulShutdownAnalyzer) ConstructComputedIntervals(ctx context
Display().
Build(startTime, currInterval.To),
)
case "HasBeenReady":
startTime := beginning
if prevStart, ok := startedIntervals[key]; ok {
startTime = prevStart
delete(startedIntervals, key)
}

computedIntervals = append(computedIntervals,
monitorapi.NewInterval(monitorapi.SourceAPIServerStartup, monitorapi.Info).
Locator(monitorapi.NewLocator().
LocateServer(namespaceToServer[podRef.Namespace], nodeName, podRef.Namespace, podRef.Name, true),
).
Message(monitorapi.NewMessage().
Constructed("startup-analyzer").
Reason(monitorapi.APIServerStartupWindow),
).
Display().
Build(startTime, currInterval.To),
)
}
}

Expand Down Expand Up @@ -161,7 +185,7 @@ func interesting(interval monitorapi.Interval) (monitorapi.IntervalReason, bool)
reason := interval.StructuredMessage.Reason
switch reason {
// openshift-apiserver still is using the old event name TerminationStart
case "ShutdownInitiated", "TerminationStart", "TerminationGracefulTerminationFinished":
case "ShutdownInitiated", "TerminationStart", "TerminationGracefulTerminationFinished", "HasBeenReady":
return reason, true
default:
return "", false
Expand Down
10 changes: 7 additions & 3 deletions test/extended/testdata/bindata.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit cdbed6f

Please sign in to comment.