Skip to content

Commit

Permalink
check-endpoints: add log data to outages
Browse files (browse the repository at this point in the history)
  • Loading branch information
sanchezl committed Jul 21, 2020
1 parent 753ac78 commit 84a9529
Show file tree
Hide file tree
Showing 2 changed files with 178 additions and 71 deletions.
32 changes: 23 additions & 9 deletions pkg/cmd/checkendpoints/controller/connection_checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ func manageStatusLogs(check *operatorcontrolplanev1alpha1.PodNetworkConnectivity
// PodNetworkConnectivityCheck.Status entries based on Successes/Failures log entries.
func manageStatusOutage(recorder events.Recorder) v1alpha1helpers.UpdateStatusFunc {
return func(status *operatorcontrolplanev1alpha1.PodNetworkConnectivityCheckStatus) {
// This func is kept simple by assuming that only on log entry has been
// This func is kept simple by assuming that only one log entry has been
// added since the last time this method was invoked. See checkEndpoint func.
var currentOutage *operatorcontrolplanev1alpha1.OutageEntry
if len(status.Outages) > 0 && status.Outages[0].End.IsZero() {
Expand All @@ -233,16 +233,30 @@ func manageStatusOutage(recorder events.Recorder) v1alpha1helpers.UpdateStatusFu
if len(status.Successes) > 0 {
latestSuccess = status.Successes[0]
}
if currentOutage == nil {
if latestFailure.Start.After(latestSuccess.Start.Time) {
recorder.Warningf("ConnectivityOutageDetected", "Connectivity outage detected: %s", latestFailure.Message)
status.Outages = append([]operatorcontrolplanev1alpha1.OutageEntry{{Start: latestFailure.Start}}, status.Outages...)
switch {
case currentOutage == nil && latestFailure.Start.After(latestSuccess.Start.Time):
// outage started
newOutage := operatorcontrolplanev1alpha1.OutageEntry{
Start: latestFailure.Start,
StartLogs: []operatorcontrolplanev1alpha1.LogEntry{latestFailure},
Message: fmt.Sprintf("Connectivity outage detected at %v", latestFailure.Start),
}
} else {
if latestSuccess.Start.After(latestFailure.Start.Time) {
currentOutage.End = latestSuccess.Start
recorder.Eventf("ConnectivityRestored", "Connectivity restored after %v: %s", currentOutage.End.Sub(currentOutage.Start.Time), latestSuccess.Message)
status.Outages = append([]operatorcontrolplanev1alpha1.OutageEntry{newOutage}, status.Outages...)
recorder.Warningf("ConnectivityOutageDetected", "Connectivity outage detected: %s", latestFailure.Message)
case currentOutage != nil && latestFailure.Start.After(latestSuccess.Start.Time):
// outage ongoing
if len(currentOutage.StartLogs) == 0 || currentOutage.StartLogs[0].Message != latestFailure.Message {
currentOutage.StartLogs = append([]operatorcontrolplanev1alpha1.LogEntry{latestFailure}, currentOutage.StartLogs...)
}
case currentOutage != nil && latestSuccess.Start.After(latestFailure.Start.Time):
// outage ended
currentOutage.End = latestSuccess.Start
outageDuration := currentOutage.End.Sub(currentOutage.Start.Time)
currentOutage.EndLogs = []operatorcontrolplanev1alpha1.LogEntry{latestSuccess}
currentOutage.Message = fmt.Sprintf("Connectivity restored after %v", outageDuration)
recorder.Eventf("ConnectivityRestored", "Connectivity restored after %v: %s", outageDuration, latestSuccess.Message)
default:
// no outage in progress
}
}
}
Expand Down

0 comments on commit 84a9529

Please sign in to comment.