Skip to content

Commit

Permalink
add alerts as event intervals for viewing
Browse files Browse the repository at this point in the history
  • Loading branch information
deads2k committed Oct 7, 2021
1 parent a650954 commit 6c546d1
Show file tree
Hide file tree
Showing 3 changed files with 198 additions and 2 deletions.
35 changes: 33 additions & 2 deletions e2echart/e2e-chart-template.html
Expand Up @@ -125,6 +125,13 @@ <h5 class="modal-title">Resource</h5>
return false
}

// isAlert is the timeline precondition for the "alerts" group: it reports
// whether the interval's locator begins with the "alert/" prefix.
function isAlert(eventInterval) {
    return eventInterval.locator.startsWith("alert/")
}

const rePhase = new RegExp("(^| )phase/([^ ]+)")
function nodeStateValue(item) {
let roles = ""
Expand All @@ -147,6 +154,27 @@ <h5 class="modal-title">Resource</h5>
return [item.locator, ` (${roles},updates)`, "Update"];
}

// alertSeverity chooses the timeline value (and thereby the color) for an
// alert interval by looking for a marker word anywhere in item.message.
// Markers are tried in the order listed and the first hit wins; a message
// with no marker at all is shown as critical.
function alertSeverity(item) {
    const markers = [
        ["info", "AlertInfo"],
        ["pending", "AlertPending"],
        ["warning", "AlertWarning"],
        ["critical", "AlertCritical"],
    ]
    for (const [needle, timelineValue] of markers) {
        if (item.message.indexOf(needle) !== -1) {
            return timelineValue
        }
    }

    // nothing recognizable in the message: err on the side of the loudest color
    return "AlertCritical"
}

function createTimelineData(timelineVal, timelineData, rawEventIntervals, preconditionFunc) {
const data = {}
var now = new Date();
Expand Down Expand Up @@ -211,6 +239,9 @@ <h5 class="modal-title">Resource</h5>
timelineGroups.push({group: "operator-progressing", data: []})
createTimelineData("OperatorProgressing", timelineGroups[timelineGroups.length - 1].data, eventIntervals, isOperatorProgressing)

timelineGroups.push({group: "alerts", data: []})
createTimelineData(alertSeverity, timelineGroups[timelineGroups.length - 1].data, eventIntervals, isAlert)

timelineGroups.push({group: "node-state", data: []})
createTimelineData(nodeStateValue, timelineGroups[timelineGroups.length - 1].data, eventIntervals, isNodeState)
timelineGroups[timelineGroups.length - 1].data.sort(function (e1 ,e2){
Expand Down Expand Up @@ -260,8 +291,8 @@ <h5 class="modal-title">Resource</h5>
const el = document.querySelector('#chart');
const myChart = TimelinesChart();
var ordinalScale = d3.scaleOrdinal()
.domain(['OperatorUnavailable', 'OperatorDegraded', 'OperatorProgressing', 'Update', 'Drain', 'Reboot', 'OperatingSystemUpdate', 'NodeNotReady', 'Passed', 'Skipped', 'Flaked', 'Failed', 'Degraded', 'Upgradeable', 'False', 'Unknown'])
.range(['#d0312d', '#ffa500', '#fada5e', '#1e7bd9', '#4294e6', '#6aaef2', '#96cbff', '#fada5e', '#3cb043', '#ceba76', '#ffa500', '#d0312d', '#b65049', '#32b8b6', '#ffffff', '#bbbbbb']);
.domain(['AlertInfo', 'AlertPending', 'AlertWarning', 'AlertCritical', 'OperatorUnavailable', 'OperatorDegraded', 'OperatorProgressing', 'Update', 'Drain', 'Reboot', 'OperatingSystemUpdate', 'NodeNotReady', 'Passed', 'Skipped', 'Flaked', 'Failed', 'Degraded', 'Upgradeable', 'False', 'Unknown'])
.range(['#fada5e','#fada5e','#ffa500','#d0312d', '#d0312d', '#ffa500', '#fada5e', '#1e7bd9', '#4294e6', '#6aaef2', '#96cbff', '#fada5e', '#3cb043', '#ceba76', '#ffa500', '#d0312d', '#b65049', '#32b8b6', '#ffffff', '#bbbbbb']);
myChart.data(timelineGroups).zQualitative(true).enableAnimations(false).leftMargin(240).rightMargin(550).maxLineHeight(20).maxHeight(10000).zColorScale(ordinalScale).onSegmentClick(segmentFunc)
(el);

Expand Down
156 changes: 156 additions & 0 deletions pkg/monitor/alerts.go
@@ -1 +1,157 @@
package monitor

import (
"context"
"fmt"
"math"
"strings"
"time"

routeclient "github.com/openshift/client-go/route/clientset/versioned"
"github.com/openshift/library-go/test/library/metrics"
"github.com/openshift/origin/pkg/monitor/monitorapi"
prometheusv1 "github.com/prometheus/client_golang/api/prometheus/v1"
prometheustypes "github.com/prometheus/common/model"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
)

// CreateEventIntervalsForAlerts queries Prometheus for the ALERTS series
// between startTime and now and converts the results into event intervals.
//
// Clients are built from restConfig and the Prometheus client is obtained via
// metrics.NewPrometheusClient. Two range queries are issued at a one second
// step, first for firing alerts and then for pending alerts, and the resulting
// intervals are returned in that order. Query warnings are printed to stdout.
// Any failure aborts the whole call and is returned wrapped with the step that
// failed; no partial result is returned alongside an error.
func CreateEventIntervalsForAlerts(ctx context.Context, restConfig *rest.Config, startTime time.Time) ([]monitorapi.EventInterval, error) {
	kubeClient, err := kubernetes.NewForConfig(restConfig)
	if err != nil {
		return nil, fmt.Errorf("creating kube client: %w", err)
	}
	routeClient, err := routeclient.NewForConfig(restConfig)
	if err != nil {
		return nil, fmt.Errorf("creating route client: %w", err)
	}
	prometheusClient, err := metrics.NewPrometheusClient(ctx, kubeClient, routeClient)
	if err != nil {
		return nil, fmt.Errorf("creating prometheus client: %w", err)
	}

	// One shared range so both queries cover exactly the same window.
	timeRange := prometheusv1.Range{
		Start: startTime,
		End:   time.Now(),
		Step:  1 * time.Second,
	}

	// Firing alerts are listed before pending ones in the result.
	queries := []string{
		`ALERTS{alertstate="firing"}`,
		`ALERTS{alertstate="pending"}`,
	}

	ret := []monitorapi.EventInterval{}
	for _, query := range queries {
		alerts, warningsForQuery, err := prometheusClient.QueryRange(ctx, query, timeRange)
		if err != nil {
			return nil, fmt.Errorf("querying %s: %w", query, err)
		}
		if len(warningsForQuery) > 0 {
			fmt.Printf("#### warnings \n\t%v\n", strings.Join(warningsForQuery, "\n\t"))
		}

		intervals, err := createEventIntervalsForAlerts(ctx, alerts, startTime)
		if err != nil {
			return nil, fmt.Errorf("converting result of %s: %w", query, err)
		}
		ret = append(ret, intervals...)
	}

	return ret, nil
}

// createEventIntervalsForAlerts converts the value returned by an ALERTS range
// query into event intervals.
//
// For a matrix result, every series except the "Watchdog" alert produces one
// interval per occurrence. The locator is "alert/<name>", followed by
// " node/<instance>" and " ns/<namespace>" when those labels are set; the
// message is the series' full label set. The level comes from the "severity"
// label: warning maps to Warning, info and pending to Info, and critical or
// anything else to Error.
//
// Consecutive samples of a series that are less than five seconds apart belong
// to the same occurrence; a larger gap closes the current interval and starts a
// new one at the sample after the gap. A series without samples yields nothing.
//
// Any other result type yields a single Error interval with locator
// "alert/all" spanning startTime to now. The returned error is always nil.
func createEventIntervalsForAlerts(ctx context.Context, alerts prometheustypes.Value, startTime time.Time) ([]monitorapi.EventInterval, error) {
	fmt.Printf("\n\n\n#### alertType=%v\n", alerts.Type())
	fmt.Printf("\n\n\n#### alerts=%v\n\n\n", alerts.String())

	// Samples of one series closer together than this are one occurrence.
	const maxSampleGap = 5 * time.Second

	ret := []monitorapi.EventInterval{}

	switch {
	case alerts.Type() == prometheustypes.ValMatrix:
		matrixAlert := alerts.(prometheustypes.Matrix)
		for _, alert := range matrixAlert {
			alertName := alert.Metric[prometheustypes.AlertNameLabel]
			if alertName == "Watchdog" {
				continue
			}
			// Without samples there is no time span to report.
			if len(alert.Values) == 0 {
				continue
			}

			locator := "alert/" + alertName
			if node := alert.Metric["instance"]; len(node) > 0 {
				locator += " node/" + node
			}
			if namespace := alert.Metric["namespace"]; len(namespace) > 0 {
				locator += " ns/" + namespace
			}

			alertIntervalTemplate := monitorapi.EventInterval{
				Condition: monitorapi.Condition{
					Locator: string(locator),
					Message: alert.Metric.String(),
				},
			}
			switch alert.Metric["severity"] {
			case "warning":
				alertIntervalTemplate.Level = monitorapi.Warning
			case "critical":
				alertIntervalTemplate.Level = monitorapi.Error
			case "info", "pending":
				alertIntervalTemplate.Level = monitorapi.Info
			default:
				alertIntervalTemplate.Level = monitorapi.Error
			}

			// Track the current occurrence with plain values; both bounds start
			// at the first sample so they are always valid when an interval is
			// emitted.
			alertStartTime := alert.Values[0].Timestamp.Time()
			lastTime := alertStartTime
			for _, currValue := range alert.Values[1:] {
				currTime := currValue.Timestamp.Time()

				// if it has been less than five seconds since we saw this, consider it the same interval and check
				// the next time.
				if math.Abs(currTime.Sub(lastTime).Seconds()) < maxSampleGap.Seconds() {
					lastTime = currTime
					continue
				}

				// if it has been more than five seconds, consider this the start of a new occurrence and add the interval
				currAlertInterval := alertIntervalTemplate // shallow copy
				currAlertInterval.From = alertStartTime
				currAlertInterval.To = lastTime
				ret = append(ret, currAlertInterval)

				// The sample after the gap is both the start and, so far, the
				// end of the new occurrence. Keeping lastTime set here means a
				// second consecutive gap is still detected and a trailing lone
				// sample still gets its own interval.
				alertStartTime = currTime
				lastTime = currTime
			}

			// Close the occurrence that was still open when the samples ran out.
			currAlertInterval := alertIntervalTemplate // shallow copy
			currAlertInterval.From = alertStartTime
			currAlertInterval.To = lastTime
			ret = append(ret, currAlertInterval)
		}

	default:
		ret = append(ret, monitorapi.EventInterval{
			Condition: monitorapi.Condition{
				Level:   monitorapi.Error,
				Locator: "alert/all",
				Message: fmt.Sprintf("unhandled type: %v", alerts.Type()),
			},
			From: startTime,
			To:   time.Now(),
		})
	}

	return ret, nil
}
9 changes: 9 additions & 0 deletions pkg/test/ginkgo/cmd_runsuite.go
Expand Up @@ -367,6 +367,15 @@ func (opt *Options) Run(suite *TestSuite) error {
sort.Sort(events)
}
}

// add events from alerts so we can create the intervals
alertEventIntervals, err := monitor.CreateEventIntervalsForAlerts(ctx, restConfig, start)
if err != nil {
fmt.Printf("\n\n\n#### alertErr=%v\n", err)
}
events = append(events, alertEventIntervals...)
sort.Sort(events)

events.Clamp(start, end)

if len(opt.JUnitDir) > 0 {
Expand Down

0 comments on commit 6c546d1

Please sign in to comment.