// Package anomaly marks backend round-trip statistics that stand out from
// their group: latency, network error rate and application error rate.
//
// This file is anomaly.go from a fork of vulcand/vulcand.
package anomaly
import (
"fmt"
"time"
"github.com/mailgun/vulcand/Godeps/_workspace/src/github.com/mailgun/vulcan/metrics"
"github.com/mailgun/vulcand/backend"
)
// Anomaly codes stored in backend.Anomaly.Code. Numbering starts at 1, so the
// zero value of the field does not correspond to any of these codes.
const (
	// CodeLatency is attached to anomalies raised by the latency check.
	CodeLatency = 1 + iota
	// CodeNetErrorRate is attached to anomalies raised by the network error
	// rate check.
	CodeNetErrorRate
	// CodeAppErrorRate is attached to anomalies raised by the application
	// error rate check.
	CodeAppErrorRate
)
// Human-readable messages stored in backend.Anomaly.Message, listed in the
// same order as the anomaly codes.
const (
	// MessageLatency is a format string; it is rendered with fmt.Sprintf and
	// a single floating-point quantile argument.
	MessageLatency = "%0.2f quantile latency stands out"
	// MessageNetErrRate is the message used for network error rate anomalies.
	MessageNetErrRate = "Error rate stands out"
	// MessageAppErrRate is the message used for application error rate
	// anomalies.
	MessageAppErrRate = "App error rate (status 500) stands out"
)
// MarkEndpointAnomalies runs anomaly detection across a set of endpoints.
//
// It gathers a pointer to each endpoint's Stats field and hands the resulting
// slice to MarkAnomalies, so any verdict changes are written directly into the
// endpoints' own stats. An empty input is a no-op that returns nil; otherwise
// the result of MarkAnomalies is returned as is.
func MarkEndpointAnomalies(endpoints []*backend.Endpoint) error {
	if len(endpoints) == 0 {
		return nil
	}

	collected := make([]*backend.RoundTripStats, len(endpoints))
	for i := range endpoints {
		// Take the address of the field inside the endpoint (not of a copy)
		// so that the marks land on the caller's data.
		collected[i] = &endpoints[i].Stats
	}
	return MarkAnomalies(collected)
}
// MarkAnomalies inspects a group of stats and records, on each entry's Verdict,
// the anomalies found relative to the rest of the group.
//
// The passes run in a fixed order: latency, then network error rate, then
// application error rate. The first pass that returns an error stops the run
// and that error is returned. An empty input returns nil without running any
// pass.
func MarkAnomalies(stats []*backend.RoundTripStats) error {
	if len(stats) == 0 {
		return nil
	}

	passes := []func([]*backend.RoundTripStats) error{
		markLatencies,
		markNetErrorRates,
		markAppErrorRates,
	}
	for _, pass := range passes {
		if err := pass(stats); err != nil {
			return err
		}
	}
	return nil
}
// markNetErrorRates flags the entries whose network error ratio stands out.
//
// The NetErrorRatio of every entry is passed to metrics.SplitRatios; each entry
// whose ratio is present in the second set returned by that call gets
// Verdict.IsBad set and a CodeNetErrorRate anomaly appended. The function
// always returns nil.
func markNetErrorRates(stats []*backend.RoundTripStats) error {
	ratios := make([]float64, 0, len(stats))
	for _, st := range stats {
		ratios = append(ratios, st.NetErrorRatio())
	}

	_, outliers := metrics.SplitRatios(ratios)

	for _, st := range stats {
		if !outliers[st.NetErrorRatio()] {
			continue
		}
		st.Verdict.IsBad = true
		anomaly := backend.Anomaly{Code: CodeNetErrorRate, Message: MessageNetErrRate}
		st.Verdict.Anomalies = append(st.Verdict.Anomalies, anomaly)
	}
	return nil
}
// markLatencies runs the latency anomaly pass over stats by delegating to
// markLatency with bracket index 0 and returning its result.
func markLatencies(stats []*backend.RoundTripStats) error {
	// Only the median is examined; the other quantiles are more volatile.
	const bracketIndex = 0
	return markLatency(bracketIndex, stats)
}
// markLatency flags the entries whose 50th-quantile latency stands out.
//
// For every entry the 50th-quantile bracket is fetched with GetQuantile(50);
// if any lookup fails, that error is returned and nothing is marked. The
// collected values are passed to metrics.SplitLatencies together with
// time.Millisecond, and each entry whose value is present in the second set
// returned by that call gets Verdict.IsBad set and a CodeLatency anomaly
// appended.
//
// index selects the bracket of the first entry whose Quantile is printed in the
// anomaly message. It does not influence which values are compared. An index
// outside the first entry's brackets yields an error instead of a panic, and an
// empty stats slice is a no-op that returns nil.
func markLatency(index int, stats []*backend.RoundTripStats) error {
	if len(stats) == 0 {
		return nil
	}

	medians := make([]time.Duration, len(stats))
	for i, s := range stats {
		v, err := s.LatencyBrackets.GetQuantile(50)
		if err != nil {
			return err
		}
		medians[i] = v.Value
	}

	// Validate index before using it so that a bad argument surfaces as an
	// error rather than an out-of-range panic.
	brackets := stats[0].LatencyBrackets
	if index < 0 || index >= len(brackets) {
		return fmt.Errorf("latency bracket index %d out of range [0, %d)", index, len(brackets))
	}
	// The message is identical for every flagged entry, so format it once.
	message := fmt.Sprintf(MessageLatency, brackets[index].Quantile)

	_, bad := metrics.SplitLatencies(medians, time.Millisecond)
	for i, s := range stats {
		// Test membership with exactly the value that was handed to
		// SplitLatencies; re-reading LatencyBrackets[index] would only agree
		// with it when that bracket happens to be the 50th quantile.
		if !bad[medians[i]] {
			continue
		}
		s.Verdict.IsBad = true
		s.Verdict.Anomalies = append(s.Verdict.Anomalies, backend.Anomaly{
			Code:    CodeLatency,
			Message: message,
		})
	}
	return nil
}
// markAppErrorRates flags the entries whose application error ratio stands out.
//
// The AppErrorRatio of every entry is passed to metrics.SplitRatios; each entry
// whose ratio is present in the second set returned by that call gets
// Verdict.IsBad set and a CodeAppErrorRate anomaly appended. The function
// always returns nil.
func markAppErrorRates(stats []*backend.RoundTripStats) error {
	ratios := make([]float64, 0, len(stats))
	for _, st := range stats {
		ratios = append(ratios, st.AppErrorRatio())
	}

	_, outliers := metrics.SplitRatios(ratios)

	for _, st := range stats {
		if !outliers[st.AppErrorRatio()] {
			continue
		}
		st.Verdict.IsBad = true
		anomaly := backend.Anomaly{Code: CodeAppErrorRate, Message: MessageAppErrRate}
		st.Verdict.Anomalies = append(st.Verdict.Anomalies, anomaly)
	}
	return nil
}