forked from vulcand/vulcand
-
Notifications
You must be signed in to change notification settings - Fork 1
/
anomaly.go
121 lines (106 loc) · 2.97 KB
/
anomaly.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
package anomaly
import (
"fmt"
"time"
"github.com/mailgun/vulcand/Godeps/_workspace/src/github.com/mailgun/oxy/memmetrics"
"github.com/mailgun/vulcand/engine"
)
// Anomaly codes identifying which metric was flagged. The values are spelled
// out explicitly so that adding a new code never renumbers the existing ones.
const (
	// CodeLatency is the code of anomalies reported for latency.
	CodeLatency = 1
	// CodeNetErrorRate is the code of anomalies reported for the network error rate.
	CodeNetErrorRate = 2
	// CodeAppErrorRate is the code of anomalies reported for the application error rate.
	CodeAppErrorRate = 3
)
// Human-readable messages attached to the anomalies reported by this package.
const (
	// MessageLatency is a format string; its %0.2f verb receives the quantile.
	MessageLatency = "%0.2f quantile latency stands out"
	// MessageNetErrRate is the message of network error rate anomalies.
	MessageNetErrRate = "Error rate stands out"
	// MessageAppErrRate is the message of application error rate anomalies.
	MessageAppErrRate = "App error rate (status 500) stands out"
)
// MarkServerAnomalies takes the list of servers and marks anomalies detected within this set
// by replacing each server's Stats with a copy that has been run through MarkAnomalies.
//
// An empty list is a no-op. An error is returned, and servers is left
// untouched, when a server has no stats or when MarkAnomalies fails.
func MarkServerAnomalies(servers []engine.Server) error {
	if len(servers) == 0 {
		return nil
	}

	// Work on copies so that servers is only modified once detection has
	// succeeded for the whole group.
	stats := make([]engine.RoundTripStats, len(servers))
	for i := range servers {
		if servers[i].Stats == nil {
			// Dereferencing a nil Stats would panic; report it to the caller instead.
			return fmt.Errorf("server at index %d has no stats", i)
		}
		stats[i] = *servers[i].Stats
	}

	if err := MarkAnomalies(stats); err != nil {
		return err
	}

	for i := range stats {
		servers[i].Stats = &stats[i]
	}
	return nil
}
// MarkAnomalies inspects a group of stats and marks the anomalies detected
// within it by updating the Verdict property of the affected entries.
//
// Latencies, network error rates and application error rates are checked in
// that order; the first check that fails stops the run and its error is
// returned. An empty group is a no-op.
func MarkAnomalies(stats []engine.RoundTripStats) error {
	if len(stats) == 0 {
		return nil
	}

	checks := []func([]engine.RoundTripStats) error{
		markLatencies,
		markNetErrorRates,
		markAppErrorRates,
	}
	for _, check := range checks {
		if err := check(stats); err != nil {
			return err
		}
	}
	return nil
}
// markNetErrorRates flags every entry whose network error ratio is in the
// "bad" set returned by memmetrics.SplitRatios for the whole group. Flagged
// entries get Verdict.IsBad set and a CodeNetErrorRate anomaly appended.
// It always returns nil.
func markNetErrorRates(stats []engine.RoundTripStats) error {
	ratios := make([]float64, 0, len(stats))
	for _, st := range stats {
		ratios = append(ratios, st.NetErrorRatio())
	}

	_, outliers := memmetrics.SplitRatios(ratios)
	for i := range stats {
		st := &stats[i]
		if !outliers[st.NetErrorRatio()] {
			continue
		}
		anomaly := engine.Anomaly{Code: CodeNetErrorRate, Message: MessageNetErrRate}
		st.Verdict.IsBad = true
		st.Verdict.Anomalies = append(st.Verdict.Anomalies, anomaly)
	}
	return nil
}
// markLatencies runs latency anomaly detection over stats by delegating to
// markLatency with bracket index 0.
func markLatencies(stats []engine.RoundTripStats) error {
	// Only the median is processed, as the other quantiles are more volatile.
	const medianIndex = 0
	return markLatency(medianIndex, stats)
}
// markLatency flags the entries whose 50 quantile latency stands out from
// the rest of the group.
//
// The latency of each entry is read with LatencyBrackets.GetQuantile(50) and
// the collected values are split by memmetrics.SplitLatencies (called with
// time.Millisecond). Entries whose latency is in the "bad" set get
// Verdict.IsBad set and a CodeLatency anomaly appended.
//
// index selects the bracket of the first entry whose Quantile is used to
// label the anomaly message.
//
// An empty group is a no-op. If GetQuantile fails for any entry its error is
// returned and no entry is modified.
func markLatency(index int, stats []engine.RoundTripStats) error {
	if len(stats) == 0 {
		// Nothing to compare; also keeps the stats[0] access below in range.
		return nil
	}

	latencies := make([]time.Duration, len(stats))
	for i := range stats {
		v, err := stats[i].LatencyBrackets.GetQuantile(50)
		if err != nil {
			return err
		}
		latencies[i] = v.Value
	}

	// NOTE(review): the label is taken from bracket `index` while the values
	// come from GetQuantile(50) — confirm both refer to the same quantile.
	quantile := stats[0].LatencyBrackets[index].Quantile
	_, bad := memmetrics.SplitLatencies(latencies, time.Millisecond)
	for i := range stats {
		// Look up the exact value that was handed to SplitLatencies for this
		// entry; reading bracket `index` instead may yield a value that was
		// never classified.
		if !bad[latencies[i]] {
			continue
		}
		stats[i].Verdict.IsBad = true
		stats[i].Verdict.Anomalies = append(
			stats[i].Verdict.Anomalies,
			engine.Anomaly{
				Code:    CodeLatency,
				Message: fmt.Sprintf(MessageLatency, quantile),
			})
	}
	return nil
}
// markAppErrorRates flags every entry whose application error ratio is in
// the "bad" set returned by memmetrics.SplitRatios for the whole group.
// Flagged entries get Verdict.IsBad set and a CodeAppErrorRate anomaly
// appended. It always returns nil.
func markAppErrorRates(stats []engine.RoundTripStats) error {
	ratios := make([]float64, len(stats))
	for i := range stats {
		ratios[i] = stats[i].AppErrorRatio()
	}

	_, outliers := memmetrics.SplitRatios(ratios)
	for i := range stats {
		st := &stats[i]
		if !outliers[st.AppErrorRatio()] {
			continue
		}
		anomaly := engine.Anomaly{Code: CodeAppErrorRate, Message: MessageAppErrRate}
		st.Verdict.IsBad = true
		st.Verdict.Anomalies = append(st.Verdict.Anomalies, anomaly)
	}
	return nil
}