/
healthchecker.go
209 lines (178 loc) · 6.81 KB
/
healthchecker.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
// Copyright © 2019 by PACE Telematics GmbH. All rights reserved.
// Created at 2019/10/18 Charlotte Pröller
package servicehealthcheck
import (
"context"
"fmt"
"net/http"
"sync"
"time"
"github.com/caarlos0/env"
"github.com/pace/bricks/maintenance/errors"
"github.com/pace/bricks/maintenance/log"
)
// HealthCheck is a health check that is registered once and that is performed
// periodically and/or spontaneously.
type HealthCheck interface {
	// HealthCheck performs the check and reports its outcome as a
	// HealthCheckResult. When invoked through check, ctx carries a timeout of
	// cfg.HealthCheckMaxWait.
	HealthCheck(ctx context.Context) HealthCheckResult
}
// HealthCheckFunc is an adapter that lets an ordinary function be used as a
// HealthCheck.
type HealthCheckFunc func(ctx context.Context) HealthCheckResult

// HealthCheck implements the HealthCheck interface by calling f(ctx) and
// returning whatever f returns.
func (f HealthCheckFunc) HealthCheck(ctx context.Context) HealthCheckResult {
	return f(ctx)
}
// Initializable is used to mark that a health check needs to be initialized.
// Init is called when the check is registered. If it returns an error, the
// error is recorded and Init is attempted again during later check runs, once
// the recorded error is older than cfg.HealthCheckInitResultErrorTTL.
type Initializable interface {
	// Init prepares the health check and returns a non-nil error when the
	// initialization failed.
	Init(ctx context.Context) error
}
// config holds the settings of the health checks. It is filled from the
// environment by init.
type config struct {
	// HealthCheckInitResultErrorTTL is the amount of time a failed
	// initialization is cached: while the recorded error is younger than this,
	// the cached error is reported and Init is not retried.
	HealthCheckInitResultErrorTTL time.Duration `env:"HEALTH_CHECK_INIT_RESULT_ERROR_TTL" envDefault:"10s"`
	// HealthCheckMaxWait is the timeout of the context that is handed to the
	// health checks of a single run.
	HealthCheckMaxWait time.Duration `env:"HEALTH_CHECK_MAX_WAIT" envDefault:"5s"`
}
var (
	// cfg is the package configuration, parsed from the environment in init.
	cfg config

	// requiredChecks contains all registered required health checks, keyed by
	// the name (string) they were registered under.
	requiredChecks sync.Map

	// optionalChecks contains all registered optional health checks, keyed by
	// the name (string) they were registered under.
	optionalChecks sync.Map

	// initErrors contains a *ConnectionState for every health check whose
	// initialization failed, keyed by the name (string) of the health check.
	initErrors sync.Map
)
// HealthState describes whether an error or a warning occurred during the
// health check of a service.
type HealthState string

// The possible states a health check can report.
const (
	// Err is the state of a service if an error occurred during its health check.
	Err = HealthState("ERR")
	// Warn is the state of a service if a warning occurred during its health check.
	Warn = HealthState("WARN")
	// Ok is the state of a service if neither a warning nor an error occurred
	// during its health check.
	Ok = HealthState("OK")
)
// HealthCheckResult describes the result of a health check. It contains the
// state of a service and a message that describes the state. If the state is
// Ok the description can be empty. The description should contain the error
// message if any error or warning occurred during the health check.
type HealthCheckResult struct {
	// State is the outcome of the health check: Err, Warn or Ok.
	State HealthState
	// Msg is the human-readable description that belongs to State.
	Msg string
}
// init fills cfg from the environment. A parse failure is reported through
// log.Fatalf.
func init() {
	if err := env.Parse(&cfg); err != nil {
		log.Fatalf("Failed to parse health check environment: %v", err)
	}
}
// check runs every health check stored in hcs concurrently, one goroutine per
// check, and returns the results keyed by the name each check was registered
// under. The keys of hcs must be strings and the values must implement
// HealthCheck.
//
// All checks share a context derived from ctx that times out after
// cfg.HealthCheckMaxWait. check waits for every goroutine to return before it
// returns itself, so the timeout only takes effect for checks that honor
// their context.
//
// If the initialization of a check failed earlier (it has an entry in
// initErrors), the initialization is retried via reInitHealthCheck. While the
// check is still not initialized, the recorded initialization error is
// reported instead of running the check.
//
// Every check in hcs gets an entry in the returned map: a check that ends
// without producing a result (for example because it panicked) is reported
// with state Err instead of being silently left out.
func check(ctx context.Context, hcs *sync.Map) map[string]HealthCheckResult {
	ctx, cancel := context.WithTimeout(ctx, cfg.HealthCheckMaxWait)
	// Deferred so that the context is released on every exit path, including a
	// panic caused by an unexpected key or value type in hcs.
	defer cancel()

	results := make(map[string]HealthCheckResult)
	var mu sync.Mutex // guards results
	var wg sync.WaitGroup

	hcs.Range(func(key, value interface{}) bool {
		name := key.(string)
		hc := value.(HealthCheck)

		wg.Add(1)
		go func() {
			defer wg.Done()
			// NOTE(review): HandleWithCtx is assumed to recover a panic raised
			// by the health check — confirm against the errors package.
			defer errors.HandleWithCtx(ctx, fmt.Sprintf("HealthCheck %s", name))

			// Start from a failure and overwrite it on success. The deferred
			// store below is registered last and therefore runs first, also
			// while a panic unwinds, so the result is recorded before
			// wg.Done lets check return.
			res := HealthCheckResult{State: Err, Msg: "health check did not complete"}
			defer func() {
				mu.Lock()
				defer mu.Unlock()
				results[name] = res
			}()

			// If it was not possible to initialize this health check, then
			// show the initialization error message.
			if val, isIn := initErrors.Load(name); isIn {
				state := val.(*ConnectionState)
				// Only Initializable checks are expected in initErrors; if the
				// check is not Initializable there is nothing to retry, so the
				// recorded error is reported as it is.
				initHC, ok := hc.(Initializable)
				if !ok || reInitHealthCheck(ctx, state, name, initHC) {
					res = state.GetState()
					return
				}
			}

			// This is the actual health check.
			res = hc.HealthCheck(ctx)
		}()
		return true
	})

	wg.Wait()
	return results
}
// reInitHealthCheck tries to initialize a health check whose earlier
// initialization failed. It returns true when the state stored in conState
// should be reported instead of running the health check, and false when the
// initialization succeeded and the health check can be run.
//
// While the last check recorded in conState is younger than
// cfg.HealthCheckInitResultErrorTTL, Init is not called and true is returned
// right away. If Init fails, the error is stored in conState. If Init
// succeeds, the entry for name is removed from initErrors.
func reInitHealthCheck(ctx context.Context, conState *ConnectionState, name string, initHc Initializable) bool {
	// The cached initialization error is still fresh enough, do not retry yet.
	if age := time.Since(conState.LastChecked()); age < cfg.HealthCheckInitResultErrorTTL {
		return true
	}

	if initErr := initHc.Init(ctx); initErr != nil {
		conState.SetErrorState(initErr)
		return true
	}

	initErrors.Delete(name)
	return false
}
// writeResult sends body as a text/plain response with the given HTTP status
// code. A failure to write the body is logged as a warning and not returned.
func writeResult(w http.ResponseWriter, status int, body string) {
	// The header has to be set before WriteHeader sends the status line.
	w.Header().Set("Content-Type", "text/plain")
	w.WriteHeader(status)

	_, err := fmt.Fprint(w, body)
	if err != nil {
		log.Warnf("could not write output: %s", err)
	}
}
// RegisterHealthCheck registers a required HealthCheck. The name
// must be unique. If the health check satisfies the Initializable interface, it
// is initialized before it is added. A failed initialization does not prevent
// the registration: the error is logged and recorded, and the check is added
// anyway.
// It is not possible to add a health check with the same name twice, even if
// one is required and one is optional. Such a registration is logged as a
// warning and otherwise ignored.
func RegisterHealthCheck(name string, hc HealthCheck) {
	registerHealthCheck(&requiredChecks, hc, name)
}
// RegisterHealthCheckFunc registers the function f as a required HealthCheck
// by passing it to RegisterHealthCheck. The name must be unique. It is not
// possible to add a health check with the same name twice, even if one is
// required and one is optional.
func RegisterHealthCheckFunc(name string, f HealthCheckFunc) {
	RegisterHealthCheck(name, f)
}
// RegisterOptionalHealthCheck registers a HealthCheck like
// RegisterHealthCheck(name string, hc HealthCheck), but the health check is
// only checked for /health/check and not for /health/.
// Note that the argument order (hc, name) is the reverse of the order used by
// RegisterHealthCheck (name, hc).
func RegisterOptionalHealthCheck(hc HealthCheck, name string) {
	registerHealthCheck(&optionalChecks, hc, name)
}
// registerHealthCheck adds hc under name to checks, which is expected to be
// either &requiredChecks or &optionalChecks.
//
// If name is already used by a required or an optional health check, a warning
// is logged and nothing is registered. If hc is Initializable, Init is called
// first. An initialization error is logged and recorded in initErrors, but the
// health check is registered nevertheless.
func registerHealthCheck(checks *sync.Map, hc HealthCheck, name string) {
	ctx := log.Logger().WithContext(context.Background())

	// Names are unique across both registries, so both of them are searched no
	// matter which one the health check is added to.
	for _, registry := range []*sync.Map{&requiredChecks, &optionalChecks} {
		if _, taken := registry.Load(name); taken {
			log.Warnf("tried to register health check with name %q twice", name)
			return
		}
	}

	if initHC, ok := hc.(Initializable); ok {
		if err := initHC.Init(ctx); err != nil {
			log.Warnf("error initializing health check %q: %s", name, err)
			// Remember the failure so that it can be reported as the result of
			// this health check.
			initErrors.Store(name, &ConnectionState{
				lastCheck: time.Now(),
				result:    HealthCheckResult{State: Err, Msg: err.Error()},
			})
		}
	}

	// Save the length of the longest health check name, for the width of the
	// column in /health/check.
	if nameLen := len(name); nameLen > longestCheckName {
		longestCheckName = nameLen
	}

	checks.Store(name, hc)
}
// HealthHandler returns the health endpoint for transactional processing. This
// handler only checks the required health checks and returns ERR and 503 or OK
// and 200. Each call returns a new handler value.
func HealthHandler() http.Handler {
	return &healthHandler{}
}
// ReadableHealthHandler returns the health endpoint with all details about the
// service health. This handler checks all health checks. The response body
// contains two tables (for required and optional health checks) with the
// detailed results of the health checks. Each call returns a new handler value.
func ReadableHealthHandler() http.Handler {
	return &readableHealthHandler{}
}
// JSONHealthHandler returns the health endpoint with all details about the
// service health. This handler checks all health checks. The response body
// contains a JSON formatted array with every service (required or optional)
// and the detailed health checks about them. Each call returns a new handler
// value.
func JSONHealthHandler() http.Handler {
	return &jsonHealthHandler{}
}