Skip to content

Commit

Permalink
Merge d2b268a into 33ac5de
Browse files Browse the repository at this point in the history
  • Loading branch information
SchlottiP committed Jan 29, 2020
2 parents 33ac5de + d2b268a commit 21d8c91
Show file tree
Hide file tree
Showing 5 changed files with 155 additions and 28 deletions.
8 changes: 6 additions & 2 deletions backend/postgres/health_postgres.go
Expand Up @@ -6,15 +6,19 @@ package postgres
import (
"time"

"github.com/go-pg/pg"
"github.com/go-pg/pg/orm"
"github.com/pace/bricks/maintenance/health/servicehealthcheck"
)

// HealthCheck checks the state of a postgres connection. It must not be changed
// after it was registered as a health check.
type HealthCheck struct {
state servicehealthcheck.ConnectionState
Pool *pg.DB
Pool postgresQueryExecutor
}

type postgresQueryExecutor interface {
Exec(query interface{}, params ...interface{}) (res orm.Result, err error)
}

// Init initialises the test table
Expand Down
35 changes: 35 additions & 0 deletions backend/postgres/health_postgres_test.go
Expand Up @@ -9,9 +9,14 @@ import (
"net/http/httptest"
"strings"
"testing"
"time"

"github.com/go-pg/pg/orm"
http2 "github.com/pace/bricks/http"
"github.com/pace/bricks/maintenance/errors"
"github.com/pace/bricks/maintenance/health/servicehealthcheck"
"github.com/pace/bricks/maintenance/log"
"github.com/stretchr/testify/require"
)

func setup() *http.Response {
Expand Down Expand Up @@ -41,3 +46,33 @@ func TestIntegrationHealthCheck(t *testing.T) {
t.Errorf("Expected /health/check to return OK, got: %q", string(data[:]))
}
}

type testPool struct {
err error
}

func (t *testPool) Exec(query interface{}, params ...interface{}) (res orm.Result, err error) {
return nil, t.err
}

func TestHealthCheckCaching(t *testing.T) {
// set the TTL to a minute because this is long enough to test that the result is cached
cfg.HealthCheckResultTTL = time.Minute
requiredErr := errors.New("TestHealthCheckCaching")
pool := &testPool{err: requiredErr}
h := &HealthCheck{Pool: pool}
res := h.HealthCheck()
// get the error for the first time
require.Equal(t, servicehealthcheck.Err, res.State)
require.Equal(t, "TestHealthCheckCaching", res.Msg)
res = h.HealthCheck()
pool.err = nil
// getting the cached error
require.Equal(t, servicehealthcheck.Err, res.State)
require.Equal(t, "TestHealthCheckCaching", res.Msg)
// Resetting the TTL to get a uncached result
cfg.HealthCheckResultTTL = 0
res = h.HealthCheck()
require.Equal(t, servicehealthcheck.Ok, res.State)
require.Equal(t, "", res.Msg)
}
3 changes: 3 additions & 0 deletions maintenance/health/servicehealthcheck/README.md
@@ -1,4 +1,5 @@
# Health Checks

* Makes it possible to add a health check for a service (e.g. postgres). The list of checks is checked for the routes
`/health` and `/health/check`

Expand Down Expand Up @@ -42,3 +43,5 @@ for caching
* `/health` => OK and WARN means the service is healthy.
* `/health/check` => the complete result of the check is added to the response

## Environment Variables
`HEALTH_CHECK_INIT_RESULT_ERROR_TTL` : Amount of time to cache the errors that occur in the initialisation of the HealthCheck
47 changes: 42 additions & 5 deletions maintenance/health/servicehealthcheck/healthchecker.go
Expand Up @@ -7,7 +7,9 @@ import (
"fmt"
"net/http"
"sync"
"time"

"github.com/caarlos0/env"
"github.com/pace/bricks/maintenance/log"
)

Expand All @@ -22,13 +24,20 @@ type Initialisable interface {
Init() error
}

type config struct {
// Amount of time to cache the last init
HealthCheckInitResultErrorTTL time.Duration `env:"HEALTH_CHECK_INIT_RESULT_ERROR_TTL" envDefault:"10s"`
}

var cfg config

// requiredChecks contains all required registered Health Checks - key:Name
var requiredChecks sync.Map

// optionalChecks contains all optional registered Health Checks - key:Name
var optionalChecks sync.Map

// initErrors map with all errors that happened in the initialisation of any health check - key:Name
// initErrors map with all err ConnectionState that happened in the initialisation of any health check - key:Name
var initErrors sync.Map

// HealthState describes if a any error or warning occurred during the health check of a service
Expand All @@ -51,16 +60,23 @@ type HealthCheckResult struct {
Msg string
}

func init() {
err := env.Parse(&cfg)
if err != nil {
log.Fatalf("Failed to parse health check environment: %v", err)
}
}

func check(hcs *sync.Map) map[string]HealthCheckResult {
result := make(map[string]HealthCheckResult)
hcs.Range(func(key, value interface{}) bool {
name := key.(string)
hc := value.(HealthCheck)
// If it was not possible to initialise this health check, then show the initialisation error message
if val, isIn := initErrors.Load(name); isIn {
err := val.(error)
result[name] = HealthCheckResult{State: Err, Msg: err.Error()}
return true
if done := reInitHealthCheck(val.(*ConnectionState), result, name, hc.(Initialisable)); done {
return true
}
}
// this is the actual health check
result[name] = hc.HealthCheck()
Expand All @@ -69,6 +85,21 @@ func check(hcs *sync.Map) map[string]HealthCheckResult {
return result
}

func reInitHealthCheck(conState *ConnectionState, result map[string]HealthCheckResult, name string, initHc Initialisable) bool {
if time.Since(conState.LastChecked()) < cfg.HealthCheckInitResultErrorTTL {
result[name] = conState.GetState()
return true
}
err := initHc.Init()
if err != nil {
conState.SetErrorState(err)
result[name] = conState.GetState()
return true
}
initErrors.Delete(name)
return false
}

func writeResult(w http.ResponseWriter, status int, body string) {
w.Header().Set("Content-Type", "text/plain")
w.WriteHeader(status)
Expand Down Expand Up @@ -104,7 +135,13 @@ func registerHealthCheck(checks *sync.Map, hc HealthCheck, name string) {
if initHC, ok := hc.(Initialisable); ok {
if err := initHC.Init(); err != nil {
log.Warnf("error initialising health check %q: %s", name, err)
initErrors.Store(name, err)
initErrors.Store(name, &ConnectionState{
lastCheck: time.Now(),
result: HealthCheckResult{
State: Err,
Msg: err.Error(),
},
})
}
}
// save the length of the longest health check name, for the width of the column in /health/check
Expand Down
90 changes: 69 additions & 21 deletions maintenance/health/servicehealthcheck/healthchecker_test.go
Expand Up @@ -12,6 +12,7 @@ import (
"strings"
"sync"
"testing"
"time"

"github.com/stretchr/testify/require"
)
Expand Down Expand Up @@ -77,10 +78,7 @@ func TestHandlerHealthCheck(t *testing.T) {
}
for _, tc := range testCases {
t.Run(tc.title, func(t *testing.T) {
//remove all previous health checks
requiredChecks = sync.Map{}
optionalChecks = sync.Map{}
initErrors = sync.Map{}
resetHealthChecks()
rec := httptest.NewRecorder()
RegisterHealthCheck(tc.check, tc.check.name)
req := httptest.NewRequest("GET", "/health/", nil)
Expand All @@ -96,27 +94,80 @@ func TestHandlerHealthCheck(t *testing.T) {
}
}

func TestHandlerHealthCheckOptional(t *testing.T) {
checkOpt := &testHealthChecker{name: "TestHandlerHealthCheckErr", healthCheckErr: true}
checkReq := &testHealthChecker{name: "TestOk"}
//remove all previous health checks
requiredChecks = sync.Map{}
optionalChecks = sync.Map{}
initErrors = sync.Map{}
func TestInitErrorRetry(t *testing.T) {
// No caching of the init results
cfg.HealthCheckInitResultErrorTTL = 0

rec := httptest.NewRecorder()
RegisterHealthCheck(checkReq, checkReq.name)
RegisterOptionalHealthCheck(checkOpt, checkOpt.name)
resetHealthChecks()

// Create Check with initErr
checker := &testHealthChecker{
initErr: true,
healthCheckErr: false,
healthCheckWarn: false,
name: "initRetry",
}
RegisterHealthCheck(checker, checker.name)

testRequest(t, http.StatusServiceUnavailable, "ERR")

// remove initErr
checker.initErr = false
testRequest(t, http.StatusOK, "OK")
}

func testRequest(t *testing.T, expCode int, expBody string) {
req := httptest.NewRequest("GET", "/health/", nil)
rec := httptest.NewRecorder()
HealthHandler().ServeHTTP(rec, req)
resp := rec.Result()

require.Equal(t, http.StatusOK, resp.StatusCode)
require.Equal(t, expCode, resp.StatusCode)
data, err := ioutil.ReadAll(resp.Body)
require.NoError(t, err)
require.Equal(t, "OK", string(data))
require.Equal(t, expBody, string(data))
}

func resetHealthChecks() {
//remove all previous health checks
requiredChecks = sync.Map{}
optionalChecks = sync.Map{}
initErrors = sync.Map{}
}

func TestInitErrorCaching(t *testing.T) {
cfg.HealthCheckInitResultErrorTTL = time.Hour
hc := &testHealthChecker{
initErr: true,
healthCheckErr: false,
healthCheckWarn: false,
name: "initErrorCaching",
}

resetHealthChecks()
RegisterHealthCheck(hc, hc.name)

testRequest(t, http.StatusServiceUnavailable, "ERR")
hc.initErr = false
testRequest(t, http.StatusServiceUnavailable, "ERR")

cfg.HealthCheckInitResultErrorTTL = 0
resetHealthChecks()
hc.initErr = true
RegisterHealthCheck(hc, hc.name)
testRequest(t, http.StatusServiceUnavailable, "ERR")
hc.initErr = false
testRequest(t, http.StatusOK, "OK")

}
func TestHandlerHealthCheckOptional(t *testing.T) {
checkOpt := &testHealthChecker{name: "TestHandlerHealthCheckErr", healthCheckErr: true}
checkReq := &testHealthChecker{name: "TestOk"}
resetHealthChecks()

RegisterHealthCheck(checkReq, checkReq.name)
RegisterOptionalHealthCheck(checkOpt, checkOpt.name)

testRequest(t, http.StatusOK, "OK")
}

func TestHandlerReadableHealthCheck(t *testing.T) {
Expand Down Expand Up @@ -166,10 +217,7 @@ func TestHandlerReadableHealthCheck(t *testing.T) {
}
for _, tc := range testcases {
t.Run(tc.title, func(t *testing.T) {
//remove all previous health checks
requiredChecks = sync.Map{}
optionalChecks = sync.Map{}
initErrors = sync.Map{}
resetHealthChecks()

rec := httptest.NewRecorder()
for _, hc := range tc.req {
Expand Down

0 comments on commit 21d8c91

Please sign in to comment.