Skip to content

Commit 46f44ca

Browse files
authored
fix(hatchery): don't send status alert for only 1 dockerengine failure (#5111)
1 parent 76a9939 commit 46f44ca

File tree

2 files changed

+18
-3
lines changed

2 files changed

+18
-3
lines changed

engine/hatchery/swarm/swarm_conf.go

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ func (h *HatcherySwarm) ApplyConfiguration(cfg interface{}) error {
5757
func (h *HatcherySwarm) Status(ctx context.Context) sdk.MonitoringStatus {
5858
m := h.CommonMonitoring()
5959
m.Lines = append(m.Lines, sdk.MonitoringStatusLine{Component: "Workers", Value: fmt.Sprintf("%d/%d", len(h.WorkersStarted(ctx)), h.Config.Provision.MaxWorker), Status: sdk.MonitoringStatusOK})
60+
var nbErrorImageList, nbErrorGetContainers int
6061
for dockerName, dockerClient := range h.dockerClients {
6162
//Check images
6263
status := sdk.MonitoringStatusOK
@@ -65,19 +66,33 @@ func (h *HatcherySwarm) Status(ctx context.Context) sdk.MonitoringStatus {
6566
images, err := dockerClient.ImageList(ctxList, types.ImageListOptions{All: true})
6667
if err != nil {
6768
log.Warning(ctx, "hatchery> swarm> %s> Status> Unable to list images on %s: %s", h.Name(), dockerName, err)
68-
status = sdk.MonitoringStatusAlert
69+
status = sdk.MonitoringStatusWarn
70+
nbErrorImageList++
6971
}
7072
m.Lines = append(m.Lines, sdk.MonitoringStatusLine{Component: "Images-" + dockerName, Value: fmt.Sprintf("%d", len(images)), Status: status})
7173
//Check containers
7274
status = sdk.MonitoringStatusOK
7375
cs, err := h.getContainers(dockerClient, types.ContainerListOptions{All: true})
7476
if err != nil {
7577
log.Warning(ctx, "hatchery> swarm> %s> Status> Unable to list containers on %s: %s", h.Name(), dockerName, err)
76-
status = sdk.MonitoringStatusAlert
78+
status = sdk.MonitoringStatusWarn
79+
nbErrorGetContainers++
7780
}
7881
m.Lines = append(m.Lines, sdk.MonitoringStatusLine{Component: "Containers-" + dockerName, Value: fmt.Sprintf("%d", len(cs)), Status: status})
7982
}
8083

84+
var status = sdk.MonitoringStatusOK
85+
if nbErrorImageList > len(h.dockerClients)/2 {
86+
status = sdk.MonitoringStatusAlert
87+
}
88+
m.Lines = append(m.Lines, sdk.MonitoringStatusLine{Component: "DockerEngines.ListImages", Value: fmt.Sprintf("%d/%d", nbErrorImageList, len(h.dockerClients)), Status: status})
89+
90+
status = sdk.MonitoringStatusOK
91+
if nbErrorGetContainers > len(h.dockerClients)/2 {
92+
status = sdk.MonitoringStatusAlert
93+
}
94+
m.Lines = append(m.Lines, sdk.MonitoringStatusLine{Component: "DockerEngines.GetContainers", Value: fmt.Sprintf("%d/%d", nbErrorGetContainers, len(h.dockerClients)), Status: status})
95+
8196
return m
8297
}
8398

sdk/status.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ type MonitoringStatusLine struct {
5050
// HTTPStatusCode returns the HTTP status code.
5151
func (m MonitoringStatus) HTTPStatusCode() int {
5252
for _, l := range m.Lines {
53-
if l.Status != MonitoringStatusOK {
53+
if l.Status == MonitoringStatusAlert {
5454
return http.StatusServiceUnavailable
5555
}
5656
}

0 commit comments

Comments
 (0)