@@ -57,6 +57,7 @@ func (h *HatcherySwarm) ApplyConfiguration(cfg interface{}) error {
5757func (h * HatcherySwarm ) Status (ctx context.Context ) sdk.MonitoringStatus {
5858 m := h .CommonMonitoring ()
5959 m .Lines = append (m .Lines , sdk.MonitoringStatusLine {Component : "Workers" , Value : fmt .Sprintf ("%d/%d" , len (h .WorkersStarted (ctx )), h .Config .Provision .MaxWorker ), Status : sdk .MonitoringStatusOK })
60+ var nbErrorImageList , nbErrorGetContainers int
6061 for dockerName , dockerClient := range h .dockerClients {
6162 //Check images
6263 status := sdk .MonitoringStatusOK
@@ -65,19 +66,33 @@ func (h *HatcherySwarm) Status(ctx context.Context) sdk.MonitoringStatus {
6566 images , err := dockerClient .ImageList (ctxList , types.ImageListOptions {All : true })
6667 if err != nil {
6768 log .Warning (ctx , "hatchery> swarm> %s> Status> Unable to list images on %s: %s" , h .Name (), dockerName , err )
68- status = sdk .MonitoringStatusAlert
69+ status = sdk .MonitoringStatusWarn
70+ nbErrorImageList ++
6971 }
7072 m .Lines = append (m .Lines , sdk.MonitoringStatusLine {Component : "Images-" + dockerName , Value : fmt .Sprintf ("%d" , len (images )), Status : status })
7173 //Check containers
7274 status = sdk .MonitoringStatusOK
7375 cs , err := h .getContainers (dockerClient , types.ContainerListOptions {All : true })
7476 if err != nil {
7577 log .Warning (ctx , "hatchery> swarm> %s> Status> Unable to list containers on %s: %s" , h .Name (), dockerName , err )
76- status = sdk .MonitoringStatusAlert
78+ status = sdk .MonitoringStatusWarn
79+ nbErrorGetContainers ++
7780 }
7881 m .Lines = append (m .Lines , sdk.MonitoringStatusLine {Component : "Containers-" + dockerName , Value : fmt .Sprintf ("%d" , len (cs )), Status : status })
7982 }
8083
84+ var status = sdk .MonitoringStatusOK
85+ if nbErrorImageList > len (h .dockerClients )/ 2 {
86+ status = sdk .MonitoringStatusAlert
87+ }
88+ m .Lines = append (m .Lines , sdk.MonitoringStatusLine {Component : "DockerEngines.ListImages" , Value : fmt .Sprintf ("%d/%d" , nbErrorImageList , len (h .dockerClients )), Status : status })
89+
90+ status = sdk .MonitoringStatusOK
91+ if nbErrorGetContainers > len (h .dockerClients )/ 2 {
92+ status = sdk .MonitoringStatusAlert
93+ }
94+ m .Lines = append (m .Lines , sdk.MonitoringStatusLine {Component : "DockerEngines.GetContainers" , Value : fmt .Sprintf ("%d/%d" , nbErrorGetContainers , len (h .dockerClients )), Status : status })
95+
8196 return m
8297}
8398
0 commit comments