/
docker_healthcheck.go
82 lines (65 loc) · 2.17 KB
/
docker_healthcheck.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package resources
// DockerHealthCheck is the source of a bash script that checks the health of
// docker and, if docker is unhealthy, restarts it using systemctl.
//
// The script considers docker healthy when "docker network ls" completes
// within 60 seconds and its output lists the "host" network. On a failed
// check it waits 30 seconds and checks again. If docker is still unhealthy it
// stops docker, sleeps for the number of seconds in
// /proc/sys/net/ipv4/tcp_fin_timeout plus 10 more so TCP sockets can close,
// starts docker, and checks one final time after a further 60 seconds.
//
// The script exits explicitly (status 0) only when a check passes; after the
// final failed check it just logs "docker still failing" and falls off the end.
var DockerHealthCheck = `#!/bin/bash
# Copyright 2019 The Kubernetes Authors All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This script is intended to be run periodically, to check the health
# of docker. If it detects a failure, it will restart docker using systemctl.

# healthcheck succeeds when docker answers within 60 seconds and still
# reports the 'host' network.
healthcheck() {
  if output=$(timeout 60 docker network ls); then
    echo "$output" | grep -Fqw host || {
      echo "docker 'host' network missing"
      return 1
    }
  else
    # $? here is the exit status of the timeout/docker command above.
    echo "docker returned $?"
    return 1
  fi
}

if healthcheck; then
  echo "docker healthy"
  exit 0
fi

echo "docker failed"
echo "Giving docker 30 seconds grace before restarting"
sleep 30

if healthcheck; then
  echo "docker recovered"
  exit 0
fi

echo "docker still unresponsive; triggering docker restart"
systemctl stop docker

echo "wait all tcp sockets to close"
sleep $(cat /proc/sys/net/ipv4/tcp_fin_timeout)

sleep 10
systemctl start docker

echo "Waiting 60 seconds to give docker time to start"
sleep 60

if healthcheck; then
  echo "docker recovered"
  exit 0
fi

echo "docker still failing"
`