Skip to content

Commit

Permalink
Pinger needs to also handle write timeout (#179)
Browse files Browse the repository at this point in the history
* pinger: handle write timeout and don't treat read timeout as unavailable

* test: handle ping available during partial timeout

* test: bump up pings to 6 in TestPinger_Concurrent

* ping check: change default count from 5 to 6
For background, see https://github.com/racker/ele/pull/2629
  • Loading branch information
itzg committed Nov 13, 2017
1 parent ca0097b commit 7749092
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 17 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ install:
- gem install --no-ri --no-rdoc fpm
script:
- sudo sysctl -w net.ipv4.ping_group_range="0 2147483647"
- go test -timeout 30s -short -v $(glide novendor)
- go test -timeout 60s -short -v $(glide novendor)
- CGO_ENABLED=0 gox -osarch "linux/386 linux/amd64 darwin/amd64 windows/386 windows/amd64" -output="build/{{.Dir}}_{{.OS}}_{{.Arch}}"
-ldflags="-X github.com/racker/rackspace-monitoring-poller/version.Version=$(git
symbolic-ref -q --short HEAD || git describe --tags --exact-match)"
Expand Down
7 changes: 3 additions & 4 deletions check/check_ping.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ import (
)

const (
defaultPingCount = 5
defaultPingCount = 6
)

// PingCheck conveys Ping checks
Expand Down Expand Up @@ -123,10 +123,9 @@ packetLoop:
}).Debug("Got ping response")

if resp.Err != nil {
// Latch the error for consideration and inclusion in the final check result
pingErr = resp.Err
if !resp.Timeout {
break packetLoop
// Latch non-timeout errors for consideration and inclusion in the final check result
pingErr = resp.Err
}
continue packetLoop
}
Expand Down
2 changes: 1 addition & 1 deletion check/check_ping_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -178,5 +178,5 @@ func TestPingCheck_PartialResponses(t *testing.T) {

assert.Equal(t, 1, crs.Length())
AssertMetrics(t, expected, crs.Get(0).Metrics)
assert.False(t, crs.Available)
assert.True(t, crs.Available)
}
25 changes: 21 additions & 4 deletions check/pinger.go
Original file line number Diff line number Diff line change
Expand Up @@ -205,13 +205,32 @@ func (p *pingerBase) ping(seq int, messageType icmp.Type, perPingDuration time.D

p.packetConn.SetWriteDeadline(time.Now().Add(pingWriteDeadlineDuration))
if _, err = p.packetConn.WriteTo(reqEncoded, p.remoteAddr); err != nil {
log.WithError(err).WithFields(log.Fields{
"prefix": pingLogPrefix,
"identifier": p.identifier,
"id": p.id,
"seq": seq,
"remoteAddr": p.remoteAddr,
}).Warn("Trying to send ping packet")

if isNetTimeoutError(err) {
return PingResponse{Err: err, Timeout: true}
}
return PingResponse{Err: err}
}

// now wait for the response packet matching our ID and remoteAddr
return p.receive(perPingDuration)
}

func isNetTimeoutError(err error) bool {
if netErr, ok := err.(net.Error); ok {
return netErr.Timeout()
} else {
return false
}
}

//noinspection GoBoolExpressions
func (p *pingerBase) receive(perPingDuration time.Duration) PingResponse {

Expand All @@ -238,10 +257,8 @@ recvLoop:
// read a raw ICMP, but since ICMP is a broadcast protocol we don't know if it is ours yet...
n, peerAddr, err := p.packetConn.ReadFrom(buffer)
if err != nil {
if netErr, ok := err.(net.Error); ok {
if netErr.Timeout() {
return PingResponse{Err: err, Timeout: true}
}
if isNetTimeoutError(err) {
return PingResponse{Err: err, Timeout: true}
}

log.WithFields(log.Fields{
Expand Down
21 changes: 14 additions & 7 deletions check/pinger_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,8 @@ func TestPinger_Concurrent(t *testing.T) {
logrus.SetLevel(logrus.DebugLevel)
}

const concurrency = 30
const pings = 5
const concurrency = 75
const pings = 6
var wg sync.WaitGroup

for i := 0; i < concurrency; i++ {
Expand All @@ -100,19 +100,26 @@ func TestPinger_Concurrent(t *testing.T) {

responses := make([]*check.PingResponse, pings)

timeouts := 0

for p := 0; p < pings; p++ {
resp := pinger.Ping(p+1, 1*time.Second)
require.False(t, resp.Timeout, "not expecting timeout for seq=%d, checkId=%s", p+1, checkId)
if resp.Timeout {
timeouts++
require.True(t, timeouts < 3, "not expecting %d timeouts for seq=%d, checkId=%s", timeouts, p+1, checkId)
continue
}
require.True(t, resp.Seq > 0 && resp.Seq <= pings, "invalid seq from resp=%v", resp)
responses[resp.Seq-1] = &resp
time.Sleep(10 * time.Millisecond)
}

for p := 0; p < pings; p++ {
require.NotNil(t, responses[p], "Missing ping seq=%d,checkId=%s", p+1, checkId)
assert.True(t, responses[p].Rtt > 0, "Zero RTT seq=%d", p+1)
assert.NoError(t, responses[p].Err, "seq=%d", p+1)
assert.False(t, responses[p].Timeout, "seq=%d", p+1)
if responses[p] != nil {
assert.True(t, responses[p].Rtt > 0, "Zero RTT seq=%d", p+1)
assert.NoError(t, responses[p].Err, "seq=%d", p+1)
assert.False(t, responses[p].Timeout, "seq=%d", p+1)
}

}

Expand Down

0 comments on commit 7749092

Please sign in to comment.