Skip to content

Commit

Permalink
Merge b0b0bd3 into c48e770
Browse files Browse the repository at this point in the history
  • Loading branch information
kozlovic committed May 1, 2020
2 parents c48e770 + b0b0bd3 commit eb5f235
Show file tree
Hide file tree
Showing 8 changed files with 127 additions and 5 deletions.
41 changes: 38 additions & 3 deletions nats.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ const (
DefaultPort = 4222
DefaultMaxReconnect = 60
DefaultReconnectWait = 2 * time.Second
DefaultReconnectJitter = time.Second
DefaultTimeout = 2 * time.Second
DefaultPingInterval = 2 * time.Minute
DefaultMaxPingOut = 2
Expand Down Expand Up @@ -130,6 +131,7 @@ func GetDefaultOptions() Options {
AllowReconnect: true,
MaxReconnect: DefaultMaxReconnect,
ReconnectWait: DefaultReconnectWait,
ReconnectJitter: DefaultReconnectJitter,
Timeout: DefaultTimeout,
PingInterval: DefaultPingInterval,
MaxPingsOut: DefaultMaxPingOut,
Expand Down Expand Up @@ -258,6 +260,10 @@ type Options struct {
// to a server that we were already connected to previously.
ReconnectWait time.Duration

// ReconnectJitter sets the upper bound for a random delay added to
// ReconnectWait during a reconnect.
ReconnectJitter time.Duration

// Timeout sets the timeout for a Dial operation on a connection.
Timeout time.Duration

Expand Down Expand Up @@ -411,6 +417,7 @@ type Conn struct {
ptmr *time.Timer
pout int
ar bool // abort reconnect
rqch chan struct{}

// New style response handler
respSub string // The wildcard subject
Expand Down Expand Up @@ -672,6 +679,14 @@ func MaxReconnects(max int) Option {
}
}

// ReconnectJitter is an Option to set the upper bound of a random delay added ReconnectWait.
func ReconnectJitter(jitter time.Duration) Option {
return func(o *Options) error {
o.ReconnectJitter = jitter
return nil
}
}

// PingInterval is an Option to set the period for client ping commands.
func PingInterval(t time.Duration) Option {
return func(o *Options) error {
Expand Down Expand Up @@ -1396,6 +1411,7 @@ func (nc *Conn) setup() {
nc.pongs = make([]chan struct{}, 0, 8)

nc.fch = make(chan struct{}, flushChanSize)
nc.rqch = make(chan struct{})

// Setup scratch outbound buffer for PUB
pub := nc.scratch[:len(_PUB_P_)]
Expand Down Expand Up @@ -1818,6 +1834,8 @@ func (nc *Conn) doReconnect(err error) {
// This is used to wait on go routines exit if we start them in the loop
// but an error occurs after that.
waitForGoRoutines := false
var rt *time.Timer
rqch := nc.rqch

for len(nc.srvPool) > 0 {
cur, err := nc.selectNextServer()
Expand All @@ -1844,7 +1862,20 @@ func (nc *Conn) doReconnect(err error) {
if sleepTime <= 0 {
runtime.Gosched()
} else {
time.Sleep(time.Duration(sleepTime))
if nc.Opts.ReconnectJitter > 0 {
sleepTime += rand.Int63n(int64(nc.Opts.ReconnectJitter))
}
st := time.Duration(sleepTime)
if rt == nil {
rt = time.NewTimer(st)
} else {
rt.Reset(st)
}
select {
case <-rqch:
rt.Stop()
case <-rt.C:
}
}
// If the readLoop, etc.. go routines were started, wait for them to complete.
if waitForGoRoutines {
Expand Down Expand Up @@ -3655,9 +3686,13 @@ func (nc *Conn) close(status Status, doCBs bool, err error) {

// Kick the Go routines so they fall out.
nc.kickFlusher()
nc.mu.Unlock()

nc.mu.Lock()
// If the reconnect timer is waiting between a reconnect attempt,
// this will kick it out.
if nc.rqch != nil {
close(nc.rqch)
nc.rqch = nil
}

// Clear any queued pongs, e.g. pending flush calls.
nc.clearPendingFlushCalls()
Expand Down
62 changes: 61 additions & 1 deletion nats_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1502,6 +1502,7 @@ func TestExpiredUserCredentials(t *testing.T) {
url := fmt.Sprintf("nats://127.0.0.1:%d", addr.Port)
nc, err := Connect(url,
ReconnectWait(25*time.Millisecond),
ReconnectJitter(0),
MaxReconnects(-1),
ErrorHandler(func(_ *Conn, _ *Subscription, e error) {
select {
Expand Down Expand Up @@ -1583,6 +1584,7 @@ func TestExpiredUserCredentialsRenewal(t *testing.T) {
nc, err := Connect(url,
UserCredentials(chainedFile),
ReconnectWait(25*time.Millisecond),
ReconnectJitter(0),
MaxReconnects(2),
ReconnectHandler(func(nc *Conn) {
rch <- true
Expand Down Expand Up @@ -2075,6 +2077,7 @@ func TestAuthErrorOnReconnect(t *testing.T) {
urls := fmt.Sprintf("nats://%s:%d, nats://%s:%d", o1.Host, o1.Port, o2.Host, o2.Port)
nc, err := Connect(urls,
ReconnectWait(25*time.Millisecond),
ReconnectJitter(0),
MaxReconnects(-1),
DontRandomize(),
DisconnectErrHandler(func(_ *Conn, e error) {
Expand Down Expand Up @@ -2269,7 +2272,7 @@ func TestGetRTT(t *testing.T) {
s := RunServerOnPort(-1)
defer s.Shutdown()

nc, err := Connect(s.ClientURL(), ReconnectWait(10*time.Millisecond))
nc, err := Connect(s.ClientURL(), ReconnectWait(10*time.Millisecond), ReconnectJitter(0))
if err != nil {
t.Fatalf("Expected to connect to server, got %v", err)
}
Expand Down Expand Up @@ -2389,3 +2392,60 @@ func TestNoPanicOnSrvPoolSizeChanging(t *testing.T) {
}
wg.Wait()
}

func TestReconnectWaitJitter(t *testing.T) {
s := RunServerOnPort(TEST_PORT)
defer s.Shutdown()

rch := make(chan time.Time, 1)
nc, err := Connect(s.ClientURL(),
ReconnectWait(100*time.Millisecond),
ReconnectJitter(time.Second),
ReconnectHandler(func(_ *Conn) {
rch <- time.Now()
}),
)
if err != nil {
t.Fatalf("Error during connect: %v", err)
}
defer nc.Close()

nc.mu.Lock()
start := time.Now()
nc.conn.Close()
nc.mu.Unlock()
select {
case end := <-rch:
dur := end.Sub(start)
// We should wait at least the reconnect wait + random up to 1 sec.
if dur < 90*time.Millisecond || dur > 1200*time.Millisecond {
t.Fatalf("Wrong wait: %v", dur)
}
case <-time.After(5 * time.Second):
t.Fatalf("Should have reconnected")
}
nc.Close()

// Use a long reconnect wait
nc, err = Connect(s.ClientURL(), ReconnectWait(100*time.Second))
if err != nil {
t.Fatalf("Error during connect: %v", err)
}
defer nc.Close()

// Cause a disconnect
nc.mu.Lock()
nc.conn.Close()
nc.mu.Unlock()
// Wait a bit for the reconnect loop to go into wait mode.
time.Sleep(50 * time.Millisecond)
// Now close and expect the reconnect go routine to return..
nc.Close()
// Wait a bit to give a chance for the go routine to exit.
time.Sleep(50 * time.Millisecond)
buf := make([]byte, 100000)
n := runtime.Stack(buf, true)
if strings.Contains(string(buf[:n]), "doReconnect") {
t.Fatalf("doReconnect go routine still running:\n%s", buf[:n])
}
}
2 changes: 2 additions & 0 deletions test/auth_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ func TestAuthFailAllowReconnect(t *testing.T) {
copts.NoRandomize = true
copts.MaxReconnect = 10
copts.ReconnectWait = 100 * time.Millisecond
copts.ReconnectJitter = 0

copts.ReconnectedCB = func(_ *nats.Conn) {
reconnectch <- true
Expand Down Expand Up @@ -174,6 +175,7 @@ func TestTokenHandlerReconnect(t *testing.T) {
copts.NoRandomize = true
copts.MaxReconnect = 10
copts.ReconnectWait = 100 * time.Millisecond
copts.ReconnectJitter = 0

copts.TokenHandler = func() string {
return secret
Expand Down
1 change: 1 addition & 0 deletions test/basic_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -822,6 +822,7 @@ func TestOptions(t *testing.T) {
nats.Name("myName"),
nats.MaxReconnects(2),
nats.ReconnectWait(50*time.Millisecond),
nats.ReconnectJitter(0),
nats.PingInterval(20*time.Millisecond))
if err != nil {
t.Fatalf("Failed to connect: %v", err)
Expand Down
7 changes: 7 additions & 0 deletions test/cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,7 @@ func TestHotSpotReconnect(t *testing.T) {

opts := []nats.Option{
nats.ReconnectWait(50 * time.Millisecond),
nats.ReconnectJitter(0),
nats.ReconnectHandler(func(_ *nats.Conn) { wg.Done() }),
}

Expand Down Expand Up @@ -389,6 +390,7 @@ func TestProperFalloutAfterMaxAttempts(t *testing.T) {
}
opts.NoRandomize = true
opts.ReconnectWait = (25 * time.Millisecond)
opts.ReconnectJitter = 0

dch := make(chan bool)
opts.DisconnectedErrCB = func(_ *nats.Conn, _ error) {
Expand Down Expand Up @@ -456,6 +458,7 @@ func TestProperFalloutAfterMaxAttemptsWithAuthMismatch(t *testing.T) {
opts.MaxReconnect = 5
}
opts.ReconnectWait = (25 * time.Millisecond)
opts.ReconnectJitter = 0

dch := make(chan bool)
opts.DisconnectedErrCB = func(_ *nats.Conn, _ error) {
Expand Down Expand Up @@ -519,11 +522,13 @@ func TestTimeoutOnNoServers(t *testing.T) {
opts.Servers = testServers[:2]
opts.MaxReconnect = 2
opts.ReconnectWait = (100 * time.Millisecond)
opts.ReconnectJitter = 0
} else {
opts.Servers = testServers
// 1 second total time wait
opts.MaxReconnect = 10
opts.ReconnectWait = (100 * time.Millisecond)
opts.ReconnectJitter = 0
}
opts.NoRandomize = true

Expand Down Expand Up @@ -584,6 +589,7 @@ func TestPingReconnect(t *testing.T) {
opts.Servers = testServers
opts.NoRandomize = true
opts.ReconnectWait = 200 * time.Millisecond
opts.ReconnectJitter = 0
opts.PingInterval = 50 * time.Millisecond
opts.MaxPingsOut = -1

Expand Down Expand Up @@ -813,6 +819,7 @@ func TestServerPoolUpdatedWhenRouteGoesAway(t *testing.T) {
nc, err = nats.Connect(s1Url,
nats.MaxReconnects(10),
nats.ReconnectWait(15*time.Millisecond),
nats.ReconnectJitter(0),
nats.SetCustomDialer(d),
nats.ReconnectHandler(connHandler),
nats.ClosedHandler(connHandler))
Expand Down
7 changes: 6 additions & 1 deletion test/conn_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -733,6 +733,7 @@ func TestCallbacksOrder(t *testing.T) {
nats.ClosedHandler(cch),
nats.ErrorHandler(ech),
nats.ReconnectWait(50*time.Millisecond),
nats.ReconnectJitter(0),
nats.DontRandomize())

if err != nil {
Expand Down Expand Up @@ -1158,6 +1159,7 @@ func TestErrStaleConnection(t *testing.T) {
opts.ReconnectedCB = func(_ *nats.Conn) { rch <- true }
opts.ClosedCB = func(_ *nats.Conn) { cch <- true }
opts.ReconnectWait = 20 * time.Millisecond
opts.ReconnectJitter = 0
opts.MaxReconnect = 100
opts.Servers = []string{natsURL}
nc, err := opts.Connect()
Expand Down Expand Up @@ -1247,6 +1249,7 @@ func TestServerErrorClosesConnection(t *testing.T) {
opts.ReconnectedCB = func(_ *nats.Conn) { atomic.AddInt64(&reconnected, 1) }
opts.ClosedCB = func(_ *nats.Conn) { cch <- true }
opts.ReconnectWait = 20 * time.Millisecond
opts.ReconnectJitter = 0
opts.MaxReconnect = 100
opts.Servers = []string{natsURL}
nc, err := opts.Connect()
Expand Down Expand Up @@ -1321,7 +1324,8 @@ func TestNoRaceOnLastError(t *testing.T) {
nats.DisconnectHandler(dch),
nats.ClosedHandler(cch),
nats.MaxReconnects(-1),
nats.ReconnectWait(5*time.Millisecond))
nats.ReconnectWait(5*time.Millisecond),
nats.ReconnectJitter(0))
if err != nil {
t.Fatalf("Unable to connect: %v\n", err)
}
Expand Down Expand Up @@ -1985,6 +1989,7 @@ func TestReceiveInfoWithEmptyConnectURLs(t *testing.T) {
rch := make(chan bool)
nc, err := nats.Connect("nats://127.0.0.1:4222",
nats.ReconnectWait(50*time.Millisecond),
nats.ReconnectJitter(0),
nats.ReconnectHandler(func(_ *nats.Conn) {
rch <- true
}))
Expand Down
10 changes: 10 additions & 0 deletions test/reconnect_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,13 @@ func TestReconnectTotalTime(t *testing.T) {
}
}

func TestDefaultReconnectJitter(t *testing.T) {
opts := nats.GetDefaultOptions()
if opts.ReconnectJitter != nats.DefaultReconnectJitter {
t.Fatalf("Expected default jitter to be %v, got %v", nats.DefaultReconnectJitter, opts.ReconnectJitter)
}
}

func TestReconnectDisallowedFlags(t *testing.T) {
ts := startReconnectServer(t)
defer ts.Shutdown()
Expand Down Expand Up @@ -73,6 +80,7 @@ func TestReconnectAllowedFlags(t *testing.T) {
opts.AllowReconnect = true
opts.MaxReconnect = 2
opts.ReconnectWait = 1 * time.Second
opts.ReconnectJitter = 0

opts.ClosedCB = func(_ *nats.Conn) {
ch <- true
Expand Down Expand Up @@ -435,6 +443,7 @@ func TestIsReconnectingAndStatus(t *testing.T) {
opts.AllowReconnect = true
opts.MaxReconnect = 10000
opts.ReconnectWait = 100 * time.Millisecond
opts.ReconnectJitter = 0

opts.DisconnectedErrCB = func(_ *nats.Conn, _ error) {
disconnectedch <- true
Expand Down Expand Up @@ -504,6 +513,7 @@ func TestFullFlushChanDuringReconnect(t *testing.T) {
opts.AllowReconnect = true
opts.MaxReconnect = 10000
opts.ReconnectWait = 100 * time.Millisecond
opts.ReconnectJitter = 0

opts.ReconnectedCB = func(_ *nats.Conn) {
reconnectch <- true
Expand Down
2 changes: 2 additions & 0 deletions test/sub_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ func TestAutoUnsubAndReconnect(t *testing.T) {

nc, err := nats.Connect(nats.DefaultURL,
nats.ReconnectWait(50*time.Millisecond),
nats.ReconnectJitter(0),
nats.ReconnectHandler(func(_ *nats.Conn) { rch <- true }))
if err != nil {
t.Fatalf("Unable to connect: %v", err)
Expand Down Expand Up @@ -201,6 +202,7 @@ func TestAutoUnsubWithParallelNextMsgCalls(t *testing.T) {

nc, err := nats.Connect(nats.DefaultURL,
nats.ReconnectWait(50*time.Millisecond),
nats.ReconnectJitter(0),
nats.ReconnectHandler(func(_ *nats.Conn) { rch <- true }))
if err != nil {
t.Fatalf("Unable to connect: %v", err)
Expand Down

0 comments on commit eb5f235

Please sign in to comment.