Skip to content

Commit

Permalink
- Add ReconnectJitter option setter
Browse files Browse the repository at this point in the history
- Default options will now use 100ms for non-TLS and 1sec for TLS
- Add CustomReconnectDelay option setter to get user-selected
  delay. The number of attempts (full URL list) is passed to
  the callback.

Signed-off-by: Ivan Kozlovic <ivan@synadia.com>
  • Loading branch information
kozlovic committed May 5, 2020
1 parent fa41411 commit da6924f
Show file tree
Hide file tree
Showing 9 changed files with 230 additions and 78 deletions.
2 changes: 2 additions & 0 deletions go.mod
@@ -1,5 +1,7 @@
module github.com/nats-io/nats.go

go 1.14

require (
github.com/nats-io/jwt v0.3.2
github.com/nats-io/nkeys v0.1.4
Expand Down
150 changes: 110 additions & 40 deletions nats.go
Expand Up @@ -45,19 +45,21 @@ import (

// Default Constants
const (
Version = "1.9.2"
DefaultURL = "nats://127.0.0.1:4222"
DefaultPort = 4222
DefaultMaxReconnect = 60
DefaultReconnectWait = 2 * time.Second
DefaultTimeout = 2 * time.Second
DefaultPingInterval = 2 * time.Minute
DefaultMaxPingOut = 2
DefaultMaxChanLen = 8192 // 8k
DefaultReconnectBufSize = 8 * 1024 * 1024 // 8MB
RequestChanLen = 8
DefaultDrainTimeout = 30 * time.Second
LangString = "go"
Version = "1.9.2"
DefaultURL = "nats://127.0.0.1:4222"
DefaultPort = 4222
DefaultMaxReconnect = 60
DefaultReconnectWait = 2 * time.Second
DefaultReconnectJitterNonTLS = 100 * time.Millisecond
DefaultReconnectJitterTLS = time.Second
DefaultTimeout = 2 * time.Second
DefaultPingInterval = 2 * time.Minute
DefaultMaxPingOut = 2
DefaultMaxChanLen = 8192 // 8k
DefaultReconnectBufSize = 8 * 1024 * 1024 // 8MB
RequestChanLen = 8
DefaultDrainTimeout = 30 * time.Second
LangString = "go"
)

const (
Expand Down Expand Up @@ -127,15 +129,17 @@ func init() {
// GetDefaultOptions returns default configuration options for the client.
func GetDefaultOptions() Options {
return Options{
AllowReconnect: true,
MaxReconnect: DefaultMaxReconnect,
ReconnectWait: DefaultReconnectWait,
Timeout: DefaultTimeout,
PingInterval: DefaultPingInterval,
MaxPingsOut: DefaultMaxPingOut,
SubChanLen: DefaultMaxChanLen,
ReconnectBufSize: DefaultReconnectBufSize,
DrainTimeout: DefaultDrainTimeout,
AllowReconnect: true,
MaxReconnect: DefaultMaxReconnect,
ReconnectWait: DefaultReconnectWait,
ReconnectJitterNonTLS: DefaultReconnectJitterNonTLS,
ReconnectJitterTLS: DefaultReconnectJitterTLS,
Timeout: DefaultTimeout,
PingInterval: DefaultPingInterval,
MaxPingsOut: DefaultMaxPingOut,
SubChanLen: DefaultMaxChanLen,
ReconnectBufSize: DefaultReconnectBufSize,
DrainTimeout: DefaultDrainTimeout,
}
}

Expand Down Expand Up @@ -182,6 +186,12 @@ type SignatureHandler func([]byte) ([]byte, error)
// AuthTokenHandler is used to generate a new token.
// It takes no arguments and returns the token as a string.
type AuthTokenHandler func() string

// ReconnectDelayHandler is used to get from the user the desired
// delay the library should pause before attempting to reconnect
// again. Note that this is invoked after the library tried the
// whole list of URLs and failed to reconnect.
//
// The attempts argument is the number of times the whole list of
// URLs has been tried so far. The returned duration is the amount
// of time the library sleeps before attempting to reconnect again.
type ReconnectDelayHandler func(attempts int) time.Duration

// asyncCB is used to preserve order for async callbacks.
type asyncCB struct {
f func()
Expand Down Expand Up @@ -258,6 +268,24 @@ type Options struct {
// to a server that we were already connected to previously.
ReconnectWait time.Duration

// CustomReconnectDelayCB is invoked after the library tried every
// URL in the server list and failed to reconnect. It passes to the
// user the current number of attempts. This function returns the
// amount of time the library will sleep before attempting to reconnect
// again. It is strongly recommended that this value contains some
// jitter to prevent all connections from attempting to reconnect at the same time.
CustomReconnectDelayCB ReconnectDelayHandler

// ReconnectJitterNonTLS sets the upper bound for a random delay added to
// ReconnectWait during a reconnect when no TLS is used.
// Note that any jitter is capped with ReconnectJitterMax.
ReconnectJitterNonTLS time.Duration

// ReconnectJitterTLS sets the upper bound for a random delay added to
// ReconnectWait during a reconnect when TLS is used.
// Note that any jitter is capped with ReconnectJitterMax.
ReconnectJitterTLS time.Duration

// Timeout sets the timeout for a Dial operation on a connection.
Timeout time.Duration

Expand Down Expand Up @@ -673,6 +701,24 @@ func MaxReconnects(max int) Option {
}
}

// ReconnectJitter is an Option to set the upper bounds of the random
// delay added to ReconnectWait. The nonTLS value is stored in
// ReconnectJitterNonTLS and the TLS value in ReconnectJitterTLS.
func ReconnectJitter(nonTLS, TLS time.Duration) Option {
	applyJitter := func(opts *Options) error {
		opts.ReconnectJitterNonTLS, opts.ReconnectJitterTLS = nonTLS, TLS
		return nil
	}
	return applyJitter
}

// CustomReconnectDelay is an Option that installs cb as the
// CustomReconnectDelayCB option.
// See the CustomReconnectDelayCB field of Options for more details.
func CustomReconnectDelay(cb ReconnectDelayHandler) Option {
	setDelayCB := func(opts *Options) error {
		opts.CustomReconnectDelayCB = cb
		return nil
	}
	return setDelayCB
}

// PingInterval is an Option to set the period for client ping commands.
func PingInterval(t time.Duration) Option {
return func(o *Options) error {
Expand Down Expand Up @@ -1820,37 +1866,61 @@ func (nc *Conn) doReconnect(err error) {
// This is used to wait on go routines exit if we start them in the loop
// but an error occurs after that.
waitForGoRoutines := false
rt := time.NewTimer(0)
var rt *time.Timer
// Channel used to kick routine out of sleep when conn is closed.
rqch := nc.rqch
// Counter that is increased when the whole list of servers has been tried.
var wlf int

var jitter time.Duration
var rw time.Duration
// If a custom reconnect delay handler is set, this takes precedence.
crd := nc.Opts.CustomReconnectDelayCB
if crd == nil {
rw = nc.Opts.ReconnectWait
// TODO: since we sleep only after the whole list has been tried, we can't
// rely on individual *srv to know if it is a TLS or non-TLS url.
// We have to pick which type of jitter to use, for now, we use these hints:
jitter = nc.Opts.ReconnectJitterNonTLS
if nc.Opts.Secure || nc.Opts.TLSConfig != nil {
jitter = nc.Opts.ReconnectJitterTLS
}
}

for len(nc.srvPool) > 0 {
for i := 0; len(nc.srvPool) > 0; {
cur, err := nc.selectNextServer()
if err != nil {
nc.err = err
break
}

sleepTime := int64(0)

// Sleep appropriate amount of time before the
// connection attempt if connecting to same server
// we just got disconnected from.
if time.Since(cur.lastAttempt) < nc.Opts.ReconnectWait {
sleepTime = int64(nc.Opts.ReconnectWait - time.Since(cur.lastAttempt))
}

// On Windows, createConn() will take more than a second when no
// server is running at that address. So it could be that the
// time elapsed between reconnect attempts is always greater than
// the set option. Release the lock to give a chance to a parallel
// nc.Close() to break the loop.
doSleep := i+1 >= len(nc.srvPool)
nc.mu.Unlock()
if sleepTime <= 0 {

if !doSleep {
i++
// Release the lock to give a chance to a concurrent nc.Close() to break the loop.
runtime.Gosched()
} else {
rt.Reset(time.Duration(rand.Int63n(sleepTime)))
i = 0
var st time.Duration
if crd != nil {
wlf++
st = crd(wlf)
} else {
st = rw
if jitter > 0 {
st += time.Duration(rand.Int63n(int64(jitter)))
}
}
if rt == nil {
rt = time.NewTimer(st)
} else {
rt.Reset(st)
}
select {
case <-rqch:
rt.Stop()
case <-rt.C:
}
}
Expand Down

0 comments on commit da6924f

Please sign in to comment.