Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FIXED] Recreating ordered consumers on server restart #1425

Merged
merged 4 commits into from
Oct 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
158 changes: 158 additions & 0 deletions jetstream/jetstream_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,3 +116,161 @@ func TestValidateSubject(t *testing.T) {
})
}
}

func TestRetryWithBackoff(t *testing.T) {
tests := []struct {
name string
givenOpts backoffOpts
withError bool
timeout time.Duration
cancelAfter time.Duration
successfulAttemptNum int
expectedAttemptsCount int
}{
{
name: "infinite attempts, 5 tries before success",
givenOpts: backoffOpts{
attempts: -1,
initialInterval: 10 * time.Millisecond,
maxInterval: 60 * time.Millisecond,
},
withError: false,
successfulAttemptNum: 5,
// 0ms + 10ms + 20ms + 40ms + 60ms = 130ms
timeout: 200 * time.Millisecond,
expectedAttemptsCount: 5,
},
{
name: "infinite attempts, 5 tries before success, without initial execution",
givenOpts: backoffOpts{
attempts: -1,
initialInterval: 10 * time.Millisecond,
disableInitialExecution: true,
factor: 2,
maxInterval: 60 * time.Millisecond,
},
withError: false,
successfulAttemptNum: 5,
// 10ms + 20ms + 40ms + 60ms + 60 = 190ms
timeout: 250 * time.Millisecond,
expectedAttemptsCount: 5,
},
{
name: "5 attempts, unsuccessful",
givenOpts: backoffOpts{
attempts: 5,
initialInterval: 10 * time.Millisecond,
factor: 2,
maxInterval: 60 * time.Millisecond,
},
withError: true,
// 0ms + 10ms + 20ms + 40ms + 60ms = 130ms
timeout: 200 * time.Millisecond,
expectedAttemptsCount: 5,
},
{
name: "custom backoff values, should override other settings",
givenOpts: backoffOpts{
initialInterval: 2 * time.Second,
factor: 2,
maxInterval: 100 * time.Millisecond,
customBackoff: []time.Duration{10 * time.Millisecond, 20 * time.Millisecond, 30 * time.Millisecond, 40 * time.Millisecond, 50 * time.Millisecond},
},
withError: false,
successfulAttemptNum: 4,
// 10ms + 20ms + 30ms + 40ms = 100ms
timeout: 150 * time.Millisecond,
expectedAttemptsCount: 4,
},
{
name: "no custom backoff, with cancel",
givenOpts: backoffOpts{
attempts: -1,
initialInterval: 100 * time.Millisecond,
factor: 1,
},
withError: false,
cancelAfter: 150 * time.Millisecond,
timeout: 200 * time.Millisecond,
expectedAttemptsCount: 2,
},
{
name: "custom backoff, with cancel",
givenOpts: backoffOpts{
customBackoff: []time.Duration{100 * time.Millisecond, 100 * time.Millisecond, 100 * time.Millisecond},
},
cancelAfter: 150 * time.Millisecond,
expectedAttemptsCount: 1,
timeout: 200 * time.Millisecond,
},
{
name: "attempts num not provided",
givenOpts: backoffOpts{
initialInterval: 100 * time.Millisecond,
factor: 1,
},
withError: true,
timeout: 1 * time.Second,
expectedAttemptsCount: 1,
},
{
name: "custom backoff, but attempts num provided",
givenOpts: backoffOpts{
customBackoff: []time.Duration{100 * time.Millisecond, 100 * time.Millisecond, 100 * time.Millisecond},
attempts: 5,
},
withError: true,
timeout: 1 * time.Second,
expectedAttemptsCount: 1,
},
}

for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
ok := make(chan struct{})
errs := make(chan error, 1)

var cancelChan chan struct{}
if test.cancelAfter != 0 {
cancelChan = make(chan struct{})
test.givenOpts.cancel = cancelChan
}
var count int
go func() {
err := retryWithBackoff(func(attempt int) (bool, error) {
count = attempt
if test.successfulAttemptNum != 0 && attempt == test.successfulAttemptNum-1 {
return false, nil
}
return true, fmt.Errorf("error %d", attempt)
}, test.givenOpts)
if err != nil {
errs <- err
return
}
close(ok)
}()
if test.cancelAfter > 0 {
go func() {
time.Sleep(test.cancelAfter)
close(cancelChan)
}()
}
select {
case <-ok:
if test.withError {
t.Fatal("Expected error; got nil")
}
case err := <-errs:
if !test.withError {
t.Fatalf("Unexpected error: %v", err)
}
case <-time.After(test.timeout):
t.Fatalf("Timeout after %v", test.timeout)
}
if count != test.expectedAttemptsCount-1 {
t.Fatalf("Invalid count; want: %d; got: %d", test.expectedAttemptsCount, count)
}
})
}
}
19 changes: 13 additions & 6 deletions jetstream/ordered.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ var errOrderedSequenceMismatch = errors.New("sequence mismatch")

// Consume can be used to continuously receive messages and handle them with the provided callback function
func (c *orderedConsumer) Consume(handler MessageHandler, opts ...PullConsumeOpt) (ConsumeContext, error) {
if c.consumerType == consumerTypeNotSet || c.consumerType == consumerTypeConsume && c.currentConsumer == nil {
if (c.consumerType == consumerTypeNotSet || c.consumerType == consumerTypeConsume) && c.currentConsumer == nil {
err := c.reset()
if err != nil {
return nil, err
Expand All @@ -81,7 +81,7 @@ func (c *orderedConsumer) Consume(handler MessageHandler, opts ...PullConsumeOpt
return nil, ErrOrderConsumerUsedAsFetch
}
c.consumerType = consumerTypeConsume
consumeOpts, err := parseConsumeOpts(opts...)
consumeOpts, err := parseConsumeOpts(true, opts...)
if err != nil {
return nil, fmt.Errorf("%w: %s", ErrInvalidOption, err)
}
Expand Down Expand Up @@ -178,7 +178,8 @@ func (c *orderedConsumer) errHandler(serial int) func(cc ConsumeContext, err err
}
if errors.Is(err, ErrNoHeartbeat) ||
errors.Is(err, errOrderedSequenceMismatch) ||
errors.Is(err, ErrConsumerDeleted) {
errors.Is(err, ErrConsumerDeleted) ||
errors.Is(err, ErrConsumerNotFound) {
// only reset if serial matches the current consumer serial and there is no reset in progress
if serial == c.serial && atomic.LoadUint32(&c.resetInProgress) == 0 {
atomic.StoreUint32(&c.resetInProgress, 1)
Expand All @@ -190,7 +191,7 @@ func (c *orderedConsumer) errHandler(serial int) func(cc ConsumeContext, err err

// Messages returns [MessagesContext], allowing continuously iterating over messages on a stream.
func (c *orderedConsumer) Messages(opts ...PullMessagesOpt) (MessagesContext, error) {
if c.consumerType == consumerTypeNotSet || c.consumerType == consumerTypeConsume && c.currentConsumer == nil {
if (c.consumerType == consumerTypeNotSet || c.consumerType == consumerTypeConsume) && c.currentConsumer == nil {
err := c.reset()
if err != nil {
return nil, err
Expand All @@ -202,7 +203,7 @@ func (c *orderedConsumer) Messages(opts ...PullMessagesOpt) (MessagesContext, er
return nil, ErrOrderConsumerUsedAsFetch
}
c.consumerType = consumerTypeConsume
consumeOpts, err := parseMessagesOpts(opts...)
consumeOpts, err := parseMessagesOpts(true, opts...)
if err != nil {
return nil, fmt.Errorf("%w: %s", ErrInvalidOption, err)
}
Expand Down Expand Up @@ -386,13 +387,19 @@ func (c *orderedConsumer) reset() error {
defer c.Unlock()
defer atomic.StoreUint32(&c.resetInProgress, 0)
if c.currentConsumer != nil {
c.currentConsumer.Lock()
if c.currentConsumer.subscriptions[""] != nil {
c.currentConsumer.subscriptions[""].Stop()
}
consName := c.currentConsumer.CachedInfo().Name
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

are we confident that consumer name is updated after recreation, so we always delete the proper one?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should be good, in line 443 we do:

c.currentConsumer = cons.(*pullConsumer)

This is done right after successful create.

c.currentConsumer.Unlock()
var err error
for i := 0; ; i++ {
if c.cfg.MaxResetAttempts > 0 && i == c.cfg.MaxResetAttempts {
return fmt.Errorf("%w: maximum number of delete attempts reached: %s", ErrOrderedConsumerReset, err)
}
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
err = c.jetStream.DeleteConsumer(ctx, c.stream, c.currentConsumer.CachedInfo().Name)
err = c.jetStream.DeleteConsumer(ctx, c.stream, consName)
cancel()
if err != nil {
if errors.Is(err, ErrConsumerNotFound) {
Expand Down