Skip to content

Commit

Permalink
Minor fixes in force replication verification (#4675)
Browse files Browse the repository at this point in the history
<!-- Describe what has changed in this PR -->

**What changed?**
- Minor fixes in force replication verification
- Sleep first in verification step
- Remove RetryableTimeout error

<!-- Tell your future self why have you made these changes -->
**Why?**


<!-- How have you verified this change? Tested locally? Added a unit
test? Checked in staging env? -->
**How did you test it?**


<!-- Assuming the worst case, what can be broken when deploying this
change to production? -->
**Potential risks**


<!-- Is this PR a hotfix candidate or require that a notification be
sent to the broader community? (Yes/No) -->
**Is hotfix candidate?**
  • Loading branch information
hehaifengcn committed Jul 31, 2023
1 parent 55c4d36 commit ec75820
Showing 1 changed file with 11 additions and 18 deletions.
29 changes: 11 additions & 18 deletions service/worker/migration/activities.go
Expand Up @@ -644,7 +644,6 @@ func (a *activities) verifyReplicationTasks(
}

const (
defaultNoProgressRetryableTimeout = 5 * time.Minute
defaultNoProgressNotRetryableTimeout = 15 * time.Minute
)

Expand Down Expand Up @@ -681,6 +680,10 @@ func (a *activities) VerifyReplicationTasks(ctx context.Context, request *verify
// - more than NonRetryableTimeout, it means potentially we encountered #4. The activity returns a
// non-retryable error and the force-replication workflow will be restarted.
for {

// Since replication has a lag, sleep first.
time.Sleep(request.VerifyInterval)

verified, progress, err := a.verifyReplicationTasks(ctx, request, &details, remoteClient)
if err != nil {
return err
Expand All @@ -697,23 +700,13 @@ func (a *activities) VerifyReplicationTasks(ctx context.Context, request *verify
}

diff := time.Now().Sub(details.CheckPoint)
if diff > defaultNoProgressRetryableTimeout {
if diff > defaultNoProgressNotRetryableTimeout {
// Potentially encountered a missing execution, return non-retryable error
return temporal.NewNonRetryableApplicationError(
fmt.Sprintf("verifyReplicationTasks was not able to make progress for more than %v minutes (not retryable). Not found WorkflowExecution: %v, Checkpoint: %v",
diff.Minutes(),
details.LastNotFoundWorkflowExecution, details.CheckPoint),
"", nil)
}

// return error to trigger activity retry
return verifyReplicationTasksTimeoutErr{
timeout: diff,
details: details,
}
if diff > defaultNoProgressNotRetryableTimeout {
// Potentially encountered a missing execution, return non-retryable error
return temporal.NewNonRetryableApplicationError(
fmt.Sprintf("verifyReplicationTasks was not able to make progress for more than %v minutes (not retryable). Not found WorkflowExecution: %v, Checkpoint: %v",
diff.Minutes(),
details.LastNotFoundWorkflowExecution, details.CheckPoint),
"", nil)
}

time.Sleep(request.VerifyInterval)
}
}

0 comments on commit ec75820

Please sign in to comment.