From 5dc16b7cd09180aa299553161fb63d75b9f09187 Mon Sep 17 00:00:00 2001 From: Alfred Landrum Date: Mon, 19 Jun 2023 14:09:32 -0700 Subject: [PATCH] support delay before history joins membership --- common/dynamicconfig/constants.go | 3 +++ service/history/configs/config.go | 12 +++++++----- service/history/service.go | 12 +++++++++++- 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/common/dynamicconfig/constants.go b/common/dynamicconfig/constants.go index 312d80eed88..35d3ef721e6 100644 --- a/common/dynamicconfig/constants.go +++ b/common/dynamicconfig/constants.go @@ -478,6 +478,9 @@ const ( HistoryCacheMaxSize = "history.cacheMaxSize" // HistoryCacheTTL is TTL of history cache HistoryCacheTTL = "history.cacheTTL" + // HistoryStartupMembershipJoinDelay is the duration a history instance waits + // before joining membership after starting. + HistoryStartupMembershipJoinDelay = "history.startupMembershipJoinDelay" // HistoryShutdownDrainDuration is the duration of traffic drain during shutdown HistoryShutdownDrainDuration = "history.shutdownDrainDuration" // EventsCacheInitialSize is initial size of events cache diff --git a/service/history/configs/config.go b/service/history/configs/config.go index b3f2545c745..fc267fb1843 100644 --- a/service/history/configs/config.go +++ b/service/history/configs/config.go @@ -57,11 +57,12 @@ type Config struct { VisibilityDisableOrderByClause dynamicconfig.BoolPropertyFnWithNamespaceFilter VisibilityEnableManualPagination dynamicconfig.BoolPropertyFnWithNamespaceFilter - EmitShardLagLog dynamicconfig.BoolPropertyFn - MaxAutoResetPoints dynamicconfig.IntPropertyFnWithNamespaceFilter - ThrottledLogRPS dynamicconfig.IntPropertyFn - EnableStickyQuery dynamicconfig.BoolPropertyFnWithNamespaceFilter - ShutdownDrainDuration dynamicconfig.DurationPropertyFn + EmitShardLagLog dynamicconfig.BoolPropertyFn + MaxAutoResetPoints dynamicconfig.IntPropertyFnWithNamespaceFilter + ThrottledLogRPS dynamicconfig.IntPropertyFn + 
EnableStickyQuery dynamicconfig.BoolPropertyFnWithNamespaceFilter + ShutdownDrainDuration dynamicconfig.DurationPropertyFn + StartupMembershipJoinDelay dynamicconfig.DurationPropertyFn // HistoryCache settings // Change of these configs require shard restart @@ -334,6 +335,7 @@ func NewConfig( EnablePersistencePriorityRateLimiting: dc.GetBoolProperty(dynamicconfig.HistoryEnablePersistencePriorityRateLimiting, true), PersistenceDynamicRateLimitingParams: dc.GetMapProperty(dynamicconfig.HistoryPersistenceDynamicRateLimitingParams, dynamicconfig.DefaultDynamicRateLimitingParams), ShutdownDrainDuration: dc.GetDurationProperty(dynamicconfig.HistoryShutdownDrainDuration, 0*time.Second), + StartupMembershipJoinDelay: dc.GetDurationProperty(dynamicconfig.HistoryStartupMembershipJoinDelay, 0*time.Second), MaxAutoResetPoints: dc.GetIntPropertyFilteredByNamespace(dynamicconfig.HistoryMaxAutoResetPoints, DefaultHistoryMaxAutoResetPoints), DefaultWorkflowTaskTimeout: dc.GetDurationPropertyFilteredByNamespace(dynamicconfig.DefaultWorkflowTaskTimeout, common.DefaultWorkflowTaskTimeout), ContinueAsNewMinInterval: dc.GetDurationPropertyFilteredByNamespace(dynamicconfig.ContinueAsNewMinInterval, time.Second), diff --git a/service/history/service.go b/service/history/service.go index 68085398d9b..fc7e82c7def 100644 --- a/service/history/service.go +++ b/service/history/service.go @@ -113,7 +113,17 @@ func (s *Service) Start() { // that we own. Ideally, then, we would start the GRPC server, and only then // join membership. That's not possible with the GRPC interface, though, hence // we start membership in a goroutine. - go s.membershipMonitor.Start() + go func() { + if delay := s.config.StartupMembershipJoinDelay(); delay > 0 { + // In some situations, like rolling upgrades of the history service, + // pausing before joining membership can help separate the shard movement + // caused by another history instance terminating from that caused by this instance starting. 
+ logger.Info("history start: delaying before membership start", + tag.NewDurationTag("startupMembershipJoinDelay", delay)) + time.Sleep(delay) + } + s.membershipMonitor.Start() + }() logger.Info("Starting to serve on history listener") if err := s.server.Serve(s.grpcListener); err != nil {