From d022e364d7bd7aa6fcad25ba9c1f3082a7bf4ac7 Mon Sep 17 00:00:00 2001 From: Maxim Fateev Date: Wed, 22 Apr 2020 20:45:49 -0700 Subject: [PATCH 1/3] Changed workflow timeouts --- common/message.proto | 13 +-- decision/message.proto | 57 ++++++++---- event/message.proto | 121 +++++++++++++++---------- execution/message.proto | 5 +- workflowservice/request_response.proto | 89 ++++++++++-------- 5 files changed, 167 insertions(+), 118 deletions(-) diff --git a/common/message.proto b/common/message.proto index 4e1516c5e..bfd649b36 100644 --- a/common/message.proto +++ b/common/message.proto @@ -54,25 +54,18 @@ message Header { } message RetryPolicy { - // Interval of the first retry. If coefficient is 1.0 then it is used for all retries. + // Interval of the first retry. If retryBackoffCoefficient is 1.0 then it is used for all retries. int32 initialIntervalInSeconds = 1; - // Coefficient used to calculate the next retry interval. // The next retry interval is previous interval multiplied by the coefficient. // Must be 1 or larger. double backoffCoefficient = 2; - // Maximum interval between retries. Exponential backoff leads to interval increase. - // This value is the cap of the increase. Default is 100x of initial interval. + // This value is the cap of the increase. Default is 100x of the initial interval. int32 maximumIntervalInSeconds = 3; - // Maximum number of attempts. When exceeded the retries stop even if not expired yet. - // Must be 1 or bigger. Default is unlimited. + // 1 disables retries. 0 means unlimited (up to the timeouts) int32 maximumAttempts = 4; - // Non-Retriable errors. Will stop retrying if error matches this list. repeated string nonRetriableErrorReasons = 5; - - // Expiration time for the whole retry process. - int32 expirationIntervalInSeconds = 6; } diff --git a/decision/message.proto b/decision/message.proto index 4680b7555..216db48c2 100644 --- a/decision/message.proto +++ b/decision/message.proto @@ -37,13 +37,25 @@ message ScheduleActivityTaskDecisionAttributes { common.ActivityType activityType = 2; string namespace = 3; tasklist.TaskList taskList = 4; - bytes input = 5; - int32 scheduleToCloseTimeoutSeconds = 6; - int32 scheduleToStartTimeoutSeconds = 7; - int32 startToCloseTimeoutSeconds = 8; - int32 heartbeatTimeoutSeconds = 9; - common.RetryPolicy retryPolicy = 10; - common.Header header = 11; + common.Header header = 5; + bytes input = 6; + // Indicates how long the caller is willing to wait for an activity completion. + // Limits for how long retries are happening. Either this or startToCloseTimeoutSeconds is required. + // When not specified defaults to the workflow execution timeout. + int32 scheduleToCloseTimeoutSeconds = 7; + // Limits time an activity task can stay in a task queue before a worker picks it up. + // This timeout is always non retriable as all a retry would achieve is to put it back into the same queue. + // Defaults to scheduleToCloseTimeoutSeconds or workflow execution timeout if not specified. + int32 scheduleToStartTimeoutSeconds = 8; + // Maximum time an activity is allowed to execute after a pick up by a worker. + // This timeout is always retriable. Either this or scheduleToCloseTimeoutSeconds is required. + int32 startToCloseTimeoutSeconds = 9; + // Maximum time between successful worker heartbeats. + int32 heartbeatTimeoutSeconds = 10; + // Retry parameters. Note that activity is retried by default according to a default retry policy. + // To disable retries provide a retry policy with maximumAttempts equals to 1. + // The retries happen up to scheduleToCloseTimeout. + common.RetryPolicy retryPolicy = 11; } message RequestCancelActivityTaskDecisionAttributes { @@ -103,8 +115,11 @@ message ContinueAsNewWorkflowExecutionDecisionAttributes { common.WorkflowType workflowType = 1; tasklist.TaskList taskList = 2; bytes input = 3; - int32 executionStartToCloseTimeoutSeconds = 4; - int32 taskStartToCloseTimeoutSeconds = 5; + // workflowExecutionTimeout is omitted as it shouldn'be overridden from within a workflow + // Timeout of a single workflow run + int32 workflowRunTimeoutSeconds = 4; + // Timeout of a single workflow task + int32 workflowTaskTimeoutSeconds = 5; int32 backoffStartIntervalInSeconds = 6; common.RetryPolicy retryPolicy = 7; common.ContinueAsNewInitiator initiator = 8; @@ -123,16 +138,20 @@ message StartChildWorkflowExecutionDecisionAttributes { common.WorkflowType workflowType = 3; tasklist.TaskList taskList = 4; bytes input = 5; - int32 executionStartToCloseTimeoutSeconds = 6; - int32 taskStartToCloseTimeoutSeconds = 7; - common.ParentClosePolicy parentClosePolicy = 8; - bytes control = 9; - common.WorkflowIdReusePolicy workflowIdReusePolicy = 10; - common.RetryPolicy retryPolicy = 11; - string cronSchedule = 12; - common.Header header = 13; - common.Memo memo = 14; - common.SearchAttributes searchAttributes = 15; + // Total workflow execution timeout including retries and continue as new + int32 workflowExecutionTimeoutSeconds = 6; + // Timeout of a single workflow run + int32 workflowRunTimeoutSeconds = 7; + // Timeout of a single workflow task + int32 workflowTaskTimeoutSeconds = 8; + common.ParentClosePolicy parentClosePolicy = 9; + bytes control = 10; + common.WorkflowIdReusePolicy workflowIdReusePolicy = 11; + common.RetryPolicy retryPolicy = 12; + string cronSchedule = 13; + common.Header header = 14; + common.Memo memo = 15; + common.SearchAttributes searchAttributes = 16; } message Decision { diff --git a/event/message.proto b/event/message.proto index 67fd5e6f6..d7ee4fb81 100644 --- a/event/message.proto +++ b/event/message.proto @@ -39,27 +39,31 @@ message WorkflowExecutionStartedEventAttributes { int64 parentInitiatedEventId = 4; tasklist.TaskList taskList = 5; bytes input = 6; - int32 executionStartToCloseTimeoutSeconds = 7; - int32 taskStartToCloseTimeoutSeconds = 8; - string continuedExecutionRunId = 9; - common.ContinueAsNewInitiator initiator = 10; - string continuedFailureReason = 11; - bytes continuedFailureDetails = 12; - bytes lastCompletionResult = 13; + // Total workflow execution timeout including retries and continue as new + int32 workflowExecutionTimeoutSeconds = 7; + // Timeout of a single workflow run + int32 workflowRunTimeoutSeconds = 8; + // Timeout of a single workflow task + int32 workflowTaskTimeoutSeconds = 9; + string continuedExecutionRunId = 10; + common.ContinueAsNewInitiator initiator = 11; + string continuedFailureReason = 12; + bytes continuedFailureDetails = 13; + bytes lastCompletionResult = 14; // This is the runId when the WorkflowExecutionStarted event is written. - string originalExecutionRunId = 14; - string identity = 15; + string originalExecutionRunId = 15; + string identity = 16; // This is the very first runId along the chain of ContinueAsNew and Reset. - string firstExecutionRunId = 16; - common.RetryPolicy retryPolicy = 17; - int32 attempt = 18; - int64 expirationTimestamp = 19; - string cronSchedule = 20; - int32 firstDecisionTaskBackoffSeconds = 21; - common.Memo memo = 22; - common.SearchAttributes searchAttributes = 23; - execution.ResetPoints prevAutoResetPoints = 24; - common.Header header = 25; + string firstExecutionRunId = 17; + common.RetryPolicy retryPolicy = 18; + int32 attempt = 19; + int64 workflowExecutionTimeoutTimestamp = 20; + string cronSchedule = 21; + int32 firstDecisionTaskBackoffSeconds = 22; + common.Memo memo = 23; + common.SearchAttributes searchAttributes = 24; + execution.ResetPoints prevAutoResetPoints = 25; + common.Header header = 26; } message WorkflowExecutionCompletedEventAttributes { @@ -82,17 +86,21 @@ message WorkflowExecutionContinuedAsNewEventAttributes { common.WorkflowType workflowType = 2; tasklist.TaskList taskList = 3; bytes input = 4; - int32 executionStartToCloseTimeoutSeconds = 5; - int32 taskStartToCloseTimeoutSeconds = 6; - int64 decisionTaskCompletedEventId = 7; - int32 backoffStartIntervalInSeconds = 8; - common.ContinueAsNewInitiator initiator = 9; - string failureReason = 10; - bytes failureDetails = 11; - bytes lastCompletionResult = 12; - common.Header header = 13; - common.Memo memo = 14; - common.SearchAttributes searchAttributes = 15; + // Total workflow execution timeout including retries and continue as new + int32 workflowExecutionTimeoutSeconds = 5; + // Timeout of a single workflow run + int32 workflowRunTimeoutSeconds = 6; + // Timeout of a single workflow task + int32 workflowTaskTimeoutSeconds = 7; + int64 decisionTaskCompletedEventId = 8; + int32 backoffStartIntervalInSeconds = 9; + common.ContinueAsNewInitiator initiator = 10; + string failureReason = 11; + bytes failureDetails = 12; + bytes lastCompletionResult = 13; + common.Header header = 14; + common.Memo memo = 15; + common.SearchAttributes searchAttributes = 16; } message DecisionTaskScheduledEventAttributes { @@ -140,14 +148,25 @@ message ActivityTaskScheduledEventAttributes { common.ActivityType activityType = 2; string namespace = 3; tasklist.TaskList taskList = 4; - bytes input = 5; - int32 scheduleToCloseTimeoutSeconds = 6; - int32 scheduleToStartTimeoutSeconds = 7; - int32 startToCloseTimeoutSeconds = 8; - int32 heartbeatTimeoutSeconds = 9; - int64 decisionTaskCompletedEventId = 10; - common.RetryPolicy retryPolicy = 11; - common.Header header = 12; + common.Header header = 5; + bytes input = 6; + // Indicates how long the caller is willing to wait for an activity completion. + // Limits for how long retries are happening. Either this or startToCloseTimeoutSeconds is required. + int32 scheduleToCloseTimeoutSeconds = 7; + // Limits time an activity task can stay in a task queue before a worker picks it up. + // This timeout is always non retriable as all a retry would achieve is to put it back into the same queue. + // Defaults to scheduleToCloseTimeoutSeconds or workflow execution timeout if not specified. + int32 scheduleToStartTimeoutSeconds = 8; + // Maximum time an activity is allowed to execute after a pick up by a worker. + // This timeout is always retriable. Either this or scheduleToCloseTimeoutSeconds is required. + int32 startToCloseTimeoutSeconds = 9; + // Maximum time between successful worker heartbeats. + int32 heartbeatTimeoutSeconds = 10; + int64 decisionTaskCompletedEventId = 11; + // Activities are provided by a default retry policy controlled through the service dynamic configuration. + // Retries are happening up to scheduleToCloseTimeout. + // To disable retries set retryPolicy.maximumAttempts to 1. + common.RetryPolicy retryPolicy = 12; } message ActivityTaskStartedEventAttributes { @@ -320,17 +339,21 @@ message StartChildWorkflowExecutionInitiatedEventAttributes { common.WorkflowType workflowType = 3; tasklist.TaskList taskList = 4; bytes input = 5; - int32 executionStartToCloseTimeoutSeconds = 6; - int32 taskStartToCloseTimeoutSeconds = 7; - common.ParentClosePolicy parentClosePolicy = 8; - bytes control = 9; - int64 decisionTaskCompletedEventId = 10; - common.WorkflowIdReusePolicy workflowIdReusePolicy = 11; - common.RetryPolicy retryPolicy = 12; - string cronSchedule = 13; - common.Header header = 14; - common.Memo memo = 15; - common.SearchAttributes searchAttributes = 16; + // Total workflow execution timeout including retries and continue as new + int32 workflowExecutionTimeoutSeconds = 6; + // Timeout of a single workflow run + int32 workflowRunTimeoutSeconds = 7; + // Timeout of a single workflow task + int32 workflowTaskTimeoutSeconds = 8; + common.ParentClosePolicy parentClosePolicy = 9; + bytes control = 10; + int64 decisionTaskCompletedEventId = 11; + common.WorkflowIdReusePolicy workflowIdReusePolicy = 12; + common.RetryPolicy retryPolicy = 13; + string cronSchedule = 14; + common.Header header = 15; + common.Memo memo = 16; + common.SearchAttributes searchAttributes = 17; } message StartChildWorkflowExecutionFailedEventAttributes { diff --git a/execution/message.proto b/execution/message.proto index 1082a8dee..351ee737f 100644 --- a/execution/message.proto +++ b/execution/message.proto @@ -55,8 +55,9 @@ message WorkflowExecutionInfo { message WorkflowExecutionConfiguration { tasklist.TaskList taskList = 1; - int32 executionStartToCloseTimeoutSeconds = 2; - int32 taskStartToCloseTimeoutSeconds = 3; + int32 workflowExecutionTimeoutSeconds = 2; + int32 workflowRunTimeoutSeconds = 3; + int32 workflowTaskTimeoutSeconds = 4; } message PendingActivityInfo { diff --git a/workflowservice/request_response.proto b/workflowservice/request_response.proto index 06aaf6f72..11836258a 100644 --- a/workflowservice/request_response.proto +++ b/workflowservice/request_response.proto @@ -118,16 +118,21 @@ message StartWorkflowExecutionRequest { common.WorkflowType workflowType = 3; tasklist.TaskList taskList = 4; bytes input = 5; - int32 executionStartToCloseTimeoutSeconds = 6; - int32 taskStartToCloseTimeoutSeconds = 7; - string identity = 8; - string requestId = 9; - common.WorkflowIdReusePolicy workflowIdReusePolicy = 10; - common.RetryPolicy retryPolicy = 11; - string cronSchedule = 12; - common.Memo memo = 13; - common.SearchAttributes searchAttributes = 14; - common.Header header = 15; + // Total workflow execution timeout including retries and continue as new + int32 workflowExecutionTimeoutSeconds = 6; + // Timeout of a single workflow run + int32 workflowRunTimeoutSeconds = 7; + // Timeout of a single workflow task + int32 workflowTaskTimeoutSeconds = 8; + string identity = 9; + string requestId = 10; + common.WorkflowIdReusePolicy workflowIdReusePolicy = 11; + // Retries up to workflowExecutionTimeout + common.RetryPolicy retryPolicy = 12; + string cronSchedule = 13; + common.Memo memo = 14; + common.SearchAttributes searchAttributes = 15; + common.Header header = 16; } message StartWorkflowExecutionResponse { @@ -211,21 +216,25 @@ message PollForActivityTaskRequest { message PollForActivityTaskResponse { bytes taskToken = 1; - execution.WorkflowExecution workflowExecution = 2; - string activityId = 3; - common.ActivityType activityType = 4; - bytes input = 5; - int64 scheduledTimestamp = 6; - int32 scheduleToCloseTimeoutSeconds = 7; - int64 startedTimestamp = 8; - int32 startToCloseTimeoutSeconds = 9; - int32 heartbeatTimeoutSeconds = 10; - int32 attempt = 11; - int64 scheduledTimestampOfThisAttempt = 12; - bytes heartbeatDetails = 13; - common.WorkflowType workflowType = 14; - string workflowNamespace = 15; - common.Header header = 16; + string workflowNamespace = 2; + common.WorkflowType workflowType = 3; + execution.WorkflowExecution workflowExecution = 4; + common.ActivityType activityType = 5; + string activityId = 6; + common.Header header = 7; + bytes input = 8; + bytes heartbeatDetails = 9; + int64 scheduledTimestamp = 10; + int64 scheduledTimestampOfThisAttempt = 11; + int64 startedTimestamp = 12; + int32 attempt = 13; + int32 scheduleToCloseTimeoutSeconds = 14; + int32 startToCloseTimeoutSeconds = 15; + int32 heartbeatTimeoutSeconds = 16; + // This is an actual retry policy the service uses. + // It can be different from the one provided (or not) during activity scheduling + // as the service can override the provided one with default values. + common.RetryPolicy retryPolicy = 17; } message RecordActivityTaskHeartbeatRequest { @@ -345,19 +354,23 @@ message SignalWithStartWorkflowExecutionRequest { common.WorkflowType workflowType = 3; tasklist.TaskList taskList = 4; bytes input = 5; - int32 executionStartToCloseTimeoutSeconds = 6; - int32 taskStartToCloseTimeoutSeconds = 7; - string identity = 8; - string requestId = 9; - common.WorkflowIdReusePolicy workflowIdReusePolicy = 10; - string signalName = 11; - bytes signalInput = 12; - bytes control = 13; - common.RetryPolicy retryPolicy = 14; - string cronSchedule = 15; - common.Memo memo = 16; - common.SearchAttributes searchAttributes = 17; - common.Header header = 18; + // Total workflow execution timeout including retries and continue as new + int32 workflowExecutionTimeoutSeconds = 6; + // Timeout of a single workflow run + int32 workflowRunTimeoutSeconds = 7; + // Timeout of a single workflow task + int32 workflowTaskTimeoutSeconds = 8; + string identity = 9; + string requestId = 10; + common.WorkflowIdReusePolicy workflowIdReusePolicy = 11; + string signalName = 12; + bytes signalInput = 13; + bytes control = 14; + common.RetryPolicy retryPolicy = 15; + string cronSchedule = 16; + common.Memo memo = 17; + common.SearchAttributes searchAttributes = 18; + common.Header header = 19; } message SignalWithStartWorkflowExecutionResponse { From 4945936c4e1d5b6521f88d8ac3870f3cbb3e04e5 Mon Sep 17 00:00:00 2001 From: Maxim Fateev Date: Thu, 23 Apr 2020 14:15:52 -0700 Subject: [PATCH 2/3] removed workflowExecutionTimeoutSeconds from WorkflowExecutionContinuedAsNewEventAttributes --- event/message.proto | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/event/message.proto b/event/message.proto index d7ee4fb81..328da1453 100644 --- a/event/message.proto +++ b/event/message.proto @@ -86,8 +86,7 @@ message WorkflowExecutionContinuedAsNewEventAttributes { common.WorkflowType workflowType = 2; tasklist.TaskList taskList = 3; bytes input = 4; - // Total workflow execution timeout including retries and continue as new - int32 workflowExecutionTimeoutSeconds = 5; + // workflowExecutionTimeout is omitted as it shouldn'be overridden from within a workflow // Timeout of a single workflow run int32 workflowRunTimeoutSeconds = 6; // Timeout of a single workflow task From f3a3b7309c4e62a859710a1219032edc288f71b4 Mon Sep 17 00:00:00 2001 From: Maxim Fateev Date: Wed, 29 Apr 2020 15:09:12 -0700 Subject: [PATCH 3/3] PR feedback --- decision/message.proto | 3 ++- event/message.proto | 4 +++- workflowservice/request_response.proto | 11 ++++++----- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/decision/message.proto b/decision/message.proto index 486256891..669facb13 100644 --- a/decision/message.proto +++ b/decision/message.proto @@ -51,8 +51,9 @@ message ScheduleActivityTaskDecisionAttributes { // This timeout is always retriable. Either this or scheduleToCloseTimeoutSeconds is required. int32 startToCloseTimeoutSeconds = 9; // Maximum time between successful worker heartbeats. + // Optional. By default no heartbeating is required. int32 heartbeatTimeoutSeconds = 10; - // Retry parameters. Note that activity is retried by default according to a default retry policy. + // Activity retry policy. Note that activity is retried by default according to a default retry policy. // To disable retries provide a retry policy with maximumAttempts equals to 1. // The retries happen up to scheduleToCloseTimeout. common.RetryPolicy retryPolicy = 11; diff --git a/event/message.proto b/event/message.proto index ed4918d0a..ffd8c3424 100644 --- a/event/message.proto +++ b/event/message.proto @@ -57,7 +57,9 @@ message WorkflowExecutionStartedEventAttributes { string firstExecutionRunId = 17; common.RetryPolicy retryPolicy = 18; int32 attempt = 19; - int64 workflowExecutionTimeoutTimestamp = 20; + // The absolute time at which workflow is timed out. + // This time is passed without change to the next run/retry of a workflow. + int64 workflowExecutionExpirationTimestamp = 20; string cronSchedule = 21; int32 firstDecisionTaskBackoffSeconds = 22; common.Memo memo = 23; diff --git a/workflowservice/request_response.proto b/workflowservice/request_response.proto index 1332a30ba..58a466e88 100644 --- a/workflowservice/request_response.proto +++ b/workflowservice/request_response.proto @@ -118,16 +118,16 @@ message StartWorkflowExecutionRequest { common.WorkflowType workflowType = 3; tasklist.TaskList taskList = 4; common.Payload input = 5; - // Total workflow execution timeout including retries and continue as new + // Total workflow execution timeout including retries and continue as new. int32 workflowExecutionTimeoutSeconds = 6; - // Timeout of a single workflow run + // Timeout of a single workflow run. int32 workflowRunTimeoutSeconds = 7; - // Timeout of a single workflow task + // Timeout of a single workflow task. int32 workflowTaskTimeoutSeconds = 8; string identity = 9; string requestId = 10; common.WorkflowIdReusePolicy workflowIdReusePolicy = 11; - // Retries up to workflowExecutionTimeout + // Retries up to workflowExecutionTimeoutSeconds. common.RetryPolicy retryPolicy = 12; string cronSchedule = 13; common.Memo memo = 14; @@ -232,7 +232,8 @@ message PollForActivityTaskResponse { int32 heartbeatTimeoutSeconds = 16; // This is an actual retry policy the service uses. // It can be different from the one provided (or not) during activity scheduling - // as the service can override the provided one with default values. + // as the service can override the provided one in case its values are not specified + // or exceed configured system limits. common.RetryPolicy retryPolicy = 17; }