9
9
10
10
grpc_retry "github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/retry"
11
11
"github.com/grpc-ecosystem/go-grpc-middleware/v2/util/backoffutils"
12
- enumspb "go.temporal.io/api/enums/v1"
13
- "go.temporal.io/api/errordetails/v1"
14
12
"google.golang.org/grpc"
15
13
"google.golang.org/grpc/codes"
16
14
"google.golang.org/grpc/status"
@@ -45,6 +43,11 @@ type (
45
43
maximumAttempts int
46
44
}
47
45
46
+ GrpcMessageTooLargeError struct {
47
+ err error
48
+ status * status.Status
49
+ }
50
+
48
51
contextKey struct {}
49
52
)
50
53
@@ -94,15 +97,15 @@ func NewGrpcRetryConfig(initialInterval time.Duration) *GrpcRetryConfig {
94
97
var (
95
98
// ConfigKey context key for GrpcRetryConfig
96
99
ConfigKey = contextKey {}
97
- // gRPC response codes that represent unconditionally retryable errors.
100
+ // gRPC response codes that represent retryable errors.
98
101
// The following status codes are never retried by the library:
99
102
// INVALID_ARGUMENT, NOT_FOUND, ALREADY_EXISTS, FAILED_PRECONDITION, ABORTED, OUT_OF_RANGE, DATA_LOSS
100
103
// codes.DeadlineExceeded and codes.Canceled are not here (and shouldn't be here!)
101
104
// because they are coming from go context and "context errors are not retriable based on user settings"
102
105
// by gRPC library.
103
- // codes.Internal and codes. ResourceExhausted have special logic for whether they are retryable or not,
104
- // and so they're not included in this list .
105
- alwaysRetryableCodes = []codes.Code {codes .Aborted , codes .Unavailable , codes .Unknown }
106
+ // codes.ResourceExhausted is non-retryable if it comes from GrpcMessageTooLargeError, but otherwise is retryable.
107
+ // codes.Internal is not included because it's retryable or non-retryable depending on server capabilities .
108
+ retryableCodesWithoutInternal = []codes.Code {codes .Aborted , codes . ResourceExhausted , codes .Unavailable , codes .Unknown }
106
109
)
107
110
108
111
// NewRetryOptionsInterceptor creates a new gRPC interceptor that populates retry options for each call based on values
@@ -132,7 +135,7 @@ func NewRetryOptionsInterceptor(excludeInternal *atomic.Bool) grpc.UnaryClientIn
132
135
opts = append (opts , grpc_retry .WithMax (math .MaxUint32 ))
133
136
}
134
137
opts = append (opts , grpc_retry .WithRetriable (func (err error ) bool {
135
- return IsRetryable (status . Convert ( err ) , excludeInternal )
138
+ return IsRetryable (err , excludeInternal )
136
139
}))
137
140
} else {
138
141
// Do not retry if retry config is not set.
@@ -142,51 +145,48 @@ func NewRetryOptionsInterceptor(excludeInternal *atomic.Bool) grpc.UnaryClientIn
142
145
}
143
146
}
144
147
145
- func IsRetryable (status * status.Status , excludeInternalFromRetry * atomic.Bool ) bool {
146
- if status == nil {
148
+ func IsRetryable (err error , excludeInternalFromRetry * atomic.Bool ) bool {
149
+ if _ , ok := err .(* GrpcMessageTooLargeError ); ok {
150
+ return false
151
+ }
152
+ grpcStatus := status .Convert (err )
153
+ if grpcStatus == nil {
147
154
return false
148
155
}
149
- errCode := status .Code ()
150
- for _ , retryable := range alwaysRetryableCodes {
156
+ errCode := grpcStatus .Code ()
157
+ for _ , retryable := range retryableCodesWithoutInternal {
151
158
if errCode == retryable {
152
159
return true
153
160
}
154
161
}
155
162
if errCode == codes .Internal {
156
163
return ! excludeInternalFromRetry .Load ()
157
164
}
158
- if errCode == codes .ResourceExhausted {
159
- if details := status .Details (); len (details ) > 0 {
160
- if failure , ok := details [0 ].(* errordetails.ResourceExhaustedFailure ); ok {
161
- return failure .Cause != RESOURCE_EXHAUSTED_CAUSE_EXT_GRPC_MESSAGE_TOO_LARGE
162
- }
163
- }
164
- }
165
165
return false
166
166
}
167
167
168
- // RESOURCE_EXHAUSTED_CAUSE_EXT_GRPC_MESSAGE_TOO_LARGE is an extension to the ResourceExhaustedCause enum to mark gRPC message too large errors.
169
- const RESOURCE_EXHAUSTED_CAUSE_EXT_GRPC_MESSAGE_TOO_LARGE enumspb.ResourceExhaustedCause = 101 // TODO: add the cause to the upstream API repo and remove this (see https://github.com/temporalio/sdk-go/issues/2030)
170
-
171
- // SetGrpcMessageTooLargeErrorCauseInterceptor adds appropriate error details if the error cause is gRPC message being too large.
172
- func SetGrpcMessageTooLargeErrorCauseInterceptor (ctx context.Context , method string , req , reply interface {}, cc * grpc.ClientConn , invoker grpc.UnaryInvoker , opts ... grpc.CallOption ) error {
168
+ // GrpcMessageTooLargeErrorInterceptor checks if the error is caused by gRPC message being too large and converts it into GrpcMessageTooLargeError.
169
+ func GrpcMessageTooLargeErrorInterceptor (ctx context.Context , method string , req , reply interface {}, cc * grpc.ClientConn , invoker grpc.UnaryInvoker , opts ... grpc.CallOption ) error {
173
170
err := invoker (ctx , method , req , reply , cc , opts ... )
174
- if grpcStatus := status .Convert (err ); isGrpcMessageTooLargeError (grpcStatus ) {
175
- // Copying code and message but ignoring original details
176
- newStatus := status .New (grpcStatus .Code (), grpcStatus .Message ())
177
- newStatus , detailsErr := newStatus .WithDetails (& errordetails.ResourceExhaustedFailure {
178
- Cause : RESOURCE_EXHAUSTED_CAUSE_EXT_GRPC_MESSAGE_TOO_LARGE ,
179
- })
180
- if detailsErr == nil {
181
- if newErr := newStatus .Err (); newErr != nil {
182
- err = newErr
183
- }
184
- }
171
+ if grpcStatus := status .Convert (err ); isGrpcMessageTooLargeStatus (grpcStatus ) {
172
+ err = & GrpcMessageTooLargeError {err : err , status : grpcStatus }
185
173
}
186
174
return err
187
175
}
188
176
189
- func isGrpcMessageTooLargeError (status * status.Status ) bool {
177
+ func (e * GrpcMessageTooLargeError ) Error () string {
178
+ return e .err .Error ()
179
+ }
180
+
181
+ func (e * GrpcMessageTooLargeError ) Unwrap () error {
182
+ return e .err
183
+ }
184
+
185
+ func (e * GrpcMessageTooLargeError ) GRPCStatus () * status.Status {
186
+ return e .status
187
+ }
188
+
189
+ func isGrpcMessageTooLargeStatus (status * status.Status ) bool {
190
190
if status == nil {
191
191
return false
192
192
}
0 commit comments