-
Notifications
You must be signed in to change notification settings - Fork 597
/
retries.go
187 lines (154 loc) · 6.3 KB
/
retries.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
/*
Copyright 2021 The Knative Authors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package kncloudevents
import (
"context"
"fmt"
"math"
"net/http"
"time"
"github.com/rickb777/date/period"
v1 "knative.dev/eventing/pkg/apis/duck/v1"
)
// noRetries is the baseline configuration handed out by NoRetries: it allows
// zero retries, its CheckRetry never asks for another attempt and its Backoff
// never waits.
var noRetries = RetryConfig{
	RetryMax: 0,
	Backoff: func(int, *http.Response) time.Duration {
		return 0
	},
	CheckRetry: func(context.Context, *http.Response, error) (bool, error) {
		return false, nil
	},
}
// CheckRetry specifies a policy for handling retries. It is called
// following each request with the response and error values returned by
// the http.Client.
//
// The boolean result reports whether the request should be retried: if
// CheckRetry returns false, the Client stops retrying and returns the
// response to the caller. If CheckRetry returns a non-nil error, that error
// value is returned in lieu of the error from the request.
//
// The Client will close any response body when retrying, but if the retry is
// aborted it is up to the CheckRetry callback to properly close any response
// body before returning.
type CheckRetry func(ctx context.Context, resp *http.Response, err error) (bool, error)
// Backoff specifies a policy for how long to wait between retries.
// It is called after a failing request to determine the amount of time
// that should pass before trying again.
//
// attemptNum identifies the attempt the wait is computed for and resp is the
// response of the failed request.
// NOTE(review): whether attemptNum starts at 0 or 1, and whether resp may be
// nil, is decided by the caller and not visible here — confirm.
type Backoff func(attemptNum int, resp *http.Response) time.Duration
// RetryConfig bundles the settings that govern how a request is retried:
// the retry budget, the retry and backoff policies, and per-request limits.
type RetryConfig struct {
// RetryMax is the maximum number of retries.
RetryMax int
// BackoffDelay and BackoffPolicy are just copied from the original
// DeliverySpec so we can detect if anything has changed. We can not do that
// with the CheckRetry and Backoff functions below (at least not easily).
BackoffDelay *string
BackoffPolicy *v1.BackoffPolicyType
// CheckRetry is the policy deciding whether a request is retried.
CheckRetry CheckRetry
// Backoff is the policy deciding how long to wait between retries.
Backoff Backoff
// RequestTimeout represents the timeout of the single request
RequestTimeout time.Duration
// RetryAfterMaxDuration represents an optional override for the maximum
// value allowed for "Retry-After" headers in 429 / 503 responses. A nil
// value indicates no maximum override. A value of "0" indicates "Retry-After"
// headers are to be ignored.
RetryAfterMaxDuration *time.Duration
}
// NoRetries returns a copy of the package's no-retry configuration: RetryMax
// is zero, CheckRetry always answers "do not retry" and Backoff always
// returns a zero wait. Because the struct is returned by value, assigning to
// fields of the result does not modify the package-level default.
func NoRetries() RetryConfig {
	cfg := noRetries
	return cfg
}
// RetryConfigFromDeliverySpec translates a DeliverySpec into a RetryConfig.
//
// The result starts from NoRetries() with CheckRetry set to
// RetryIfGreaterThan300 and is then refined by the spec:
//   - Retry, when set, becomes RetryMax.
//   - BackoffPolicy and BackoffDelay are always copied as-is. Only when both
//     are set is the delay parsed with period.Parse and a linear
//     (delay * attemptNum) or exponential (delay * 2^attemptNum) Backoff
//     installed; otherwise the zero-wait Backoff from NoRetries() is kept.
//     Waits that would overflow time.Duration saturate at its maximum value
//     instead of wrapping around.
//   - Timeout, when set, is parsed into RequestTimeout.
//   - RetryAfterMax, when set, is parsed into RetryAfterMaxDuration.
//
// A non-nil error is returned when one of the period strings cannot be
// parsed; the configuration built up to that point is returned alongside it.
func RetryConfigFromDeliverySpec(spec v1.DeliverySpec) (RetryConfig, error) {
	retryConfig := NoRetries()
	retryConfig.CheckRetry = RetryIfGreaterThan300

	if spec.Retry != nil {
		retryConfig.RetryMax = int(*spec.Retry)
	}

	retryConfig.BackoffPolicy = spec.BackoffPolicy
	retryConfig.BackoffDelay = spec.BackoffDelay

	if spec.BackoffPolicy != nil && spec.BackoffDelay != nil {
		delay, err := period.Parse(*spec.BackoffDelay)
		if err != nil {
			return retryConfig, fmt.Errorf("failed to parse Spec.BackoffDelay: %w", err)
		}

		// The second result of Duration is deliberately ignored; the computed
		// duration is used either way.
		// NOTE(review): per the period package it reports whether the
		// conversion was precise — confirm.
		delayDuration, _ := delay.Duration()

		switch *spec.BackoffPolicy {
		case v1.BackoffPolicyExponential:
			retryConfig.Backoff = func(attemptNum int, _ *http.Response) time.Duration {
				return saturatingScale(delayDuration, exponentialFactor(attemptNum))
			}
		case v1.BackoffPolicyLinear:
			retryConfig.Backoff = func(attemptNum int, _ *http.Response) time.Duration {
				return saturatingScale(delayDuration, int64(attemptNum))
			}
		}
	}

	if spec.Timeout != nil {
		timeout, err := period.Parse(*spec.Timeout)
		if err != nil {
			return retryConfig, fmt.Errorf("failed to parse Spec.Timeout: %w", err)
		}
		// Precision flag deliberately ignored, see BackoffDelay above.
		retryConfig.RequestTimeout, _ = timeout.Duration()
	}

	if spec.RetryAfterMax != nil {
		maxPeriod, err := period.Parse(*spec.RetryAfterMax)
		if err != nil { // Should never happen based on DeliverySpec validation
			return retryConfig, fmt.Errorf("failed to parse Spec.RetryAfterMax: %w", err)
		}
		// Precision flag deliberately ignored, see BackoffDelay above.
		maxDuration, _ := maxPeriod.Duration()
		retryConfig.RetryAfterMaxDuration = &maxDuration
	}

	return retryConfig, nil
}

// exponentialFactor returns 2^attemptNum as an int64, clamped to
// math.MaxInt64 so the float-to-integer conversion is never out of range.
// Negative attempt numbers yield 0 (the integer part of a value below 1).
func exponentialFactor(attemptNum int) int64 {
	factor := math.Exp2(float64(attemptNum))
	if factor >= float64(math.MaxInt64) {
		return math.MaxInt64
	}
	return int64(factor)
}

// saturatingScale returns delay * factor, clamped to the largest
// time.Duration when the product of two positive operands would overflow.
// Zero or negative operands are multiplied as-is.
func saturatingScale(delay time.Duration, factor int64) time.Duration {
	const maxDuration = time.Duration(math.MaxInt64)
	if delay > 0 && factor > 0 && delay > maxDuration/time.Duration(factor) {
		return maxDuration
	}
	return delay * time.Duration(factor)
}
// RetryIfGreaterThan300 is a simple default retry policy. It asks for a retry
// when there is no response at all, when the response status code is 300 or
// higher, or when the status code is -1. Every other status code (anything
// below 300 other than -1) is not retried. The incoming err is always passed
// through unchanged as the second result.
func RetryIfGreaterThan300(_ context.Context, response *http.Response, err error) (bool, error) {
	if response == nil {
		return true, err
	}
	code := response.StatusCode
	retry := code >= 300 || code == -1
	return retry, err
}
// SelectiveRetry is an alternative retry policy that decides based on the
// response whether another attempt should be made.
//
// Returning true indicates a retry should occur. The returned error is
// always nil, so the error from the request itself is never replaced.
//
// A retry is triggered for:
//   - nil responses
//   - emitted errors
//   - status codes that are 5XX, 404, 408, 409, 429, as well as the status
//     code -1
func SelectiveRetry(_ context.Context, response *http.Response, err error) (bool, error) {
	// A missing response or any error is always worth another attempt.
	if response == nil || err != nil {
		return true, nil
	}

	code := response.StatusCode
	switch code {
	// Normally 4xx responses should NOT be retried, but a few codes in that
	// range are known to be returned by knative-eventing in situations where
	// a retry helps:
	//
	//   404  Ideally a permanent "Not Found" would not be retried, but a 404
	//        can be returned while a pod is still becoming ready.
	//   408  Retrying after a "Request Timeout" is good practice.
	//   409  Returned by the E2E tests, so "Conflict" must be retried or the
	//        tests will fail
	//        (see knative.dev/eventing/test/lib/recordevents/receiver/receiver.go).
	//   429  A retry typically involves a delay (usually an exponential
	//        backoff), so retrying after "Too Many Requests" is useful.
	case http.StatusNotFound,
		http.StatusRequestTimeout,
		http.StatusConflict,
		http.StatusTooManyRequests:
		return true, nil
	case -1:
		return true, nil
	}

	// Of the remaining codes only 5XX (and above) are retried; 1XX, 2XX, 3XX
	// and most 4XX responses are not.
	return code >= 500, nil
}