// Copyright (c) DataStax, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package proxy

import (
	"github.com/datastax/go-cassandra-native-protocol/message"
	"github.com/datastax/go-cassandra-native-protocol/primitive"
)

// RetryDecision is a type used for deciding what to do when a request has failed.
type RetryDecision int

func (r RetryDecision) String() string {
	switch r {
	case RetrySame:
		return "retry same node"
	case RetryNext:
		return "retry next node"
	case ReturnError:
		return "returning error"
	}
	return "unknown"
}

const (
	// RetrySame should be returned when a request should be retried on the same host.
	RetrySame RetryDecision = iota
	// RetryNext should be returned when a request should be retried on the next host according to the request's query
	// plan.
	RetryNext
	// ReturnError should be returned when a request's original error should be forwarded along to the client.
	ReturnError
)

// RetryPolicy is an interface for defining retry behavior when a server-side error occurs.
type RetryPolicy interface {
	// OnReadTimeout handles the retry decision for a server-side read timeout error (Read_timeout = 0x1200).
	// This occurs when a replica read request times out during a read query.
	OnReadTimeout(msg *message.ReadTimeout, retryCount int) RetryDecision

	// OnWriteTimeout handles the retry decision for a server-side write timeout error (Write_timeout = 0x1100).
	// This occurs when a replica write request times out during a write query.
	OnWriteTimeout(msg *message.WriteTimeout, retryCount int) RetryDecision

	// OnUnavailable handles the retry decision for a server-side unavailable exception (Unavailable = 0x1000).
	// This occurs when a coordinator determines that there are not enough replicas to handle a query at the requested
	// consistency level.
	OnUnavailable(msg *message.Unavailable, retryCount int) RetryDecision

	// OnErrorResponse handles the retry decision for other potentially recoverable errors.
	// This can be called for the following error types: server error (ServerError = 0x0000),
	// overloaded (Overloaded = 0x1001), truncate error (Truncate_error = 0x1003), read failure (Read_failure = 0x1300),
	// and write failure (Write_failure = 0x1500).
	OnErrorResponse(msg message.Error, retryCount int) RetryDecision
}
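
// A minimal sketch of a custom implementation (illustrative only; the name
// neverRetryPolicy is hypothetical and not part of the original file): any type
// that implements the four methods above satisfies RetryPolicy. This one simply
// forwards every error to the client.
type neverRetryPolicy struct{}

func (neverRetryPolicy) OnReadTimeout(_ *message.ReadTimeout, _ int) RetryDecision {
	return ReturnError
}

func (neverRetryPolicy) OnWriteTimeout(_ *message.WriteTimeout, _ int) RetryDecision {
	return ReturnError
}

func (neverRetryPolicy) OnUnavailable(_ *message.Unavailable, _ int) RetryDecision {
	return ReturnError
}

func (neverRetryPolicy) OnErrorResponse(_ message.Error, _ int) RetryDecision {
	return ReturnError
}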

type defaultRetryPolicy struct{}

var defaultRetryPolicyInstance defaultRetryPolicy

// NewDefaultRetryPolicy creates a new default retry policy.
// The default retry policy takes a conservative approach to retrying requests. In most cases it retries at most once,
// and only where a retry is likely to succeed.
func NewDefaultRetryPolicy() RetryPolicy {
	return &defaultRetryPolicyInstance
}
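
// Usage sketch (the surrounding proxy code is assumed here, not shown in this
// file): a request path consults the policy after a failed attempt and acts on
// the returned RetryDecision:
//
//	policy := NewDefaultRetryPolicy()
//	switch policy.OnUnavailable(msg, retryCount) {
//	case RetrySame:
//		// resend the request to the same node
//	case RetryNext:
//		// advance the query plan and resend to the next node
//	case ReturnError:
//		// forward the original error response to the client
//	}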

// OnReadTimeout retries in the case where there were enough replicas to satisfy the request, but one of the replicas
// didn't respond with data and timed out. It's likely that a single retry to the same coordinator will succeed because
// the coordinator will have recognized the replica as dead before the retry is attempted.
//
// In all other cases it will forward the original error to the client.
func (d defaultRetryPolicy) OnReadTimeout(msg *message.ReadTimeout, retryCount int) RetryDecision {
	if retryCount == 0 && msg.Received >= msg.BlockFor && !msg.DataPresent {
		return RetrySame
	} else {
		return ReturnError
	}
}
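
// Illustration (values assumed): two replicas were required and responded, but
// none returned data, so the first failure is retried on the same node and a
// second failure is returned to the client:
//
//	rt := &message.ReadTimeout{Received: 2, BlockFor: 2, DataPresent: false}
//	NewDefaultRetryPolicy().OnReadTimeout(rt, 0) // RetrySame
//	NewDefaultRetryPolicy().OnReadTimeout(rt, 1) // ReturnError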

// OnWriteTimeout retries in the case where a coordinator failed to write its batch log to a set of datacenter-local
// nodes. It's likely that a single retry to the same coordinator will succeed because it will have recognized the
// dead nodes and will use a different set of nodes.
//
// In all other cases it will forward the original error to the client.
func (d defaultRetryPolicy) OnWriteTimeout(msg *message.WriteTimeout, retryCount int) RetryDecision {
	if retryCount == 0 && msg.WriteType == primitive.WriteTypeBatchLog {
		return RetrySame
	} else {
		return ReturnError
	}
}
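
// Illustration (values assumed): only batch-log write timeouts are retried, and
// only on the first failure:
//
//	wt := &message.WriteTimeout{WriteType: primitive.WriteTypeBatchLog}
//	NewDefaultRetryPolicy().OnWriteTimeout(wt, 0) // RetrySame
//	NewDefaultRetryPolicy().OnWriteTimeout(wt, 1) // ReturnError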

// OnUnavailable retries, once, on the next coordinator in the query plan. This is to handle the case where a
// coordinator is failing because it was partitioned from a set of its replicas.
func (d defaultRetryPolicy) OnUnavailable(_ *message.Unavailable, retryCount int) RetryDecision {
	if retryCount == 0 {
		return RetryNext
	} else {
		return ReturnError
	}
}
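
// Illustration: an unavailable error is retried once on the next node in the
// query plan; a second failure is returned to the client:
//
//	NewDefaultRetryPolicy().OnUnavailable(&message.Unavailable{}, 0) // RetryNext
//	NewDefaultRetryPolicy().OnUnavailable(&message.Unavailable{}, 1) // ReturnError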

// OnErrorResponse retries on the next coordinator for all error types except for read and write failures.
func (d defaultRetryPolicy) OnErrorResponse(msg message.Error, retryCount int) RetryDecision {
	code := msg.GetErrorCode()
	if code == primitive.ErrorCodeReadFailure || code == primitive.ErrorCodeWriteFailure {
		return ReturnError
	} else {
		return RetryNext
	}
}
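
// Illustration (message values assumed): an overloaded coordinator is retried
// on the next node, while a read failure is returned to the client. Note that
// retryCount is not consulted, so these errors can be retried on each node in
// the query plan:
//
//	NewDefaultRetryPolicy().OnErrorResponse(&message.Overloaded{}, 0)  // RetryNext
//	NewDefaultRetryPolicy().OnErrorResponse(&message.ReadFailure{}, 0) // ReturnError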