forked from hashicorp/raft
/
transport.go
331 lines (293 loc) · 8.87 KB
/
transport.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
package fuzzy
import (
"bufio"
"bytes"
"errors"
"fmt"
"github.com/hashicorp/go-hclog"
"io"
"os"
"path/filepath"
"sync"
"time"
"github.com/hashicorp/go-msgpack/codec"
"github.com/titus12/raft"
)
// codecHandle is the msgpack handle shared by every encoder and decoder that
// sendRPC creates when copying requests and responses.
var (
codecHandle codec.MsgpackHandle
)
// appendEntries is a compact record of one AppendEntries call made through a
// transport. Records are collected by transport.AppendEntries and written out
// by transport.DumpLog.
type appendEntries struct {
// source is the name of the node that sent the request.
source string
// target is the address the request was sent to.
target raft.ServerAddress
// NOTE(review): term is never populated anywhere in this file, so it is
// always zero — confirm whether it should carry the request's term.
term uint64
// firstIndex is the Index of the first entry in the request, or 0 when the
// request carried no entries.
firstIndex uint64
// lastIndex is the Index of the last entry in the request, or 0 when the
// request carried no entries.
lastIndex uint64
// commitIndex is the request's LeaderCommitIndex.
commitIndex uint64
}
// transports is the registry of per-node transports, keyed by node name.
// The embedded RWMutex is taken by AddNode (write) and sendRPC (read) around
// accesses to nodes.
type transports struct {
sync.RWMutex
// nodes maps a node name to that node's transport.
nodes map[string]*transport
// log is handed to every transport created through AddNode.
log hclog.Logger
}
// newTransports builds an empty transport registry. The supplied logger is
// later shared with every transport added through AddNode.
func newTransports(log hclog.Logger) *transports {
	registry := new(transports)
	registry.nodes = map[string]*transport{}
	registry.log = log
	return registry
}
// AddNode creates a transport for node n using the given hooks, gives it the
// registry's logger, and registers it under n. A transport already registered
// under the same name is replaced. The new transport is returned.
func (tc *transports) AddNode(n string, hooks TransportHooks) *transport {
	created := newTransport(n, tc, hooks)
	created.log = tc.log

	// Hold the write lock only for the map update.
	tc.Lock()
	defer tc.Unlock()
	tc.nodes[n] = created

	return created
}
// TransportHooks allows a test to customize the behavior of the transport.
// If a PreXXX call returns an error, that error is returned to the caller and
// the RPC is never delivered to the target.
type TransportHooks interface {
// PreRPC is called before every RPC sent through the transport. A non-nil
// error aborts the call immediately.
PreRPC(src, target string, r *raft.RPC) error
// PostRPC is called after the RPC has been processed by the target (or
// short-circuited by a Pre hook), but before the source sees the response.
// A non-nil error replaces the error carried by the response.
PostRPC(src, target string, r *raft.RPC, result *raft.RPCResponse) error
// PreRequestVote is called before sending a RequestVote RPC request.
// Returning a non-nil response or a non-nil error short-circuits the RPC:
// the returned values are used as the result and the target is not contacted.
PreRequestVote(src, target string, r *raft.RequestVoteRequest) (*raft.RequestVoteResponse, error)
// PreAppendEntries is called before sending an AppendEntries RPC request.
// Returning a non-nil response or a non-nil error short-circuits the RPC:
// the returned values are used as the result and the target is not contacted.
PreAppendEntries(src, target string, r *raft.AppendEntriesRequest) (*raft.AppendEntriesResponse, error)
}
// transport is the in-memory transport of a single node. RPCs are delivered by
// pushing them directly onto the consumer channel of the target's transport.
type transport struct {
// log is assigned by transports.AddNode; newTransport itself leaves it unset.
log hclog.Logger
// transports is the registry used to look up the transport of an RPC target.
transports *transports
// node is this node's name; LocalAddr returns it as the node's address.
node string
// ae records AppendEntries calls made through this transport. Recording
// stops once the slice's initial capacity is used up.
ae []appendEntries
// consumer carries inbound RPCs addressed to this node.
consumer chan raft.RPC
// hooks are optional test hooks; sendRPC skips them when nil.
hooks TransportHooks
}
// newTransport builds the transport for node, attached to registry tc and
// using the given (possibly nil) hooks. The inbound RPC channel is unbuffered
// and room for 50000 AppendEntries records is reserved up front. The logger is
// not set here; transports.AddNode assigns it.
func newTransport(node string, tc *transports, hooks TransportHooks) *transport {
	const recordCapacity = 50000

	t := new(transport)
	t.node = node
	t.transports = tc
	t.hooks = hooks
	t.consumer = make(chan raft.RPC)
	t.ae = make([]appendEntries, 0, recordCapacity)
	return t
}
// Consumer exposes the receive side of this node's inbound RPC channel, from
// which RPC requests can be read and answered.
func (t *transport) Consumer() <-chan raft.RPC {
	var inbound <-chan raft.RPC = t.consumer
	return inbound
}
// LocalAddr reports this node's own address, which is simply its node name,
// so that it can be told apart from its peers.
func (t *transport) LocalAddr() raft.ServerAddress {
	self := raft.ServerAddress(t.node)
	return self
}
// sendRPC delivers req to the transport registered under target and copies the
// reply into resp.
//
// The request is round-tripped through msgpack so that the target receives its
// own decoded copy rather than the sender's pointer; the response is copied
// into resp the same way. Delivery pushes the RPC onto the target's consumer
// channel and then waits for the reply, so the call blocks until the target
// has consumed and answered it.
//
// Hooks, when set, run around delivery: an error from PreRPC aborts the call;
// a non-nil response or error from PreRequestVote / PreAppendEntries is used
// as the result without contacting the target; an error from PostRPC replaces
// the result's error.
//
// The result's error is returned when there is one. Otherwise a failure to
// copy the response into resp is returned, so a caller never gets a nil error
// together with a resp that was not filled in.
func (t *transport) sendRPC(target string, req interface{}, resp interface{}) error {
	t.transports.RLock()
	tt := t.transports.nodes[target]
	if tt == nil {
		// Log before unlocking: the nodes map itself is part of the log line.
		t.log.Info("sendRPC unknown node", "target", target, "transports", t.transports.nodes)
		t.transports.RUnlock()
		return fmt.Errorf("Unknown target host %v", target)
	}
	t.transports.RUnlock()

	// Buffered so the responder never blocks on the reply.
	rc := make(chan raft.RPCResponse, 1)

	var reqBuf bytes.Buffer
	if err := codec.NewEncoder(&reqBuf, &codecHandle).Encode(req); err != nil {
		return err
	}

	rpc := raft.RPC{RespChan: rc}
	var reqVote raft.RequestVoteRequest
	var timeoutNow raft.TimeoutNowRequest
	var appEnt raft.AppendEntriesRequest
	dec := codec.NewDecoderBytes(reqBuf.Bytes(), &codecHandle)
	switch req.(type) {
	case *raft.TimeoutNowRequest:
		if err := dec.Decode(&timeoutNow); err != nil {
			return err
		}
		rpc.Command = &timeoutNow
	case *raft.RequestVoteRequest:
		if err := dec.Decode(&reqVote); err != nil {
			return err
		}
		rpc.Command = &reqVote
	case *raft.AppendEntriesRequest:
		if err := dec.Decode(&appEnt); err != nil {
			return err
		}
		rpc.Command = &appEnt
	default:
		// The RPC is still sent, with a nil Command.
		t.log.Warn("unexpected request type", "type", hclog.Fmt("%T", req), "request", req)
	}

	var result *raft.RPCResponse
	if t.hooks != nil {
		if err := t.hooks.PreRPC(t.node, target, &rpc); err != nil {
			return err
		}
		switch req.(type) {
		case *raft.RequestVoteRequest:
			hr, err := t.hooks.PreRequestVote(t.node, target, &reqVote)
			if hr != nil || err != nil {
				result = &raft.RPCResponse{Response: hr, Error: err}
			}
		case *raft.AppendEntriesRequest:
			hr, err := t.hooks.PreAppendEntries(t.node, target, &appEnt)
			if hr != nil || err != nil {
				result = &raft.RPCResponse{Response: hr, Error: err}
			}
		}
	}

	// No hook supplied a result, so hand the RPC to the target and wait.
	if result == nil {
		tt.consumer <- rpc
		cr := <-rc
		result = &cr
	}

	if t.hooks != nil {
		if err := t.hooks.PostRPC(t.node, target, &rpc, result); err != nil {
			result.Error = err
		}
	}

	// Copy the response into the caller's resp via an encode/decode round trip.
	var respBuf bytes.Buffer
	copyErr := codec.NewEncoder(&respBuf, &codecHandle).Encode(result.Response)
	if copyErr == nil {
		copyErr = codec.NewDecoderBytes(respBuf.Bytes(), &codecHandle).Decode(resp)
	}

	// The RPC's own error takes precedence, exactly as before.
	if result.Error != nil {
		return result.Error
	}
	if copyErr != nil {
		return fmt.Errorf("copying response from %v: %w", target, copyErr)
	}
	return nil
}
// TimeoutNow forwards a TimeoutNow request to the target node through sendRPC
// and fills resp with the reply. The id argument is not used.
func (t *transport) TimeoutNow(id raft.ServerID, target raft.ServerAddress, args *raft.TimeoutNowRequest, resp *raft.TimeoutNowResponse) error {
	err := t.sendRPC(string(target), args, resp)
	return err
}
// AppendEntries notes the call in the transport's record list (while spare
// capacity remains) and then sends the request to the target node, filling
// resp with the reply. The id argument is not used.
//
// NOTE(review): the append to t.ae is not synchronized — confirm that this
// method is never called concurrently on the same transport.
func (t *transport) AppendEntries(id raft.ServerID, target raft.ServerAddress, args *raft.AppendEntriesRequest, resp *raft.AppendEntriesResponse) error {
	first, last := firstIndex(args), lastIndex(args)
	record := appendEntries{
		source:      t.node,
		target:      target,
		firstIndex:  first,
		lastIndex:   last,
		commitIndex: args.LeaderCommitIndex,
	}

	// Never grow beyond the preallocated capacity; later calls go unrecorded.
	if room := cap(t.ae) - len(t.ae); room > 0 {
		t.ae = append(t.ae, record)
	}

	return t.sendRPC(string(target), args, resp)
}
// DumpLog writes every recorded AppendEntries call to the file
// "<node>.transport" inside dir, one line per call in the form
// "source -> target<TAB>first - last : commit".
//
// The method has no error return, so failures to create, write or close the
// file are reported through the transport's logger (when one is set). If the
// file cannot be created nothing is written.
func (t *transport) DumpLog(dir string) {
	path := filepath.Join(dir, t.node+".transport")

	// report logs a failure without assuming a logger was ever assigned.
	report := func(msg string, err error) {
		if t.log != nil {
			t.log.Error(msg, "path", path, "error", err)
		}
	}

	fw, err := os.Create(path)
	if err != nil {
		report("failed to create transport dump", err)
		return
	}
	defer func() {
		if err := fw.Close(); err != nil {
			report("failed to close transport dump", err)
		}
	}()

	w := bufio.NewWriter(fw)
	for i := range t.ae {
		e := &t.ae[i]
		// Write errors are sticky on the bufio.Writer and surface at Flush.
		fmt.Fprintf(w, "%v -> %v\t%8d - %8d : %8d\n", e.source, e.target, e.firstIndex, e.lastIndex, e.commitIndex)
	}
	if err := w.Flush(); err != nil {
		report("failed to write transport dump", err)
	}
}
// firstIndex returns the Index of the first entry carried by a, or 0 when the
// request has no entries.
func firstIndex(a *raft.AppendEntriesRequest) uint64 {
	if entries := a.Entries; len(entries) > 0 {
		return entries[0].Index
	}
	return 0
}
// lastIndex returns the Index of the final entry carried by a, or 0 when the
// request has no entries.
func lastIndex(a *raft.AppendEntriesRequest) uint64 {
	count := len(a.Entries)
	if count > 0 {
		return a.Entries[count-1].Index
	}
	return 0
}
// RequestVote forwards a RequestVote request to the target node through
// sendRPC and fills resp with the reply. The id argument is not used.
func (t *transport) RequestVote(id raft.ServerID, target raft.ServerAddress, args *raft.RequestVoteRequest, resp *raft.RequestVoteResponse) error {
	err := t.sendRPC(string(target), args, resp)
	return err
}
// InstallSnapshot is the hook for pushing a snapshot down to a follower, but
// this transport does not implement it: the attempt is logged at debug level
// and an error is always returned. None of the arguments, including data, are
// read.
func (t *transport) InstallSnapshot(id raft.ServerID, target raft.ServerAddress, args *raft.InstallSnapshotRequest, resp *raft.InstallSnapshotResponse, data io.Reader) error {
	t.log.Debug("INSTALL SNAPSHOT *************************************")

	unsupported := errors.New("huh")
	return unsupported
}
// EncodePeer serializes a peer address as the raw bytes of its string form.
// The id argument is not used.
func (t *transport) EncodePeer(id raft.ServerID, p raft.ServerAddress) []byte {
	encoded := []byte(p)
	return encoded
}
// DecodePeer turns the bytes produced by EncodePeer back into a peer address.
func (t *transport) DecodePeer(p []byte) raft.ServerAddress {
	addr := raft.ServerAddress(p)
	return addr
}
// SetHeartbeatHandler is used to set up a heartbeat handler as a fast path,
// which avoids head-of-line blocking from disk IO. A Transport that does not
// support this can simply ignore the call and push heartbeats onto the
// Consumer channel.
//
// This transport ignores the call: cb is never stored or invoked.
func (t *transport) SetHeartbeatHandler(cb func(rpc raft.RPC)) {
}
// AppendEntriesPipeline creates a pipeline for sending AppendEntries requests
// to target and starts the goroutine that works through its queue. Both the
// work queue and the completed-futures channel are buffered to 100 items. The
// returned error is always nil.
func (t *transport) AppendEntriesPipeline(id raft.ServerID, target raft.ServerAddress) (raft.AppendPipeline, error) {
	const depth = 100

	pl := new(pipeline)
	pl.t = t
	pl.id = id
	pl.target = target
	pl.work = make(chan *appendEntry, depth)
	pl.consumer = make(chan raft.AppendFuture, depth)

	// The worker exits once the work channel is closed (see pipeline.Close).
	go pl.run()
	return pl, nil
}
// appendEntry is one pipelined AppendEntries request together with its
// eventual outcome. pipeline.AppendEntries returns it to the caller as the
// request's future.
type appendEntry struct {
// req is the request to send.
req *raft.AppendEntriesRequest
// res is the caller-supplied response that the RPC fills in.
res *raft.AppendEntriesResponse
// start is the time at which the request was queued on the pipeline.
start time.Time
// err is the outcome of the RPC, set by Respond.
err error
// ready is closed by Respond to signal completion; nothing in this file
// ever sends a value on it.
ready chan error
// consumer is the pipeline's channel on which the completed entry is published.
consumer chan raft.AppendFuture
}
// Request returns the AppendEntries request this future was created for. It
// never blocks.
func (e *appendEntry) Request() *raft.AppendEntriesRequest {
	original := e.req
	return original
}
// Response blocks until the request has completed (ready is closed by
// Respond) and then returns the response.
func (e *appendEntry) Response() *raft.AppendEntriesResponse {
	done := e.ready
	<-done
	return e.res
}
// Start returns the time at which the request was queued on the pipeline. It
// never blocks.
func (e *appendEntry) Start() time.Time {
	queuedAt := e.start
	return queuedAt
}
// Error blocks until the request has completed (ready is closed by Respond)
// and then returns the error recorded for it, which is nil on success.
func (e *appendEntry) Error() error {
	done := e.ready
	<-done
	return e.err
}
// Respond completes the future with err. The error is stored first, then
// ready is closed to release anyone waiting in Response or Error, and finally
// the entry is published on the pipeline's consumer channel. That last send
// blocks while the consumer channel is full.
func (e *appendEntry) Respond(err error) {
	e.err = err
	close(e.ready)

	var completed raft.AppendFuture = e
	e.consumer <- completed
}
// pipeline queues AppendEntries requests for one target and sends them, in
// order, from a single goroutine (run).
type pipeline struct {
// t is the transport the requests are sent through.
t *transport
// target is the address every request in this pipeline is sent to.
target raft.ServerAddress
// id is the server ID passed along with every request.
id raft.ServerID
// work holds queued requests waiting to be sent; Close closes it.
work chan *appendEntry
// consumer receives each request's future once it has completed.
consumer chan raft.AppendFuture
}
// run is the pipeline's worker loop. It takes queued requests one at a time,
// sends each through the transport and completes its future with the outcome.
// The loop ends when the work channel is closed and drained.
func (p *pipeline) run() {
	for {
		next, open := <-p.work
		if !open {
			return
		}
		outcome := p.t.AppendEntries(p.id, p.target, next.req, next.res)
		next.Respond(outcome)
	}
}
// AppendEntries queues another request on the pipeline and returns a future
// for it. The send onto the work queue blocks while the queue is full, which
// acts as back-pressure on the caller. The returned error is always nil.
func (p *pipeline) AppendEntries(args *raft.AppendEntriesRequest, resp *raft.AppendEntriesResponse) (raft.AppendFuture, error) {
	future := new(appendEntry)
	future.req = args
	future.res = resp
	future.start = time.Now()
	future.ready = make(chan error)
	future.consumer = p.consumer

	p.work <- future
	return future, nil
}
// Consumer exposes the receive side of the channel on which completed
// AppendEntries futures are published.
func (p *pipeline) Consumer() <-chan raft.AppendFuture {
	var completed <-chan raft.AppendFuture = p.consumer
	return completed
}
// Close shuts the pipeline down by closing its work queue, which lets the
// worker goroutine exit once the queue is drained. Requests already queued are
// still sent; nothing is cancelled. It always returns nil.
//
// NOTE(review): calling Close twice, or AppendEntries after Close, would hit
// the already-closed channel — confirm callers never do either.
func (p *pipeline) Close() error {
	var err error
	close(p.work)
	return err
}