forked from libp2p/go-libp2p-kad-dht
-
Notifications
You must be signed in to change notification settings - Fork 0
/
query.go
313 lines (258 loc) · 8.1 KB
/
query.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
// package query implement a query manager to drive concurrent workers
// to query the DHT. A query is setup with a target key, a queryFunc tasked
// to communicate with a peer, and a set of initial peers. As the query
// progress, queryFunc can return closer peers that will be used to navigate
// closer to the target key in the DHT until an answer is reached.
package dht
import (
"context"
"sync"
u "github.com/ipfs/go-ipfs-util"
logging "github.com/ipfs/go-log"
todoctr "github.com/ipfs/go-todocounter"
process "github.com/jbenet/goprocess"
ctxproc "github.com/jbenet/goprocess/context"
inet "github.com/libp2p/go-libp2p-net"
peer "github.com/libp2p/go-libp2p-peer"
pset "github.com/libp2p/go-libp2p-peer/peerset"
pstore "github.com/libp2p/go-libp2p-peerstore"
queue "github.com/libp2p/go-libp2p-peerstore/queue"
routing "github.com/libp2p/go-libp2p-routing"
notif "github.com/libp2p/go-libp2p-routing/notifications"
)
var maxQueryConcurrency = AlphaValue
type dhtQuery struct {
dht *IpfsDHT
key string // the key we're querying for
qfunc queryFunc // the function to execute per peer
concurrency int // the concurrency parameter
}
type dhtQueryResult struct {
value []byte // GetValue
peer *pstore.PeerInfo // FindPeer
providerPeers []pstore.PeerInfo // GetProviders
closerPeers []*pstore.PeerInfo // *
success bool
finalSet *pset.PeerSet
queriedSet *pset.PeerSet
}
// constructs query
func (dht *IpfsDHT) newQuery(k string, f queryFunc) *dhtQuery {
return &dhtQuery{
key: k,
dht: dht,
qfunc: f,
concurrency: maxQueryConcurrency,
}
}
// QueryFunc is a function that runs a particular query with a given peer.
// It returns either:
// - the value
// - a list of peers potentially better able to serve the query
// - an error
type queryFunc func(context.Context, peer.ID) (*dhtQueryResult, error)
// Run runs the query at hand. pass in a list of peers to use first.
func (q *dhtQuery) Run(ctx context.Context, peers []peer.ID) (*dhtQueryResult, error) {
select {
case <-ctx.Done():
return nil, ctx.Err()
default:
}
ctx, cancel := context.WithCancel(ctx)
defer cancel()
runner := newQueryRunner(q)
return runner.Run(ctx, peers)
}
type dhtQueryRunner struct {
query *dhtQuery // query to run
peersSeen *pset.PeerSet // all peers queried. prevent querying same peer 2x
peersQueried *pset.PeerSet // peers successfully connected to and queried
peersToQuery *queue.ChanQueue // peers remaining to be queried
peersRemaining todoctr.Counter // peersToQuery + currently processing
result *dhtQueryResult // query result
errs u.MultiErr // result errors. maybe should be a map[peer.ID]error
rateLimit chan struct{} // processing semaphore
log logging.EventLogger
runCtx context.Context
proc process.Process
sync.RWMutex
}
func newQueryRunner(q *dhtQuery) *dhtQueryRunner {
proc := process.WithParent(process.Background())
ctx := ctxproc.OnClosingContext(proc)
return &dhtQueryRunner{
query: q,
peersToQuery: queue.NewChanQueue(ctx, queue.NewXORDistancePQ(string(q.key))),
peersRemaining: todoctr.NewSyncCounter(),
peersSeen: pset.New(),
peersQueried: pset.New(),
rateLimit: make(chan struct{}, q.concurrency),
proc: proc,
}
}
func (r *dhtQueryRunner) Run(ctx context.Context, peers []peer.ID) (*dhtQueryResult, error) {
r.log = log
r.runCtx = ctx
if len(peers) == 0 {
log.Warning("Running query with no peers!")
return nil, nil
}
// setup concurrency rate limiting
for i := 0; i < r.query.concurrency; i++ {
r.rateLimit <- struct{}{}
}
// add all the peers we got first.
for _, p := range peers {
r.addPeerToQuery(p)
}
// go do this thing.
// do it as a child proc to make sure Run exits
// ONLY AFTER spawn workers has exited.
r.proc.Go(r.spawnWorkers)
// so workers are working.
// wait until they're done.
err := routing.ErrNotFound
// now, if the context finishes, close the proc.
// we have to do it here because the logic before is setup, which
// should run without closing the proc.
ctxproc.CloseAfterContext(r.proc, ctx)
select {
case <-r.peersRemaining.Done():
r.proc.Close()
r.RLock()
defer r.RUnlock()
err = routing.ErrNotFound
// if every query to every peer failed, something must be very wrong.
if len(r.errs) > 0 && len(r.errs) == r.peersSeen.Size() {
log.Debugf("query errs: %s", r.errs)
err = r.errs[0]
}
case <-r.proc.Closed():
r.RLock()
defer r.RUnlock()
err = context.DeadlineExceeded
}
if r.result != nil && r.result.success {
return r.result, nil
}
return &dhtQueryResult{
finalSet: r.peersSeen,
queriedSet: r.peersQueried,
}, err
}
func (r *dhtQueryRunner) addPeerToQuery(next peer.ID) {
// if new peer is ourselves...
if next == r.query.dht.self {
r.log.Debug("addPeerToQuery skip self")
return
}
if !r.peersSeen.TryAdd(next) {
return
}
notif.PublishQueryEvent(r.runCtx, ¬if.QueryEvent{
Type: notif.AddingPeer,
ID: next,
})
r.peersRemaining.Increment(1)
select {
case r.peersToQuery.EnqChan <- next:
case <-r.proc.Closing():
}
}
func (r *dhtQueryRunner) spawnWorkers(proc process.Process) {
for {
select {
case <-r.peersRemaining.Done():
return
case <-r.proc.Closing():
return
case <-r.rateLimit:
select {
case p, more := <-r.peersToQuery.DeqChan:
if !more {
return // channel closed.
}
// do it as a child func to make sure Run exits
// ONLY AFTER spawn workers has exited.
proc.Go(func(proc process.Process) {
r.queryPeer(proc, p)
})
case <-r.proc.Closing():
return
case <-r.peersRemaining.Done():
return
}
}
}
}
func (r *dhtQueryRunner) queryPeer(proc process.Process, p peer.ID) {
// ok let's do this!
// create a context from our proc.
ctx := ctxproc.OnClosingContext(proc)
// make sure we do this when we exit
defer func() {
// signal we're done processing peer p
r.peersRemaining.Decrement(1)
r.rateLimit <- struct{}{}
}()
// make sure we're connected to the peer.
// FIXME abstract away into the network layer
// Note: Failure to connect in this block will cause the function to
// short circuit.
if r.query.dht.host.Network().Connectedness(p) == inet.NotConnected {
log.Debug("not connected. dialing.")
notif.PublishQueryEvent(r.runCtx, ¬if.QueryEvent{
Type: notif.DialingPeer,
ID: p,
})
// while we dial, we do not take up a rate limit. this is to allow
// forward progress during potentially very high latency dials.
r.rateLimit <- struct{}{}
pi := pstore.PeerInfo{ID: p}
if err := r.query.dht.host.Connect(ctx, pi); err != nil {
log.Debugf("Error connecting: %s", err)
notif.PublishQueryEvent(r.runCtx, ¬if.QueryEvent{
Type: notif.QueryError,
Extra: err.Error(),
ID: p,
})
r.Lock()
r.errs = append(r.errs, err)
r.Unlock()
<-r.rateLimit // need to grab it again, as we deferred.
return
}
<-r.rateLimit // need to grab it again, as we deferred.
log.Debugf("connected. dial success.")
}
// finally, run the query against this peer
res, err := r.query.qfunc(ctx, p)
r.peersQueried.Add(p)
if err != nil {
log.Debugf("ERROR worker for: %v %v", p, err)
r.Lock()
r.errs = append(r.errs, err)
r.Unlock()
} else if res.success {
log.Debugf("SUCCESS worker for: %v %s", p, res)
r.Lock()
r.result = res
r.Unlock()
go r.proc.Close() // signal to everyone that we're done.
// must be async, as we're one of the children, and Close blocks.
} else if len(res.closerPeers) > 0 {
log.Debugf("PEERS CLOSER -- worker for: %v (%d closer peers)", p, len(res.closerPeers))
for _, next := range res.closerPeers {
if next.ID == r.query.dht.self { // don't add self.
log.Debugf("PEERS CLOSER -- worker for: %v found self", p)
continue
}
// add their addresses to the dialer's peerstore
r.query.dht.peerstore.AddAddrs(next.ID, next.Addrs, pstore.TempAddrTTL)
r.addPeerToQuery(next.ID)
log.Debugf("PEERS CLOSER -- worker for: %v added %v (%v)", p, next.ID, next.Addrs)
}
} else {
log.Debugf("QUERY worker for: %v - not found, and no closer peers.", p)
}
}