block_fetcher.ex
defmodule Explorer.Indexer.BlockFetcher do
  @moduledoc """
  Fetches and indexes block ranges from genesis to realtime.
  """

  use GenServer

  require Logger

  alias Explorer.{Chain, Indexer, JSONRPC}

  alias Explorer.Indexer.{
    Sequence,
    BlockImporter
  }

  alias Explorer.JSONRPC.Transactions

  # dialyzer thinks that `Logger.debug` functions always have no_local_return
  @dialyzer {:nowarn_function, import_range: 3}

  # These are all the *default* values for options. DO NOT use them directly in the code. Get options from `state`.
  @debug_logs false
  @blocks_batch_size 10
  @blocks_concurrency 10
  @internal_transactions_batch_size 50
  @internal_transactions_concurrency 8
  # milliseconds
  @block_rate 5_000
  @receipts_batch_size 250
  @receipts_concurrency 20
@doc """
Starts the server.
## Options
Default options are pulled from application config under the
`:explorer, :indexer` keyspace. The follow options can be overridden:
* `:debug_logs` - When `true` logs verbose index progress. Defaults `false`.
* `:blocks_batch_size` - The number of blocks to request in one call to the JSONRPC. Defaults to
`#{@blocks_batch_size}`. Block requests also include the transactions for those blocks. *These transactions
are not paginated.*
* `:blocks_concurrency` - The number of concurrent requests of `:blocks_batch_size` to allow against the JSONRPC.
Defaults to #{@blocks_concurrency}. So upto `blocks_concurrency * block_batch_size` (defaults to
`#{@blocks_concurrency * @blocks_batch_size}`) blocks can be requested from the JSONRPC at once over all
connections.
* `:block_rate` - The millisecond rate new blocks are published at. Defaults to `#{@block_rate}` milliseconds.
* `:internal transactions_batch_size` - The number of transaction hashes to request internal transactions for
in one call to the JSONRPC. Defaults to `#{@internal_transactions_batch_size}`.
* `:internal transactions_concurrency` - The number of concurrent requests of `:internal transactions_batch_size` to
allow against the JSONRPC **for each block range**. Defaults to `#{@internal_transactions_concurrency}`. So upto
`block_concurrency * internal_transactions_batch_size * internal transactions_concurrency` (defaults to
`#{@blocks_concurrency * @internal_transactions_concurrency * @internal_transactions_batch_size}`) transactions
can be requesting their internal transactions can be requested from the JSONRPC at once over all connections.
*The internal transactions for individual transactions cannot be paginated, so the total number of internal
transactions that could be produced is unknown.*
* `:receipts_batch_size` - The number of receipts to request in one call to the JSONRPC. Defaults to
`#{@receipts_batch_size}`. Receipt requests also include the logs for when the transaction was collated into the
block. *These logs are not paginated.*
* `:receipts_concurrency` - The number of concurrent requests of `:receipts_batch_size` to allow against the JSONRPC
**for each block range**. Defaults to `#{@receipts_concurrency}`. So upto
`block_concurrency * receipts_batch_size * receipts_concurrency` (defaults to
`#{@blocks_concurrency * @receipts_concurrency * @receipts_batch_size}`) receipts can be requested from the
JSONRPC at once over all connections. *Each transaction only has one receipt.*
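
  ## Examples

  A minimal sketch of overriding a few of the defaults at startup (the option values below are illustrative, not
  tuned recommendations):

      {:ok, _pid} =
        Explorer.Indexer.BlockFetcher.start_link(
          debug_logs: true,
          blocks_batch_size: 5,
          receipts_concurrency: 4
        )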
"""
def start_link(opts) do
GenServer.start_link(__MODULE__, opts, name: __MODULE__)
end

  @impl GenServer
  def init(opts) do
    opts = Keyword.merge(Application.fetch_env!(:explorer, :indexer), opts)

    send(self(), :catchup_index)
    :timer.send_interval(15_000, self(), :debug_count)

    state = %{
      # the task and timer slots must exist up front because later callbacks update
      # them with `%{state | ...}`, which raises `KeyError` for missing keys
      genesis_task: nil,
      realtime_task: nil,
      poll_timer: nil,
      debug_logs: Keyword.get(opts, :debug_logs, @debug_logs),
      realtime_interval: (opts[:block_rate] || @block_rate) * 2,
      blocks_batch_size: Keyword.get(opts, :blocks_batch_size, @blocks_batch_size),
      blocks_concurrency: Keyword.get(opts, :blocks_concurrency, @blocks_concurrency),
      internal_transactions_batch_size:
        Keyword.get(opts, :internal_transactions_batch_size, @internal_transactions_batch_size),
      internal_transactions_concurrency:
        Keyword.get(opts, :internal_transactions_concurrency, @internal_transactions_concurrency),
      receipts_batch_size: Keyword.get(opts, :receipts_batch_size, @receipts_batch_size),
      receipts_concurrency: Keyword.get(opts, :receipts_concurrency, @receipts_concurrency)
    }

    {:ok, state}
  end
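
  # A sketch of supplying the same options through application config instead of `start_link/1` arguments (keyspace
  # taken from the `Application.fetch_env!/2` call above; values are illustrative):
  #
  #     config :explorer, :indexer,
  #       blocks_batch_size: 10,
  #       block_rate: 5_000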

  @impl GenServer
  def handle_info(:catchup_index, state) do
    {:ok, genesis_task} = Task.start_link(fn -> genesis_task(state) end)

    Process.monitor(genesis_task)

    {:noreply, %{state | genesis_task: genesis_task}}
  end

  def handle_info(:realtime_index, state) do
    {:ok, realtime_task} = Task.start_link(fn -> realtime_task(state) end)

    Process.monitor(realtime_task)

    {:noreply, %{state | realtime_task: realtime_task}}
  end

  def handle_info({:DOWN, _ref, :process, pid, :normal}, %{realtime_task: pid} = state) do
    {:noreply, schedule_next_realtime_fetch(%{state | realtime_task: nil})}
  end

  def handle_info({:DOWN, _ref, :process, pid, :normal}, %{genesis_task: pid} = state) do
    Logger.info(fn -> "Finished index from genesis. Transitioning to realtime index." end)

    {:noreply, schedule_next_realtime_fetch(%{state | genesis_task: nil})}
  end

  def handle_info(:debug_count, state) do
    debug(state, fn ->
      """
      ================================
      persisted counts
      ================================
      blocks: #{Chain.block_count()}
      internal transactions: #{Chain.internal_transaction_count()}
      receipts: #{Chain.receipt_count()}
      logs: #{Chain.log_count()}
      addresses: #{Chain.address_count()}
      """
    end)

    {:noreply, state}
  end
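
  # Caps the sequence when the node reports `:end_of_chain`, so it stops producing new ranges; `:more` only logs the
  # fetched range.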
  defp cap_seq(seq, :end_of_chain, {_block_start, _block_end}, _state) do
    :ok = Sequence.cap(seq)
  end

  defp cap_seq(_seq, :more, {block_start, block_end}, state) do
    debug(state, fn -> "got blocks #{block_start} - #{block_end}" end)

    :ok
  end
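
  # Fetches internal transactions in batches of `internal_transactions_batch_size`, up to
  # `internal_transactions_concurrency` batches at a time, halting on the first error so the caller can retry the
  # whole block range.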
  defp fetch_internal_transactions(_state, []), do: {:ok, []}

  defp fetch_internal_transactions(state, hashes) do
    debug(state, fn -> "fetching internal transactions for #{length(hashes)} transactions" end)

    stream_opts = [max_concurrency: state.internal_transactions_concurrency, timeout: :infinity]

    hashes
    |> Enum.chunk_every(state.internal_transactions_batch_size)
    |> Task.async_stream(&JSONRPC.fetch_internal_transactions(&1), stream_opts)
    |> Enum.reduce_while({:ok, []}, fn
      {:ok, {:ok, internal_transactions}}, {:ok, acc} -> {:cont, {:ok, acc ++ internal_transactions}}
      {:ok, {:error, reason}}, {:ok, _acc} -> {:halt, {:error, reason}}
      {:error, reason}, {:ok, _acc} -> {:halt, {:error, reason}}
    end)
  end
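
  # Same batching and halt-on-first-error pattern as `fetch_internal_transactions/2`, but accumulates both the
  # receipts and the logs they carry.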
  defp fetch_transaction_receipts(_state, []), do: {:ok, %{logs: [], receipts: []}}

  defp fetch_transaction_receipts(state, hashes) do
    debug(state, fn -> "fetching #{length(hashes)} transaction receipts" end)

    stream_opts = [max_concurrency: state.receipts_concurrency, timeout: :infinity]

    hashes
    |> Enum.chunk_every(state.receipts_batch_size)
    |> Task.async_stream(&JSONRPC.fetch_transaction_receipts(&1), stream_opts)
    |> Enum.reduce_while({:ok, %{logs: [], receipts: []}}, fn
      {:ok, {:ok, %{logs: logs, receipts: receipts}}}, {:ok, %{logs: acc_logs, receipts: acc_receipts}} ->
        {:cont, {:ok, %{logs: acc_logs ++ logs, receipts: acc_receipts ++ receipts}}}

      {:ok, {:error, reason}}, {:ok, _acc} ->
        {:halt, {:error, reason}}

      {:error, reason}, {:ok, _acc} ->
        {:halt, {:error, reason}}
    end)
  end

  defp genesis_task(state) do
    {count, missing_ranges} = missing_block_numbers(state)
    current_block = Indexer.next_block_number()

    debug(state, fn -> "#{count} missed block ranges between genesis and #{current_block}" end)

    {:ok, seq} = Sequence.start_link(missing_ranges, current_block, state.blocks_batch_size)
    stream_import(state, seq, max_concurrency: state.blocks_concurrency)
  end
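
  # On a failed import the range goes back into the sequence, so a later task picks it up and retries.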
  defp insert(state, seq, range, params) do
    case BlockImporter.import_blocks(params) do
      :ok ->
        :ok

      {:error, step, reason} ->
        debug(state, fn ->
          "failed to insert blocks during #{step} #{inspect(range)}: #{inspect(reason)}. Retrying"
        end)

        :ok = Sequence.inject_range(seq, range)
    end
  end
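
  # Worked example (with blocks_batch_size = 10): the missing range {0, 25} is chunked into
  # [{0, 9}, {10, 19}, {20, 25}], so no single JSONRPC request exceeds the batch size.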
  defp missing_block_numbers(%{blocks_batch_size: blocks_batch_size}) do
    {count, missing_ranges} = Chain.missing_block_numbers()

    chunked_ranges =
      Enum.flat_map(missing_ranges, fn
        {start, ending} when ending - start <= blocks_batch_size ->
          [{start, ending}]

        {start, ending} ->
          start
          |> Stream.iterate(&(&1 + blocks_batch_size))
          |> Enum.reduce_while([], fn
            chunk_start, acc when chunk_start + blocks_batch_size >= ending ->
              {:halt, [{chunk_start, ending} | acc]}

            chunk_start, acc ->
              {:cont, [{chunk_start, chunk_start + blocks_batch_size - 1} | acc]}
          end)
          |> Enum.reverse()
      end)

    {count, chunked_ranges}
  end
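
  # Realtime indexing tails the chain head: no backfill ranges, a step of 2 blocks, and a single concurrent import.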
  defp realtime_task(state) do
    {:ok, seq} = Sequence.start_link([], Indexer.next_block_number(), 2)
    stream_import(state, seq, max_concurrency: 1)
  end
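
  # Each `import_range/3` task returns `:ok` on both the success and the retry path, so any other result crashes the
  # stream instead of being silently dropped.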
  defp stream_import(state, seq, task_opts) do
    seq
    |> Sequence.build_stream()
    |> Task.async_stream(&import_range(&1, state, seq), Keyword.merge(task_opts, timeout: :infinity))
    |> Enum.each(fn {:ok, :ok} -> :ok end)
  end
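
  # Fetches everything for one block range in order: blocks (with their transactions), then receipts (with logs),
  # then internal transactions, and finally inserts the whole batch. A tagged `{:error, reason}` at any step falls
  # through to `else` and the range is re-injected for retry.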
  # Run at state.blocks_concurrency max_concurrency.
  defp import_range({block_start, block_end} = range, state, seq) do
    with {:blocks, {:ok, next, result}} <- {:blocks, JSONRPC.fetch_blocks_by_range(block_start, block_end)},
         %{blocks: blocks, transactions: transactions} = result,
         cap_seq(seq, next, range, state),
         transaction_hashes = Transactions.params_to_hashes(transactions),
         {:receipts, {:ok, receipt_params}} <- {:receipts, fetch_transaction_receipts(state, transaction_hashes)},
         %{logs: logs, receipts: receipts} = receipt_params,
         {:internal_transactions, {:ok, internal_transactions}} <-
           {:internal_transactions, fetch_internal_transactions(state, transaction_hashes)} do
      insert(state, seq, range, %{
        blocks: blocks,
        internal_transactions: internal_transactions,
        logs: logs,
        receipts: receipts,
        transactions: transactions
      })
    else
      {step, {:error, reason}} ->
        debug(state, fn ->
          "failed to fetch #{step} for blocks #{block_start} - #{block_end}: #{inspect(reason)}. Retrying block range."
        end)

        :ok = Sequence.inject_range(seq, range)
    end
  end
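
  # `realtime_interval` is twice the configured `:block_rate` (see `init/1`), so each realtime poll should typically
  # find at least one new block.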
  defp schedule_next_realtime_fetch(state) do
    timer = Process.send_after(self(), :realtime_index, state.realtime_interval)
    %{state | poll_timer: timer}
  end

  defp debug(%{debug_logs: true}, func), do: Logger.debug(func)
  defp debug(%{debug_logs: false}, _func), do: :noop
end