### Bitcoin block sampler
[Bitcoin core RPC API](https://en.bitcoin.it/wiki/Original_Bitcoin_client/API_calls_list)

[Bitcoin core RPC Doc](https://bitcoin.org/en/developer-reference#remote-procedure-calls-rpcs)

#### Operation ordering


In [None]:
# For secrets
from secret import rpc_user, rpc_password

In [None]:
## Check that bitcoind is running
import os

# Both the main-chain daemon and the regtest daemon are required.  The
# presence of `bitcoind.pid` in a data directory is taken as the sign that
# the daemon for that chain is up.
# NOTE(review): a stale pid file left by a crashed daemon would also pass
# this check -- confirm whether a liveness probe is wanted here.
for _label, _dir in (('Bitcoind', '~/.bitcoin'),
                     ('Bitcoind -chain=regtest', '~/.bitcoin/regtest')):
    datadir = os.path.abspath(os.path.expanduser(_dir))
    pid_path = os.path.join(datadir, 'bitcoind.pid')
    if not os.path.exists(pid_path):
        # No pid file: abort, and say so (the old message wrongly
        # claimed the daemon *was* running).
        raise SystemExit(f'{_label} is not running!')
    with open(pid_path, 'r') as f:
        print(f'{_label} PID: {f.read()}')

In [None]:
## Project-local helper library (address conversion)
from address_convertor import pubkey_to_address, get_pubkey

In [None]:
## Convenience functions
import datetime

# Fixed-offset time zones used when displaying block timestamps.
_KST_OFFSET = datetime.timedelta(hours=9)
tz_seoul = datetime.timezone(_KST_OFFSET)
tz_utc = datetime.timezone.utc


def get_time(timestamp):
    """Convert a POSIX timestamp into an aware datetime in UTC+09:00.

    Args:
        timestamp: Seconds since the Unix epoch.

    Returns:
        A timezone-aware ``datetime.datetime`` whose tzinfo is ``tz_seoul``.
    """
    as_datetime = datetime.datetime
    return as_datetime.fromtimestamp(timestamp, tz_seoul)

In [None]:
from bitcoinrpc.authproxy import AuthServiceProxy, JSONRPCException
# Connection settings for the bitcoind JSON-RPC endpoint.
rpc_ip, rpc_port, rpc_port_regtest = (
    '127.0.0.1',  # default '127.0.0.1'
    '8332',  # default '8332'
    '18443',  # default '18443'
)
timeout = 300  # handed to AuthServiceProxy as its `timeout` argument

rpc_connection = AuthServiceProxy(
    f'http://{rpc_user}:{rpc_password}@{rpc_ip}:{rpc_port}',
    timeout=timeout,
)

# Sanity check: fetch the current chain tip and report its height and time.
best_block_hash = rpc_connection.getbestblockhash()
best_block = rpc_connection.getblock(best_block_hash)
print(f'Best Main Block Heights: {best_block["height"]}, Time: {get_time(best_block["time"]).isoformat()}')

In [None]:
def get_balance(rpc_connection, v):
    """Resolve the previous output that transaction input *v* spends.

    Args:
        rpc_connection: RPC proxy exposing ``getrawtransaction`` and
            ``getblock``.
        v: A transaction input; its ``'txid'`` and ``'vout'`` entries
            identify the output being spent.

    Returns:
        A tuple ``(addr, height, value)``: the list of addresses of the
        spent output, the height of the block containing the funding
        transaction, and the output's value as returned by the node.

    Raises:
        Exception: If the output's script type is not handled, or no
            address can be read from the decoded script.
    """
    tx = rpc_connection.getrawtransaction(v['txid'], 1)
    block = rpc_connection.getblock(tx['blockhash'])
    spent_output = tx['vout'][v['vout']]
    script = spent_output['scriptPubKey']
    script_type = script['type']
    if script_type in ('pubkeyhash', 'scripthash',
                       'witness_v0_keyhash', 'witness_v0_scripthash',
                       'multisig'):
        if 'addresses' in script:
            addr = script['addresses']
        elif 'address' in script:
            # Bitcoin Core 22.0+ dropped the `addresses` list and reports a
            # single `address` instead; keep the list shape callers expect.
            addr = [script['address']]
        else:
            # The node decoded the script but gave no address at all.
            raise Exception(f'Need to fix {v["txid"]}')
    elif script_type == 'pubkey':
        # Pay-to-pubkey: derive the address from the raw script hex.
        addr = [pubkey_to_address(get_pubkey(script['hex']))]
    else:
        raise Exception(f'Need to fix {v["txid"]}')
    height = block['height']
    value = spent_output['value']
    print(value)  # per-input output kept from the original implementation
    return addr, height, value

In [None]:
# Input block
# Ask for the first and last block heights to sample.  Both bounds are
# treated as inclusive by the cells below (`range(sheight, eheight+1)` and
# `sheight <= height <= eheight`).  A non-integer answer raises ValueError.
sheight = int(input('Start block height: '))
eheight = int(input('End block height: '))

In [None]:
import json

def _write_json_atomically(data, path):
    """Dump *data* to a hidden temp file beside *path*, then rename it over *path*."""
    import os

    directory, filename = os.path.split(path)
    # Leading dot: a leftover temp file can never share a name stem with a
    # real `<height>.json` result file.
    tmp_path = os.path.join(directory, f'.{filename}.{os.getpid()}.tmp')
    try:
        with open(tmp_path, 'w') as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
        os.replace(tmp_path, path)
    except BaseException:
        # Includes KeyboardInterrupt: drop the partial temp file and leave
        # whatever was previously at *path* untouched.
        try:
            os.remove(tmp_path)
        except FileNotFoundError:
            pass
        raise


def sigint_safe_json_saver(data, path):
    """Save *data* as indented JSON at *path* without leaving a partial file.

    The file is written to a temporary sibling and moved into place with
    ``os.replace``, so *path* holds either its previous content or the
    complete new document.  As before, a ``KeyboardInterrupt`` raised
    during the write is swallowed and the write is retried once.

    Args:
        data: JSON-serialisable object.
        path: Destination file path.

    Raises:
        TypeError: If *data* is not JSON-serialisable (propagated from
            ``json.dump``).
        OSError: If the file cannot be written or renamed.
    """
    try:
        _write_json_atomically(data, path)
    except KeyboardInterrupt:
        _write_json_atomically(data, path)

In [None]:
import os

def get_processed_blocks(path):
    """Return the name stems of the regular files directly inside *path*.

    The stem is the part of the file name before its first ``'.'``, so
    ``'123.json'`` contributes ``'123'``.  Sub-directories are ignored.

    Args:
        path: Directory to scan (not recursive).

    Returns:
        A ``set`` of ``str`` stems; empty when the directory has no files.
    """
    with os.scandir(path) as entries:
        return {
            entry.name.split('.')[0]
            for entry in entries
            if entry.is_file()
        }

In [None]:
import os
import time

# Results for one height range live in their own directory under the
# current working directory, named "<start>-<end>".
_range_dir = os.path.expanduser(f'./{sheight}-{eheight}')
path_base = os.path.abspath(_range_dir)
os.makedirs(path_base, exist_ok=True)

# Name stems of the result files already present from an earlier run.
processed = get_processed_blocks(path_base)

def get_dummy(theight):
    """Sample one block and save the outside-of-range outputs it spends.

    For the block at height *theight*, every input of every transaction
    after the first one (the coinbase) is resolved with ``get_balance``.
    Inputs whose funding transaction was mined outside
    ``[sheight, eheight]`` are stored as ``{'addresses', 'value'}``
    records in ``<path_base>/<theight>.json``.  Heights that already have
    a result file are skipped.

    Args:
        theight: Height of the block to process.
    """
    stime = time.time()
    # `processed` holds file-name stems, i.e. strings, so the integer height
    # must be converted before the lookup; comparing the raw int never
    # matched and every block was re-processed on resume.
    if str(theight) not in processed:
        base = []
        # Each call opens its own connection, since this function runs in
        # separate pool worker processes.
        rpc_connection = AuthServiceProxy(f'http://{rpc_user}:{rpc_password}@{rpc_ip}:{rpc_port}',
                                          timeout=timeout)
        block_hash = rpc_connection.getblockhash(theight)
        block = rpc_connection.getblock(block_hash, 2)
        for tx in block['tx'][1:]:
            for v in tx['vin']:
                addr, height, value = get_balance(rpc_connection, v)
                if not (sheight <= height <= eheight):
                    base.append({'addresses': addr, 'value': float(value)})
        path = os.path.join(path_base, f'{theight}.json')
        sigint_safe_json_saver(base, path)
    etime = time.time()
    print(f'[{os.getpid()}] {theight} block processed during {etime-stime} secs.')

In [None]:
import time
import itertools
import multiprocessing

# Fan the heights out over a process pool; `eheight` is included.
stime = time.time()
pool_num = 8 # quad core with hyper threads
with multiprocessing.Pool(pool_num) as p:
    # Keep the list returned by map(): the summary line below reports its
    # length (one entry per height), and previously failed with NameError
    # because `result` was never assigned.
    result = p.map(get_dummy, range(sheight, eheight+1))
etime = time.time()
print(f'Job done {len(result)} during {etime-stime}')