### Performance reports

#### Vanilla insert and commit
- Block heights 0 to 621100: About 2 days (48 Hours)

In [None]:
# For secrets
from secret import rpc_user, rpc_password

In [None]:
## Check that bitcoind is running
import os

# Use bitcoind's PID file in the default data directory as the signal that the
# daemon is up; the RPC cells below cannot work without it.
# NOTE(review): a PID file left behind by a crashed bitcoind would also pass
# this check -- confirm whether a liveness probe is needed.
datadir = os.path.abspath(os.path.expanduser('~/.bitcoin'))
pid_path = os.path.join(datadir, 'bitcoind.pid')
if not os.path.exists(pid_path):
    # No PID file means the daemon is NOT running, so stop the notebook here.
    raise SystemExit('Bitcoind is not running!')
with open(pid_path, 'r') as f:
    # strip() drops the trailing newline stored in the PID file.
    print(f'Bitcoind PID: {f.read().strip()}')

In [None]:
## Address-conversion helpers
from address_convertor import pubkey_to_address, get_pubkey

In [None]:
## Some convenience functions
import datetime

# Fixed-offset time zones used when printing block timestamps.
_SEOUL_OFFSET = datetime.timedelta(hours=9)
tz_seoul = datetime.timezone(_SEOUL_OFFSET)
tz_utc = datetime.timezone.utc


def get_time(timestamp):
    """Convert a POSIX timestamp into a timezone-aware datetime at UTC+09:00.

    Args:
        timestamp: Seconds since the Unix epoch.

    Returns:
        A ``datetime.datetime`` whose tzinfo is ``tz_seoul``.
    """
    moment = datetime.datetime.fromtimestamp(timestamp, tz_seoul)
    return moment

In [None]:
# Prepare the database
import sqlite3

# SQLite file (relative to the working directory) that holds the extracted
# transaction/address tables.
db_path = 'cluster.db'
# One connection and one cursor are shared by every helper defined below.
conn = sqlite3.connect(db_path)
cur = conn.cursor()

    
def create_txhash_table():
    """Create the TxHash table (transaction hash -> block height) if it does not exist yet."""
    ddl = '''CREATE TABLE IF NOT EXISTS TxHash (
                     txhash TEXT PRIMARY KEY,
                     height INTEGER NOT NULL);
                '''
    cur.execute(ddl)

def create_txin_table():
    """Create the TxIn table (address, transaction hash) if it does not exist yet.

    The table declares no key, so the same pair may be stored more than once.
    """
    ddl = '''CREATE TABLE IF NOT EXISTS TxIn (
                     address TEXT NOT NULL,
                     txhash TEXT NOT NULL);
                '''
    cur.execute(ddl)

def create_txout_table():
    """Create the TxOut table (address, transaction hash) if it does not exist yet.

    The table declares no key, so the same pair may be stored more than once.
    """
    ddl = '''CREATE TABLE IF NOT EXISTS TxOut (
                     address TEXT NOT NULL,
                     txhash TEXT NOT NULL);
                '''
    cur.execute(ddl)

def create_cluster_table():
    """Create the Cluster table (address -> cluster number) if it does not exist yet."""
    ddl = '''CREATE TABLE IF NOT EXISTS Cluster (
                     address TEXT PRIMARY KEY,
                     number INTEGER NOT NULL);
                '''
    cur.execute(ddl)

    
def insert_txhash(txhash, height):
    """Store the block height of a transaction.

    ``txhash`` is the table's primary key, so an existing row for the same
    hash is replaced.

    Args:
        txhash: Transaction hash string.
        height: Height of the block containing the transaction.
    """
    sql = '''INSERT OR REPLACE INTO TxHash (
                       txhash, height) VALUES (
                       ?, ?);
                    '''
    row = (txhash, height)
    cur.execute(sql, row)
    
def insert_txin(address, txhash):
    """Add an (address, txhash) row to TxIn.

    Args:
        address: Address to record against the transaction.
        txhash: Transaction hash string.
    """
    sql = '''INSERT OR REPLACE INTO TxIn (
                       address, txhash) VALUES (
                       ?, ?);
                    '''
    row = (address, txhash)
    cur.execute(sql, row)

def insert_txout(address, txhash):
    """Add an (address, txhash) row to TxOut.

    Args:
        address: Address to record against the transaction.
        txhash: Transaction hash string.
    """
    sql = '''INSERT OR REPLACE INTO TxOut (
                       address, txhash) VALUES (
                       ?, ?);
                    '''
    row = (address, txhash)
    cur.execute(sql, row)

def insert_cluster(address, number):
    """Register an address in Cluster unless it is already present.

    ``address`` is the primary key and the statement uses OR IGNORE, so an
    existing row (and its cluster number) is left untouched.

    Args:
        address: Address to register.
        number: Cluster number stored for a newly inserted address.
    """
    sql = '''INSERT OR IGNORE INTO Cluster (
                       address, number) VALUES (
                       ?, ?);
                    '''
    row = (address, number)
    cur.execute(sql, row)

def do_cluster(addresses, number):
    """Set the cluster number of every listed address in the Cluster table.

    Values are bound as SQL parameters instead of being formatted into the
    statement: text addresses spliced in unquoted are not valid SQL, and
    ``executemany`` cannot be called without a parameter sequence.

    Args:
        addresses: Iterable of address strings; an empty iterable is a no-op.
        number: Cluster number to assign.
    """
    members = list(addresses)
    # Work in batches so one statement never exceeds SQLite's limit on bound
    # parameters (999 by default in older SQLite builds).
    batch_size = 500
    for start in range(0, len(members), batch_size):
        batch = members[start:start + batch_size]
        placeholders = ','.join('?' * len(batch))
        cur.execute(
            f'UPDATE Cluster SET number = ? WHERE address IN ({placeholders})',
            (number, *batch),
        )

    
def begin_transactions():
    """Open an explicit SQLite transaction on the shared cursor."""
    statement = 'BEGIN TRANSACTION;'
    cur.execute(statement)

def commit_transactions():
    """Commit the transaction currently open on the shared cursor."""
    statement = 'COMMIT;'
    cur.execute(statement)
    


def get_max_height():
    """Return the largest block height stored in TxHash.

    Returns:
        The value of ``MAX(height)``; callers treat ``None`` as "nothing
        stored yet".
    """
    cur.execute('''SELECT MAX(height) FROM TxHash;''')
    first_row = cur.fetchone()
    return first_row[0]

def get_cluster_number(addresses):
    """Return the smallest cluster number among the listed addresses.

    Addresses are bound as SQL parameters instead of being formatted into the
    statement; text addresses spliced in unquoted are not valid SQL.

    Args:
        addresses: Iterable of address strings.

    Returns:
        The minimum ``number`` over the matching Cluster rows, or ``None``
        when no listed address is present (including an empty iterable).
    """
    members = list(addresses)
    # Work in batches so one statement never exceeds SQLite's limit on bound
    # parameters (999 by default in older SQLite builds).
    batch_size = 500
    lowest = None
    for start in range(0, len(members), batch_size):
        batch = members[start:start + batch_size]
        placeholders = ','.join('?' * len(batch))
        cur.execute(
            f'SELECT MIN(number) FROM Cluster WHERE address IN ({placeholders})',
            batch,
        )
        found = cur.fetchone()[0]
        if found is not None and (lowest is None or found < lowest):
            lowest = found
    return lowest

In [None]:
from bitcoinrpc.authproxy import AuthServiceProxy, JSONRPCException
# Connection settings for the local bitcoind JSON-RPC endpoint.
rpc_ip, rpc_port = '127.0.0.1', '8332'
timeout = 300  # handed to AuthServiceProxy as its timeout; presumably seconds

rpc_connection = AuthServiceProxy(
    f'http://{rpc_user}:{rpc_password}@{rpc_ip}:{rpc_port}',
    timeout=timeout,
)

# Fetch the current chain tip once; its height is used later to bound the scan.
best_block_hash = rpc_connection.getbestblockhash()
best_block = rpc_connection.getblock(best_block_hash)
print(
    f'Best Block Heights: {best_block["height"]}, Time: {get_time(best_block["time"]).isoformat()}'
)

In [None]:
%%time

# Make sure all four tables exist before indexing starts. Each helper issues
# CREATE TABLE IF NOT EXISTS, so re-running this cell is harmless.
for create_table in (
    create_txin_table,
    create_txout_table,
    create_cluster_table,
    create_txhash_table,
):
    create_table()

In [None]:
import time
import pickle

# Timings (seconds per reporting interval) saved by an earlier session, if any.
# NOTE: pickle.load can execute arbitrary code from the file it reads. That is
# tolerable here only because taking.pickle is written by this notebook; never
# point it at a pickle obtained from anywhere else.
if os.path.exists('taking.pickle'):
    with open('taking.pickle', 'rb') as f:
        taking = pickle.load(f)
else:
    taking = []

rpc_connection = AuthServiceProxy(f'http://{rpc_user}:{rpc_password}@{rpc_ip}:{rpc_port}', timeout=timeout)

# Script types whose decoded scriptPubKey is read through its 'addresses' list.
# NOTE(review): newer Bitcoin Core releases report a single 'address' field
# instead of 'addresses' -- confirm against the node version in use.
_ADDRESS_SCRIPT_TYPES = (
    'pubkeyhash', 'scripthash',
    'witness_v0_keyhash', 'witness_v0_scripthash',
    'multisig',
)


def _script_addresses(script_pub_key):
    """Return the set of addresses for one decoded scriptPubKey (empty if its type is not handled)."""
    script_type = script_pub_key['type']
    if script_type in _ADDRESS_SCRIPT_TYPES:
        return set(script_pub_key['addresses'])
    if script_type == 'pubkey':
        # Pay-to-pubkey scripts carry no address; derive one from the raw key.
        return {pubkey_to_address(get_pubkey(script_pub_key['hex']))}
    return set()


# Every block is committed as a whole, so the highest stored height is already
# complete. Resume with the block after it; re-processing it would append
# duplicate rows to TxIn/TxOut, which have no uniqueness constraint.
max_height = get_max_height()
start_height = 0 if max_height is None else max_height + 1
end_height = best_block['height']-99 # default
end_height = 200000  # manual override of the default above
print(f'Start from {start_height}')


stime = time.time()
height = None  # stays None when there is nothing left to process
for height in range(start_height, end_height):
    begin_transactions()
    try:
        block_hash = rpc_connection.getblockhash(height)
        block = rpc_connection.getblock(block_hash, 2)
        for tx in block['tx']:
            insert_txhash(tx['hash'], height)
            # Union of the addresses seen over ALL inputs / outputs of this tx.
            iaddresses = set()
            oaddresses = set()
            for vin in tx['vin']:
                if 'coinbase' in vin:
                    continue
                # The spent output lives in an earlier transaction; look it up.
                # NOTE(review): presumably requires bitcoind to run with txindex=1.
                ptx = rpc_connection.getrawtransaction(vin['txid'], 1)
                spent = _script_addresses(ptx['vout'][vin['vout']]['scriptPubKey'])
                for address in spent:
                    insert_txin(address, tx['hash'])
                iaddresses |= spent
            for vout in tx['vout']:
                paid = _script_addresses(vout['scriptPubKey'])
                for address in paid:
                    insert_txout(address, tx['hash'])
                    insert_cluster(address, -1)
                oaddresses |= paid

            ## Heuristic
            # Several distinct input addresses paying to exactly one output
            # address are put into the same cluster.
            # NOTE(review): every still-unclustered group (-1) is assigned
            # number 0, so unrelated groups end up sharing cluster 0 -- confirm
            # this is intended.
            if len(iaddresses) > 1 and len(oaddresses) == 1:
                cluster_number = get_cluster_number(iaddresses)
                if cluster_number == -1:
                    cluster_number = 0
                do_cluster(iaddresses, cluster_number)
                print(iaddresses)
    except BaseException:
        # Also covers KeyboardInterrupt: drop the half-written block so the
        # connection is not left inside an open transaction, then re-raise.
        conn.rollback()
        raise
    commit_transactions()
    if height % 1000 == 0:
        tt = int(time.time()-stime)
        print(f'[{tt}] {height} Done')
        taking.append(tt)
        stime = time.time()
# Report the final partial interval, but only if at least one block was processed.
if height is not None:
    tt = int(time.time()-stime)
    print(f'[{tt}] {height} Done')
    taking.append(tt)

In [None]:
# Release the SQLite connection opened in the database-preparation cell.
conn.close()

In [None]:
import pickle

# Persist the collected timings so a later session can load and extend them.
with open('taking.pickle', mode='wb') as timing_file:
    pickle.dump(taking, timing_file)

In [None]:
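### Processing time

#### From height 0: 10 seconds per 1000 blocks
#### From height 10000: 20 seconds per 1000 blocks
#### From height 93000: 90 seconds per 1000 blocks
#### From height 114000: 120 seconds per 1000 blocks
#### From height 121000: 180 seconds per 1000 blocks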