In [None]:
# Prepare the database connections: a source database that is only read
# (presumably holding non-unique entries, judging by its file name) and a
# destination database that receives the de-duplicated rows.
import sqlite3

rdb_path = 'index-not-unique.db'
# Open the source read-only through a URI: a missing or misspelled path then
# fails right here instead of silently creating an empty database file, and
# the source cannot be modified by accident.
rconn = sqlite3.connect(f'file:{rdb_path}?mode=ro', uri=True)
rcur = rconn.cursor()
wdb_path = 'index-unique.db'
wconn = sqlite3.connect(wdb_path)
wcur = wconn.cursor()

def create_blkid_table():
    """Create the BlkID table in the destination database if it is missing.

    The table has an integer primary key ``id`` and a ``blkhash`` text column
    that is NOT NULL and UNIQUE. The statement runs on the module-level
    ``wcur`` cursor.
    """
    ddl = '''CREATE TABLE IF NOT EXISTS BlkID (
                     id INTEGER PRIMARY KEY,
                     blkhash TEXT NOT NULL UNIQUE);
                 '''
    wcur.execute(ddl)

def create_txid_table():
    """Create the TxID table in the destination database if it is missing.

    The table has an integer primary key ``id`` and a ``txhash`` text column
    that is NOT NULL and UNIQUE. The statement runs on the module-level
    ``wcur`` cursor.
    """
    ddl = '''CREATE TABLE IF NOT EXISTS TxID (
                     id INTEGER PRIMARY KEY,
                     txhash TEXT NOT NULL UNIQUE);
                 '''
    wcur.execute(ddl)

def create_addrid_table():
    """Create the AddrID table in the destination database if it is missing.

    The table has an integer primary key ``id`` and an ``addr`` text column
    that is NOT NULL and UNIQUE. The statement runs on the module-level
    ``wcur`` cursor.
    """
    ddl = '''CREATE TABLE IF NOT EXISTS AddrID (
                      id INTEGER PRIMARY KEY,
                      addr TEXT NOT NULL UNIQUE);
                 '''
    wcur.execute(ddl)

def insert_blkids(blkhash):
    """Bulk-insert block hashes into BlkID, skipping conflicting rows.

    Args:
        blkhash: Sequence of parameter sequences handed to ``executemany``;
            each one supplies the single ``blkhash`` placeholder.
    """
    insert_sql = '''INSERT OR IGNORE INTO BlkID (
                        blkhash) VALUES (
                        ?);
                     '''
    # OR IGNORE makes rows that violate a constraint be skipped, not raise.
    wcur.executemany(insert_sql, blkhash)

def insert_txids(txhash):
    """Bulk-insert transaction hashes into TxID, skipping conflicting rows.

    Args:
        txhash: Sequence of parameter sequences handed to ``executemany``;
            each one supplies the single ``txhash`` placeholder.
    """
    insert_sql = '''INSERT OR IGNORE INTO TxID (
                        txhash) VALUES (
                        ?);
                     '''
    # OR IGNORE makes rows that violate a constraint be skipped, not raise.
    wcur.executemany(insert_sql, txhash)

def insert_addrids(addr):
    """Bulk-insert addresses into AddrID, skipping conflicting rows.

    Args:
        addr: Sequence of parameter sequences handed to ``executemany``;
            each one supplies the single ``addr`` placeholder.
    """
    insert_sql = '''INSERT OR IGNORE INTO AddrID (
                        addr) VALUES (
                        ?);
                     '''
    # OR IGNORE makes rows that violate a constraint be skipped, not raise.
    wcur.executemany(insert_sql, addr)
    
def begin_transactions():
    """Start an explicit transaction through the destination cursor ``wcur``."""
    begin_sql = 'BEGIN TRANSACTION;'
    wcur.execute(begin_sql)

def commit_transactions():
    """Commit the currently open transaction through the destination cursor ``wcur``."""
    commit_sql = 'COMMIT;'
    wcur.execute(commit_sql)

def journal_mode(mode):
    """Set the journal mode of the destination database.

    PRAGMA statements cannot take bound parameters, so the mode has to be
    interpolated into the SQL text. It is therefore checked against the
    journal modes SQLite documents before anything is executed.

    Args:
        mode: One of 'DELETE', 'TRUNCATE', 'PERSIST', 'MEMORY', 'WAL' or
            'OFF' (case-insensitive, surrounding whitespace ignored).

    Raises:
        ValueError: If ``mode`` is not a recognised journal mode.
    """
    allowed_modes = ('DELETE', 'TRUNCATE', 'PERSIST', 'MEMORY', 'WAL', 'OFF')
    normalized = mode.strip().upper() if isinstance(mode, str) else None
    if normalized not in allowed_modes:
        raise ValueError(
            f'unsupported journal mode {mode!r}; expected one of {allowed_modes}'
        )
    sql = f'PRAGMA journal_mode = {normalized}'
    wcur.execute(sql)
    wconn.commit()

In [None]:
# Make sure the three destination tables exist, in the order BlkID, TxID, AddrID.
for ensure_table in (create_blkid_table, create_txid_table, create_addrid_table):
    ensure_table()

In [None]:
import time

fetchsize = 100000  # rows fetched from the source cursor per batch (= per write transaction)


def _copy_unique(label, select_sql, insert_rows):
    """Copy the result of ``select_sql`` from the source DB into the destination DB.

    Rows are read through ``rcur`` in batches of ``fetchsize``; every batch is
    written inside its own explicit transaction. The time spent on each batch
    and on the whole copy is printed, prefixed with ``label``.

    Args:
        label: Prefix for the progress and timing output ('Block', 'Tx', 'Addr').
        select_sql: SELECT statement executed on the source cursor.
        insert_rows: Callable receiving one fetched batch (a list of row
            tuples) and inserting it into the destination table.

    Raises:
        Any exception raised while inserting or committing a batch; that
        batch is rolled back before the exception propagates.
    """
    copy_started = time.perf_counter()
    rcur.execute(select_sql)
    while True:
        batch_started = time.perf_counter()
        rows = rcur.fetchmany(fetchsize)
        if not rows:
            break
        begin_transactions()
        try:
            insert_rows(rows)
            commit_transactions()
        except BaseException:
            # Do not leave a half-written batch in an open transaction
            # (this also covers an interrupted kernel).
            wconn.rollback()
            raise
        # '\r' keeps the per-batch timing on a single, continuously updated line.
        print(f'{label} {time.perf_counter() - batch_started}', end='\r')
    print(f'{label} bulk insert {time.perf_counter() - copy_started}')


try:
    # Switch to WAL for the bulk load (presumably to make the many commits
    # cheaper); the mode is set back to DELETE once all copies have succeeded.
    journal_mode('WAL')
    # NOTE(review): each query combines SELECT DISTINCT on one column with
    # ORDER BY id, a column that is not selected; which id a distinct value is
    # sorted by is left to SQLite. Confirm the resulting order is the intended one.
    # Block
    _copy_unique('Block',
                 '''SELECT DISTINCT blkhash FROM BlkID ORDER BY id;''',
                 insert_blkids)
    # Tx
    _copy_unique('Tx',
                 '''SELECT DISTINCT txhash FROM TxID ORDER BY id;''',
                 insert_txids)
    # Addr
    _copy_unique('Addr',
                 '''SELECT DISTINCT addr FROM AddrID ORDER BY id;''',
                 insert_addrids)
    journal_mode('DELETE')
finally:
    # Release both database files even when a copy step failed.
    rconn.close()
    wconn.close()