In [None]:
import os
import csv
import sqlite3
import itertools
import time

import igraph
import scipy.special

In [None]:
# Ask the user for the CSV location, then normalise it: surrounding whitespace
# is dropped, a leading "~" is expanded and the result is made absolute.
raw_path = input('Read to csv file: ').strip()
path = os.path.abspath(os.path.expanduser(raw_path))

In [None]:
# Location of the SQLite index database, resolved against the current
# working directory.
dbpath = 'index.db'

# sqlite3.connect() silently creates an empty database when the file does not
# exist, which would only surface later as a confusing "no such table" error
# from the lookup helpers. Fail early with a clear message instead.
if not os.path.isfile(dbpath):
    raise FileNotFoundError(f'SQLite index database not found: {os.path.abspath(dbpath)}')

# Module-level connection and cursor shared by all get_* helpers below.
conn = sqlite3.connect(dbpath)
cur = conn.cursor()

def get_blkid(blkhash):
    """Look up the numeric id stored in table BlkID for a block hash.

    Args:
        blkhash: Value matched against the ``blkhash`` column.

    Returns:
        The ``id`` column of the first matching row.

    Raises:
        TypeError: If no row matches (the fetched row is ``None``).
    """
    row = cur.execute('''SELECT id FROM BlkID WHERE blkhash = ?''', (blkhash,)).fetchone()
    return row[0]
    
def get_txid(txhash):
    """Look up the numeric id stored in table TxID for a transaction hash.

    Args:
        txhash: Value matched against the ``txhash`` column.

    Returns:
        The ``id`` column of the first matching row.

    Raises:
        TypeError: If no row matches (the fetched row is ``None``).
    """
    row = cur.execute('''SELECT id FROM TxID WHERE txhash = ?''', (txhash,)).fetchone()
    return row[0]
    
def get_addrid(addr):
    """Look up the numeric id stored in table AddrID for an address.

    Args:
        addr: Value matched against the ``addr`` column.

    Returns:
        The ``id`` column of the first matching row.

    Raises:
        TypeError: If no row matches (the fetched row is ``None``).
    """
    row = cur.execute('''SELECT id FROM AddrID WHERE addr = ?''', (addr,)).fetchone()
    return row[0]

def get_txins(addr):
    """Fetch every ``tx`` value recorded in table TxIn for ``addr``.

    Args:
        addr: Value matched against the ``addr`` column of TxIn.

    Returns:
        A list of one-element rows ``(tx,)``; empty when nothing matches.
    """
    return cur.execute('''SELECT tx FROM TxIn WHERE addr = ?''', (addr,)).fetchall()

def get_txouts(addr):
    """Fetch every ``tx`` value recorded in table TxOut for ``addr``.

    Args:
        addr: Value matched against the ``addr`` column of TxOut.

    Returns:
        A list of one-element rows ``(tx,)``; empty when nothing matches.
    """
    return cur.execute('''SELECT tx FROM TxOut WHERE addr = ?''', (addr,)).fetchall()

def get_tx(saddr, daddr):
    """Tell whether some transaction links ``saddr`` to ``daddr``.

    A transaction counts when its ``tx`` value appears in TxIn for ``saddr``
    and also in TxOut for ``daddr`` (the two result sets are intersected).

    Args:
        saddr: Value matched against ``TxIn.addr``.
        daddr: Value matched against ``TxOut.addr``.

    Returns:
        ``True`` if the intersection has at least one row, ``False`` otherwise.
    """
    cur.execute('''SELECT tx FROM TxIn WHERE addr = ?
                   INTERSECT
                   SELECT tx FROM TxOut WHERE addr = ?;''', (saddr, daddr))
    # Only existence matters, so a single fetched row is enough.
    return cur.fetchone() is not None

In [None]:
# Build the node list: one AddrID per CSV row, taken from the 'Address' column
# and resolved through get_addrid (which raises if the address is unknown).
# NOTE(review): duplicate addresses in the CSV yield duplicate nodes — confirm
# the input is expected to be unique.
nodes = list()
# The csv module requires newline='' so that it can handle line endings and
# newlines embedded in quoted fields itself.
with open(path, 'r', newline='') as f:
    reader = csv.DictReader(f)
    for row in reader:
        addrid = get_addrid(row['Address'])
        nodes.append(addrid)
print(f'Nodes: {len(nodes)}')

In [None]:
def get_edge_nominators(nodes):
    """Generate every candidate edge as a pair of distinct positions in ``nodes``.

    Each unordered pair is produced exactly once as ``(s, d)``, with ``s``
    occurring before ``d`` in ``nodes``.

    NOTE(review): get_edges tests each pair with get_tx(s, d) only, so the
    reverse direction (d as sender, s as receiver) is never checked — confirm
    this is intended.

    Args:
        nodes: Iterable of node identifiers.

    Yields:
        2-tuples ``(s, d)``.
    """
    yield from itertools.combinations(nodes, 2)

def get_edges(args):
    """Evaluate one candidate edge; shaped for use with ``Pool.imap*``.

    Args:
        args: Indexable whose first two items are the source and destination
            identifiers passed to get_tx.

    Returns:
        ``(s, d, found)`` where ``found`` is ``True`` when get_tx(s, d) is
        truthy and ``False`` otherwise.
    """
    s, d = args[0], args[1]
    return (s, d, bool(get_tx(s, d)))

In [None]:
import multiprocessing


def _init_worker():
    """Open a private SQLite connection inside each worker process.

    The module-level ``conn``/``cur`` are created in the parent process. SQLite
    does not support using a connection that was inherited across fork(), so
    every worker rebinds the globals read by the get_* helpers to a connection
    it opened itself.
    """
    global conn, cur
    conn = sqlite3.connect(dbpath)
    cur = conn.cursor()


# One worker per CPU reported by the OS.
pool_num = multiprocessing.cpu_count()

edges = list()   # (s, d) pairs for which get_edges reported a transaction
cnt = 0          # number of candidate pairs processed so far
last = int(scipy.special.comb(len(nodes), 2))   # total number of candidate pairs
stime = time.time()
with multiprocessing.Pool(pool_num, initializer=_init_worker) as p:
    # Results arrive in completion order, not submission order.
    results = p.imap_unordered(get_edges, get_edge_nominators(nodes))
    for s, d, b in results:
        cnt = cnt + 1
        # Progress line, rewritten in place via the trailing carriage return.
        print(f'[{cnt/last*100:.5f}% : {len(edges)} : {time.time()-stime:.1f}] {s} x {d} ', end='\r')
        if b:
            edges.append((s, d))
            # Permanent line for each edge found (same format as the progress line).
            print(f'[{cnt/last*100:.5f}% : {len(edges)} : {time.time()-stime:.1f}] {s} - {d}')

In [None]:
# Build the graph with one vertex per entry of `nodes`; the AddrID of each
# vertex is kept in its 'name' attribute.
g = igraph.Graph()
g.add_vertices(len(nodes))
if nodes:
    g.vs['name'] = nodes

# igraph interprets integer endpoints as 0-based vertex indices, not as names,
# so the AddrID pairs in `edges` must be translated to vertex positions first.
# Passing raw AddrIDs would connect the wrong vertices or fail with an
# out-of-range index. If an AddrID occurs more than once in `nodes`, its edges
# attach to the last occurrence.
vertex_of = {addrid: idx for idx, addrid in enumerate(nodes)}
g.add_edges([(vertex_of[s], vertex_of[d]) for s, d in edges])
g.write_pickle('graph.igraph')

In [None]:
# Run Leiden community detection with the modularity objective and time it.
# NOTE(review): no random seed is set anywhere in view, so the partition may
# differ between runs — confirm whether reproducibility is required.
stime = time.time()
partition = g.community_leiden(objective_function='modularity')
etime = time.time()
# len() of the returned clustering is reported as the community count.
size = len(partition)
# Output: "<number of communities> <elapsed seconds>".
print(f'{size} {etime-stime}')

In [None]:
# Compute vertex positions with igraph's DrL layout, then render the graph to
# an SVG file in the working directory.
layout = g.layout_drl()
igraph.plot(g, target='graph.svg', layout=layout)