# Analysis code in year 4

In [None]:
import csv
import datetime
import os
import sqlite3
import statistics
import time

import numpy as np
import pandas as pd
from scipy.stats import moment

In [None]:
# File names of the four dbv3 SQLite databases (index, core, service, util).
indexpath, corepath, servicepath, utilpath = (
    'dbv3-index.db',
    'dbv3-core.db',
    'dbv3-service.db',
    'dbv3-util.db',
)

In [None]:
# Open an in-memory main database and attach the four on-disk databases
# under the schema names (DBINDEX, DBCORE, DBSERVICE, DBUTIL) that the
# queries in this notebook refer to.
conn = sqlite3.connect(':memory:')
cur = conn.cursor()
# The file name is bound as a parameter instead of being formatted into the
# SQL text, so a path containing a quote character cannot break the statement.
cur.execute('ATTACH DATABASE ? AS DBINDEX;', (indexpath,))
cur.execute('ATTACH DATABASE ? AS DBCORE;', (corepath,))
cur.execute('ATTACH DATABASE ? AS DBSERVICE;', (servicepath,))
cur.execute('ATTACH DATABASE ? AS DBUTIL;', (utilpath,))
conn.commit()

In [None]:
# Input CSV of tagged addresses and the output CSV written after the
# root-address lookup.
tagpath, rootaddrpath = (
    'datasets/NamedCluster-Year4.csv',
    'datasets/NamedCluster-Year4-RootAddress.csv',
)

In [None]:
# Peek at the header of the tag CSV to see which columns it provides.
# newline='' is what the csv module asks for, so that quoted fields that
# contain line breaks are parsed correctly.
with open(tagpath, 'r', newline='') as f:
    reader = csv.DictReader(f)
    print(reader.fieldnames)

```sql
-- TODO: look up the cluster ID and root address for a given address
SELECT MIN(DBSERVICE.Cluster.addr) AS clusterId, DBINDEX.AddrID.addr AS rootAddress
FROM DBSERVICE.Cluster
INNER JOIN DBINDEX.AddrID ON DBSERVICE.Cluster.addr = DBINDEX.AddrID.id
WHERE DBSERVICE.Cluster.cluster = (
    SELECT DBSERVICE.Cluster.cluster
    FROM DBSERVICE.Cluster
    WHERE DBSERVICE.Cluster.addr = (
        SELECT DBINDEX.AddrID.id
        FROM DBINDEX.AddrID
        WHERE DBINDEX.AddrID.addr = 'bc1q2lgzm0mh6qgydmc5vvauv2uh6e745yn797wwqs'));
```

In [None]:
# Parameterised lookup; the single `?` is bound to an address string.
# The innermost SELECT maps that string to its id in DBINDEX.AddrID, the
# middle SELECT finds the cluster that id belongs to, and the outer query
# returns MIN(addr) over that cluster (clusterId) together with an address
# string from the joined AddrID row (rootAddress).
# NOTE(review): rootAddress is a bare column next to an aggregate; this
# presumably relies on SQLite taking it from the row that holds the
# minimum - confirm.
query = '''
SELECT MIN(DBSERVICE.Cluster.addr) AS clusterId, DBINDEX.AddrID.addr AS rootAddress
FROM DBSERVICE.Cluster
INNER JOIN DBINDEX.AddrID ON DBSERVICE.Cluster.addr = DBINDEX.AddrID.id
WHERE DBSERVICE.Cluster.cluster = (
    SELECT DBSERVICE.Cluster.cluster
    FROM DBSERVICE.Cluster
    WHERE DBSERVICE.Cluster.addr = (
        SELECT DBINDEX.AddrID.id
        FROM DBINDEX.AddrID
        WHERE DBINDEX.AddrID.addr = ?));
'''

In [None]:
# For every tagged address in the CSV, run `query` and record
# (categoryName, clusterName, address, rootAddress, clusterId).
rows = []
# newline='' is what the csv module asks for, so that quoted fields that
# contain line breaks are parsed correctly.
with open(tagpath, 'r', newline='') as f:
    reader = csv.DictReader(f)
    for row in reader:
        cur.execute(query, (row['address'],))
        # res is (clusterId, rootAddress); the tuple below swaps them so the
        # address string comes before the numeric id.
        # NOTE(review): for an address the database does not know, both
        # values are presumably None rather than the row being absent.
        res = cur.fetchone()
        rows.append((row['categoryName'], row['clusterName'], row['address'], res[1], res[0]))
print(len(rows))

In [None]:
# Tabulate the lookup results: one row per tagged address, with the CSV
# fields followed by the root address string and the cluster id.
df = pd.DataFrame(rows, columns=('categoryName', 'clusterName', 'address', 'rootAddress', 'clusterId'))
df

In [None]:
# Persist the table to rootaddrpath. No `index` argument is given, so the
# pandas default applies and the row index is written as well.
df.to_csv(rootaddrpath)

In [None]:
# Shared SQL fragments (FROM ... WHERE ...) for the two ways an address can
# take part in a transaction. Each fragment binds exactly one `?`.
#   as input : a TxIn row whose previous output (ptx, pn) belongs to the address
#   as output: a TxOut row that belongs to the address
_ADDR_AS_INPUT = '''FROM DBCORE.TxIn
                    INNER JOIN DBCORE.TxOut ON DBCORE.TxIn.ptx = DBCORE.TxOut.tx
                                           AND DBCORE.TxIn.pn = DBCORE.TxOut.n
                    WHERE DBCORE.TxOut.addr = ?'''
_ADDR_AS_OUTPUT = '''FROM DBCORE.TxOut
                     WHERE DBCORE.TxOut.addr = ?'''
_TX_AS_INPUT = 'SELECT DBCORE.TxIn.tx AS tx ' + _ADDR_AS_INPUT
_TX_AS_OUTPUT = 'SELECT DBCORE.TxOut.tx AS tx ' + _ADDR_AS_OUTPUT


def _fetch_scalar(cur, sql, params):
    """Run *sql* and return the first column of the first row, or 0 if there is no row or it is NULL."""
    cur.execute(sql, params)
    row = cur.fetchone()
    if row is None or row[0] is None:
        return 0
    return row[0]


def _address_age(cur, tx_subquery, params):
    """Return MAX - MIN of BlkTime.unixtime over the blocks holding the transactions of *tx_subquery*."""
    return _fetch_scalar(cur, f'''SELECT MAX(DBCORE.BlkTime.unixtime) - MIN(DBCORE.BlkTime.unixtime)
                   FROM ({tx_subquery}) AS T
                   INNER JOIN DBCORE.BlkTx ON T.tx = DBCORE.BlkTx.tx
                   INNER JOIN DBCORE.BlkTime ON DBCORE.BlkTx.blk = DBCORE.BlkTime.blk''', params)


def get_feature(conn, cur, addr):
    """Compute the feature tuple of one address from the DBCORE / DBINDEX tables.

    Args:
        conn: open SQLite connection (unused here; kept for the call signature).
        cur: cursor on a connection that has DBCORE and DBINDEX attached.
        addr: numeric address id, as stored in DBCORE.TxOut.addr and
            DBINDEX.AddrID.id.

    Returns:
        A 17-tuple, in this order:
        (addr,
         cnttx, cnttxin, cnttxout,        distinct transactions (any / as input / as output)
         btc, btcin, btcout,              summed TxOut.btc (in + out / as input / as output)
         cntuse, cntusein, cntuseout,     appearances, duplicates included
         age, agein, ageout,              MAX - MIN block unixtime of those transactions
         addrtypep2pkh, addrtypep2sh, addrtypebech32, addrtypeother)
        Counts, sums and ages are 0 when the address has no matching rows.
        All four address-type flags are 0 when the id is not in AddrID.
    """
    one = (addr,)
    two = (addr, addr)

    # tx: distinct transactions (UNION removes duplicates across both roles)
    cnttx = _fetch_scalar(cur, f'SELECT COUNT(tx) FROM ({_TX_AS_INPUT} UNION {_TX_AS_OUTPUT});', two)
    cnttxin = _fetch_scalar(cur, f'SELECT COUNT(DISTINCT DBCORE.TxIn.tx) {_ADDR_AS_INPUT};', one)
    cnttxout = _fetch_scalar(cur, f'SELECT COUNT(DISTINCT DBCORE.TxOut.tx) {_ADDR_AS_OUTPUT};', one)

    # btc: the total is added up here from the two NULL-safe parts. Adding
    # the two SUMs inside SQL yields NULL as soon as one side has no rows,
    # which used to report a total of 0 for addresses seen in only one role.
    btcin = _fetch_scalar(cur, f'SELECT SUM(DBCORE.TxOut.btc) AS btc {_ADDR_AS_INPUT}', one)
    btcout = _fetch_scalar(cur, f'SELECT SUM(DBCORE.TxOut.btc) AS btc {_ADDR_AS_OUTPUT}', one)
    btc = btcin + btcout

    # use: every appearance counts (UNION ALL / no DISTINCT)
    cntuse = _fetch_scalar(cur, f'SELECT COUNT(tx) FROM ({_TX_AS_INPUT} UNION ALL {_TX_AS_OUTPUT});', two)
    cntusein = _fetch_scalar(cur, f'SELECT COUNT(DBCORE.TxIn.tx) {_ADDR_AS_INPUT};', one)
    cntuseout = _fetch_scalar(cur, f'SELECT COUNT(DBCORE.TxOut.tx) {_ADDR_AS_OUTPUT};', one)

    # age
    age = _address_age(cur, f'{_TX_AS_INPUT} UNION {_TX_AS_OUTPUT}', two)
    agein = _address_age(cur, _TX_AS_INPUT, one)
    ageout = _address_age(cur, _TX_AS_OUTPUT, one)

    # addrtype: one-hot on the prefix of the address string
    p2pkh = p2sh = bech32 = other = 0
    cur.execute('''SELECT DBINDEX.AddrID.addr
                   FROM DBINDEX.AddrID
                   WHERE DBINDEX.AddrID.id = ?''', one)
    row = cur.fetchone()
    address = None if row is None else row[0]
    if address is None:
        pass  # unknown id: leave all flags at 0
    elif address.startswith('1'):
        p2pkh = 1
    elif address.startswith('3'):
        p2sh = 1
    elif address.startswith('bc1'):
        # Test the whole string: a single character can never start with 'bc1'.
        bech32 = 1
    else:
        other = 1

    return (
            addr,
            cnttx, cnttxin, cnttxout,
            btc, btcin, btcout, cntuse, cntusein,
            cntuseout, age, agein, ageout, p2pkh,
            p2sh, bech32, other)

In [None]:
data = []
for idx, row in df.iterrows():
    addr = row['address']
    cur.execute(query, (addr,))
    addrid = cur.fetchone()[0]
    data.append(get_feature(conn, cur, addrid))

In [None]:
# Tabulate the feature tuples. The 'address' column holds the first tuple
# element returned by get_feature, i.e. the numeric id that was passed in,
# not the address string.
fdf = pd.DataFrame(data, columns=['address', 
                                  'cnttx', 'cnttxin', 'cnttxout', 'btc', 'btcin', 
                                  'btcout', 'cntuse', 'cntusein', 'cntuseout', 'age',
                                  'agein', 'ageout', 'addrtypep2pkh', 'addrtypep2sh', 'addrtypebech32',
                                  'addrtypeother'])
fdf

In [None]:
def get_associate_addr(conn, cur, addr):
    """Collect the address ids associated with *addr*.

    Three neighbourhoods are looked up:
      * the members of every DBSERVICE.Cluster cluster that contains addr,
      * the `src` of every DBUTIL.Edge row whose `dst` is addr,
      * the `dst` of every DBUTIL.Edge row whose `src` is addr.

    Returns a 4-tuple of sets: (addr plus all three neighbourhoods,
    cluster members, edge sources, edge destinations).
    `conn` is accepted but not used.
    """
    def first_column(sql):
        # Run the one-parameter query for addr and gather column 0 of each row.
        return {record[0] for record in cur.execute(sql, (addr,))}

    # multiinput
    cluster_members = first_column('''SELECT DBSERVICE.Cluster.addr
                              FROM DBSERVICE.Cluster
                              WHERE DBSERVICE.Cluster.cluster IN (
                                SELECT DBSERVICE.Cluster.cluster
                                FROM DBSERVICE.Cluster
                                WHERE DBSERVICE.Cluster.addr = ?);''')
    edge_sources = first_column('''SELECT DBUTIL.Edge.src
                              FROM DBUTIL.Edge
                              WHERE DBUTIL.Edge.dst = ?;''')
    edge_targets = first_column('''SELECT DBUTIL.Edge.dst
                              FROM DBUTIL.Edge
                              WHERE DBUTIL.Edge.src = ?;''')

    everything = {addr} | cluster_members | edge_sources | edge_targets
    return (everything, cluster_members, edge_sources, edge_targets)


def get_feature_vector(conn, cur, addrid):
    """Return the feature vector for one address id or for a collection of ids.

    Args:
        conn: open SQLite connection, forwarded to the implementation.
        cur: cursor on that connection, forwarded to the implementation.
        addrid: a single integer id (Python int or numpy integer), or a
            list / set / tuple / frozenset of ids.

    Returns:
        The result of _get_feature_vector_int for a single id, or of
        _get_feature_vector_list for a collection.

    Raises:
        TypeError: if addrid is neither an integer nor one of the supported
            collections. Returning None here would only fail later, at the
            caller's `extend`.
    """
    # isinstance instead of `type(...) ==`, so numpy integers (e.g. values
    # read back from a DataFrame) and int subclasses are dispatched too.
    if isinstance(addrid, (int, np.integer)):
        # sqlite3 only binds plain Python ints, so normalise numpy scalars.
        return _get_feature_vector_int(conn, cur, int(addrid))
    if isinstance(addrid, (list, set, tuple, frozenset)):
        return _get_feature_vector_list(conn, cur, addrid)
    raise TypeError(
        f'addrid must be an int or a list/set/tuple of ints, got {type(addrid).__name__}')


def _get_feature_vector_int(conn, cur, addrid):
    vector = []
    # target
    cur.execute('''SELECT DBSERVICE.Feature.cnttx, DBSERVICE.Feature.cnttxin, DBSERVICE.Feature.cnttxout,
                          DBSERVICE.Feature.btc, DBSERVICE.Feature.btcin, DBSERVICE.Feature.btcout,
                          DBSERVICE.Feature.cntuse, DBSERVICE.Feature.cntusein, DBSERVICE.Feature.cntuseout,
                          DBSERVICE.Feature.age, DBSERVICE.Feature.agein, DBSERVICE.Feature.ageout,
                          DBSERVICE.Feature.addrtypep2pkh, DBSERVICE.Feature.addrtypep2sh,
                          DBSERVICE.Feature.addrtypebech32, DBSERVICE.Feature.addrtypeother
                   FROM DBSERVICE.Feature
                   WHERE DBSERVICE.Feature.addr = ?;''', (addrid,))
    res = cur.fetchone()
    vector.append(res[0])
    vector.append(res[1])
    vector.append(res[2])
    vector.append(res[3])
    vector.append(res[4])
    vector.append(res[5])
    vector.append(res[6])
    vector.append(res[7])
    vector.append(res[8])
    vector.append(res[9])
    vector.append(res[10])
    vector.append(res[11])
    vector.append(res[12])
    vector.append(res[13])
    vector.append(res[14])
    vector.append(res[15])

    return vector


def _get_feature_vector_list(conn, cur, addrid):
    vector = []
    cur.execute('''DROP TABLE IF EXISTS AddrList;''')
    cur.execute('''CREATE TABLE IF NOT EXISTS AddrList (
                     addr INTEGER PRIMARY KEY);''')
    conn.commit()
    cur.execute('BEGIN TRANSACTION')
    for addr in addrid:
        cur.execute('''INSERT OR IGNORE INTO AddrList (
                         addr) VALUES (
                         ?);''', (addr,))
    cur.execute('COMMIT TRANSACTION')
    conn.commit()
    cur.execute('''SELECT DBSERVICE.Feature.cnttx, DBSERVICE.Feature.cnttxin, DBSERVICE.Feature.cnttxout,
                          DBSERVICE.Feature.btc, DBSERVICE.Feature.btcin, DBSERVICE.Feature.btcout,
                          DBSERVICE.Feature.cntuse, DBSERVICE.Feature.cntusein, DBSERVICE.Feature.cntuseout,
                          DBSERVICE.Feature.age, DBSERVICE.Feature.agein, DBSERVICE.Feature.ageout,
                          DBSERVICE.Feature.addrtypep2pkh, DBSERVICE.Feature.addrtypep2sh,
                          DBSERVICE.Feature.addrtypebech32, DBSERVICE.Feature.addrtypeother
                   FROM AddrList
                   INNER JOIN DBSERVICE.Feature ON DBSERVICE.Feature.addr = AddrList.addr;''')
    res = pd.DataFrame(cur.fetchall())
    if len(res) > 0:
        vector.append(min(res[0]))
        vector.append(max(res[0]))
        vector.append(sum(res[0]))
        vector.append(statistics.median(res[0]))
        vector.append(statistics.mean(res[0]))
        vector.append(moment(res[0], moment=2))
        vector.append(moment(res[0], moment=3))
        vector.append(moment(res[0], moment=4))
        vector.append(min(res[1]))
        vector.append(max(res[1]))
        vector.append(sum(res[1]))
        vector.append(statistics.median(res[1]))
        vector.append(statistics.mean(res[1]))
        vector.append(moment(res[1], moment=2))
        vector.append(moment(res[1], moment=3))
        vector.append(moment(res[1], moment=4))
        vector.append(min(res[2]))
        vector.append(max(res[2]))
        vector.append(sum(res[2]))
        vector.append(statistics.median(res[2]))
        vector.append(statistics.mean(res[2]))
        vector.append(moment(res[2], moment=2))
        vector.append(moment(res[2], moment=3))
        vector.append(moment(res[2], moment=4))
        vector.append(min(res[3]))
        vector.append(max(res[3]))
        vector.append(sum(res[3]))
        vector.append(statistics.median(res[3]))
        vector.append(statistics.mean(res[3]))
        vector.append(moment(res[3], moment=2))
        vector.append(moment(res[3], moment=3))
        vector.append(moment(res[3], moment=4))
        vector.append(min(res[4]))
        vector.append(max(res[4]))
        vector.append(sum(res[4]))
        vector.append(statistics.median(res[4]))
        vector.append(statistics.mean(res[4]))
        vector.append(moment(res[4], moment=2))
        vector.append(moment(res[4], moment=3))
        vector.append(moment(res[4], moment=4))
        vector.append(min(res[5]))
        vector.append(max(res[5]))
        vector.append(sum(res[5]))
        vector.append(statistics.median(res[5]))
        vector.append(statistics.mean(res[5]))
        vector.append(moment(res[5], moment=2))
        vector.append(moment(res[5], moment=3))
        vector.append(moment(res[5], moment=4))
        vector.append(min(res[6]))
        vector.append(max(res[6]))
        vector.append(sum(res[6]))
        vector.append(statistics.median(res[6]))
        vector.append(statistics.mean(res[6]))
        vector.append(moment(res[6], moment=2))
        vector.append(moment(res[6], moment=3))
        vector.append(moment(res[6], moment=4))
        vector.append(min(res[7]))
        vector.append(max(res[7]))
        vector.append(sum(res[7]))
        vector.append(statistics.median(res[7]))
        vector.append(statistics.mean(res[7]))
        vector.append(moment(res[7], moment=2))
        vector.append(moment(res[7], moment=3))
        vector.append(moment(res[7], moment=4))
        vector.append(min(res[8]))
        vector.append(max(res[8]))
        vector.append(sum(res[8]))
        vector.append(statistics.median(res[8]))
        vector.append(statistics.mean(res[8]))
        vector.append(moment(res[8], moment=2))
        vector.append(moment(res[8], moment=3))
        vector.append(moment(res[8], moment=4))
        vector.append(min(res[9]))
        vector.append(max(res[9]))
        vector.append(sum(res[9]))
        vector.append(statistics.median(res[9]))
        vector.append(statistics.mean(res[9]))
        vector.append(moment(res[9], moment=2))
        vector.append(moment(res[9], moment=3))
        vector.append(moment(res[9], moment=4))
        vector.append(min(res[10]))
        vector.append(max(res[10]))
        vector.append(sum(res[10]))
        vector.append(statistics.median(res[10]))
        vector.append(statistics.mean(res[10]))
        vector.append(moment(res[10], moment=2))
        vector.append(moment(res[10], moment=3))
        vector.append(moment(res[10], moment=4))
        vector.append(min(res[11]))
        vector.append(max(res[11]))
        vector.append(sum(res[11]))
        vector.append(statistics.median(res[11]))
        vector.append(statistics.mean(res[11]))
        vector.append(moment(res[11], moment=2))
        vector.append(moment(res[11], moment=3))
        vector.append(moment(res[11], moment=4))
        vector.append(min(res[12]))
        vector.append(max(res[12]))
        vector.append(sum(res[12]))
        vector.append(statistics.median(res[12]))
        vector.append(statistics.mean(res[12]))
        vector.append(moment(res[12], moment=2))
        vector.append(moment(res[12], moment=3))
        vector.append(moment(res[12], moment=4))
        vector.append(min(res[13]))
        vector.append(max(res[13]))
        vector.append(sum(res[13]))
        vector.append(statistics.median(res[13]))
        vector.append(statistics.mean(res[13]))
        vector.append(moment(res[13], moment=2))
        vector.append(moment(res[13], moment=3))
        vector.append(moment(res[13], moment=4))
        vector.append(min(res[14]))
        vector.append(max(res[14]))
        vector.append(sum(res[14]))
        vector.append(statistics.median(res[14]))
        vector.append(statistics.mean(res[14]))
        vector.append(moment(res[14], moment=2))
        vector.append(moment(res[14], moment=3))
        vector.append(moment(res[14], moment=4))
        vector.append(min(res[15]))
        vector.append(max(res[15]))
        vector.append(sum(res[15]))
        vector.append(statistics.median(res[15]))
        vector.append(statistics.mean(res[15]))
        vector.append(moment(res[15], moment=2))
        vector.append(moment(res[15], moment=3))
        vector.append(moment(res[15], moment=4))
    else:
        vector.extend([0]*128)

    return vector

In [None]:
# Map an address string (bound to `?`) to its numeric id in DBINDEX.AddrID.
# NOTE: this rebinds the name `query`, replacing the cluster / root-address
# lookup assigned to the same name earlier in the file.
query = '''
SELECT DBINDEX.AddrID.id
FROM DBINDEX.AddrID
WHERE DBINDEX.AddrID.addr = ?;
'''

In [None]:
def _build_feature_names():
    """Build the column names of the full per-address feature table.

    Layout (402 names):
      * 'address', 'AddressID'
      * the 16 base features of the address itself
      * for each neighbourhood prefix (MI, IN1, OUT1), for each base feature,
        the 8 summary statistics, named '<prefix>_<feature>_<stat>'.
    The statistic order MIN, MAX, SUM, MEDIAN, M1..M4 matches the order in
    which _get_feature_vector_list appends min, max, sum, median, mean and
    the 2nd to 4th moment.
    """
    base = ('CntTx', 'CntTxIn', 'CntTxOut', 'BTC', 'BTCIn', 'BTCOut',
            'CntUse', 'CntUseIn', 'CntUseOut', 'Age', 'AgeIn', 'AgeOut',
            'AddrTypeP2PKH', 'AddrTypeP2SH', 'AddrTypeBech32', 'AddrTypeOther')
    stats = ('MIN', 'MAX', 'SUM', 'MEDIAN', 'M1', 'M2', 'M3', 'M4')
    prefixes = ('MI', 'IN1', 'OUT1')

    names = ['address', 'AddressID', *base]
    names.extend(f'{prefix}_{feature}_{stat}'
                 for prefix in prefixes
                 for feature in base
                 for stat in stats)
    return names


FEATURES = _build_feature_names()

In [None]:
# Build one full feature row per address in df: the address string, its
# numeric id, its own stored features, then the summary statistics of its
# three neighbourhoods, in the order MI, IN1, OUT1.
data = []
for idx, row in df.iterrows():
    addr = row['address']
    # NOTE(review): this assumes `query` is the AddrID id lookup at this
    # point, and that every address in df is present in AddrID - a missing
    # one would make fetchone() return None. Confirm.
    cur.execute(query, (addr,))
    addrid = cur.fetchone()[0]
    total, mi, in1, out1 = get_associate_addr(conn, cur, addrid)
    mi_vector = get_feature_vector(conn, cur, mi) # Cluster members (multi-input)
    in1_vector = get_feature_vector(conn, cur, in1) # Edge sources pointing at the address
    out1_vector = get_feature_vector(conn, cur, out1) # Edge destinations reached from the address
    vector = [addr, addrid]
    vector.extend(get_feature_vector(conn, cur, addrid))
    vector.extend(mi_vector)
    vector.extend(in1_vector)
    vector.extend(out1_vector)
    data.append(vector)

In [None]:
# Tabulate the feature rows, one per address, with columns named by FEATURES.
fdf = pd.DataFrame(data, columns=FEATURES)
fdf

In [None]:
df

In [None]:
# Sum the remaining columns per (rootAddress, clusterName) group.
# NOTE(review): the remaining columns include the text columns categoryName
# and address - confirm that summing them is intended.
df.groupby(['rootAddress', 'clusterName']).sum()

In [None]:
# Print every (rootAddress, clusterName) key present in df.
# A GroupBy object has no iterrows(); iterating it directly yields
# (key, sub-frame) pairs, so `row` is the DataFrame of that group.
for idx, row in df.groupby(['rootAddress', 'clusterName']):
    print(idx)