## 태그 데이터 삽입

In [None]:
import csv
import sqlite3

In [None]:
# Paths of the SQLite files used while inserting the tag data.
indexpath = 'dbv3-index.db'
corepath = 'dbv3-core.db'
dbpath = 'dbv3-service.db'

# Open the service database as the main schema and attach the index and core
# databases so their tables can be addressed as DBINDEX.* and DBCORE.*.
conn = sqlite3.connect(dbpath)
cur = conn.cursor()
# The file names are bound as parameters rather than formatted into the SQL
# text, so a path containing a quote character cannot break the statement.
cur.execute('ATTACH DATABASE ? AS DBINDEX;', (indexpath,))
cur.execute('ATTACH DATABASE ? AS DBCORE;', (corepath,))
conn.commit()

In [None]:
tagpath = 'data/address_tags_210525.csv'

# Read (Address, Tag) pairs from the CSV file and link every address to its
# tag in the service database.
#
# * newline='' is what the csv module asks for, so that quoted fields which
#   contain line breaks are parsed correctly.
# * `conn` is used as a context manager: the whole import is committed once
#   at the end, or rolled back if any row fails, instead of committing twice
#   per row.
with open(tagpath, 'r', newline='') as f, conn:
    reader = csv.DictReader(f)
    for row in reader:
        # Resolve the address string to its id in the index database.
        cur.execute('''SELECT id FROM DBINDEX.AddrID
                       WHERE DBINDEX.AddrID.addr = ?;''', (row['Address'],))
        found = cur.fetchone()
        if found is None:
            # fetchone() returns None when no row matches; fail with a
            # message that names the offending address instead of an opaque
            # "'NoneType' object is not subscriptable".
            raise LookupError(
                f"address {row['Address']!r} from {tagpath} "
                f"is not present in DBINDEX.AddrID")
        addrid = found[0]

        # Register the tag name if it is new, then read back its id.
        # (The original referenced an undefined name `Row` here.)
        cur.execute('''INSERT OR IGNORE INTO AddrTagID (tag)
                       VALUES (?);''', (row['Tag'],))
        cur.execute('''SELECT id FROM AddrTagID
                       WHERE AddrTagID.tag = ?;''', (row['Tag'],))
        tagid = cur.fetchone()[0]

        # Link the address to the tag; an already existing pair is ignored.
        cur.execute('''INSERT OR IGNORE INTO AddrTag (addr, tag)
                       VALUES (?, ?);''', (addrid, tagid))

In [None]:
# Close the connection that was opened on the service database.
conn.close()

## 주소 특징 추출

In [None]:
import os
import csv
import sqlite3
import statistics
import collections

import numpy as np
import pandas as pd
from scipy.stats import moment

import matplotlib.pyplot as plt
%matplotlib inline
# %matplotlib notebook

In [None]:
# Cache DataFrame
# Reuse the DataFrame pickled at `cachepath` when that file exists;
# otherwise begin with an empty frame.
cachepath = 'cache_address_210525.pickle'
df = pd.read_pickle(cachepath) if os.path.exists(cachepath) else pd.DataFrame()

In [None]:
# Connect DB
# The main schema is an in-memory database; the four database files are
# attached to it under the aliases DBINDEX, DBCORE, DBUTIL and DBSERVICE.
conn = sqlite3.connect(':memory:')
cur = conn.cursor()

attach_statements = (
    '''ATTACH DATABASE './dbv3-index.db' AS DBINDEX;''',
    '''ATTACH DATABASE './dbv3-core.db' AS DBCORE;''',
    '''ATTACH DATABASE './dbv3-util.db' AS DBUTIL;''',
    '''ATTACH DATABASE './dbv3-service.db' AS DBSERVICE;''',
)
for statement in attach_statements:
    cur.execute(statement)
conn.commit()

In [None]:
# Enqueue all of tagged addresses
# Every distinct `addr` value stored in DBSERVICE.AddrTag goes into a deque,
# in the order the query returns them.
tagged_rows = cur.execute('''SELECT DISTINCT addr FROM DBSERVICE.AddrTag;''')
queue = collections.deque(record[0] for record in tagged_rows)
print(f'Ready the tagged queue: {len(queue)}')

In [None]:
# Process feature for addresses
# Drains the queue from the left, one address per iteration. No feature is
# computed yet: the comments inside the loop body only list the features that
# are planned, as placeholders.
# NOTE(review): the exact definition of each feature below is not shown in
# this notebook -- confirm before implementing.
while queue:
    addr = queue.popleft()
    # Tx
    
    # InTx
    
    # OutTx
    
    # BTC
    
    # InBTC
    
    # OutBTC
    
    # Use
    
    # InUse
    
    # OutUse
    
    # Age
    
    # Agemmmm1~m4s
    # NOTE(review): the "mmmm1~m4s" suffix is not explained here; presumably
    # a group of summary statistics (the notebook imports `statistics` and
    # `scipy.stats.moment`) -- confirm.
    
    # InAgemmmm1~m4s
    
    # OutAgemmmm1~m4s
    
    # isP2PKH
    # isP2SH
    # isBech32
    ## P2PKH which begin with the number 1, eg: 1BvBMSEYstWetqTFn5Au4m4GFg7xJaNVN2.
    ## P2SH type starting with the number 3, eg: 3J98t1WpEZ73CNmQviecrnyiWrnqRhWNLy.
    ## Bech32 type starting with bc1, eg: bc1qar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq.

In [None]:
# Write the DataFrame to the pickle file at `cachepath`, then close the
# database connection.
df.to_pickle(cachepath)
conn.close()