In [1]:
import csv

import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd

#### Balance

In [None]:
# Load the balance table and report which columns it carries.
balance_df = pd.read_csv('./balance.csv')
print(f'Columns name: {balance_df.columns.values}')
# Keep only the rows whose 'btc' value is non-zero.
has_balance = balance_df['btc'].ne(0)
balance_df = balance_df.loc[has_balance]

In [None]:
# Count the rows of balance_df falling in each half-open 'btc' interval
# [0, 1), [1, 10), [10, 100), ..., [10^5, 10^6) -- seven bins in total.
bin_edges = [0] + [10 ** exponent for exponent in range(7)]
btc = balance_df['btc']
balance_stack = [
    int(((lower <= btc) & (btc < upper)).sum())
    for lower, upper in zip(bin_edges[:-1], bin_edges[1:])
]
print(balance_stack)

In [None]:
# Bar chart of the bin counts held in `balance_stack`, drawn on a log-scaled
# y-axis and saved as balance_hist.png.
#
# The labels mirror the bin edges used when `balance_stack` is filled: the
# first bin is [0, 1) and the last is [10^5, 10^6). Starting the labels at
# 10^0 would describe every bar as one decade higher than what was counted.
x = range(0, len(balance_stack))
tick_label = ['[0, $10^0$)', '[$10^0$, $10^1$)',
              '[$10^1$, $10^2$)', '[$10^2$, $10^3$)',
              '[$10^3$, $10^4$)', '[$10^4$, $10^5$)',
              '[$10^5$, $10^6$)']
fig = plt.figure(figsize=(8*1, 6*1))
fig.set_facecolor('white')
ax = fig.add_subplot()
bars = ax.bar(x, balance_stack, tick_label=tick_label, color='#8EB695', log=True)
ax.set_xticklabels(tick_label, rotation=25, fontsize='large')
ax.tick_params(axis='y', labelsize='large')
ax.set_xlabel('Balance (BTC)', fontsize='x-large')
ax.set_ylabel('The number of address', fontsize='x-large')
for rect, count in zip(bars, balance_stack):
    # Centre the count over its bar using the bar's real width rather than a
    # hard-coded half-width.
    ax.annotate(str(count),
                xy=(rect.get_x() + rect.get_width() / 2, rect.get_height()),
                ha='center',
                va='bottom')
fig.savefig('balance_hist.png', bbox_inches='tight')

In [None]:
# Ten largest rows by 'btc' (ties ordered by 'cnt'), largest first.
top_balances = balance_df.sort_values(by=['btc', 'cnt'], ascending=False)[:10]
print('Balance by address number')
print(top_balances.to_csv())

# Share of the overall 'btc' total held by each of those ten rows, in percent.
total_btc = sum(balance_df['btc'])
print(f'Balance 비율 (총합: {sum(balance_df["btc"])})')
top_share_pct = top_balances['btc'] / total_btc * 100
print(top_share_pct.to_csv())

### Cryptolocker Intra-cluster

In [None]:
# Load ./cryptolocker.csv into crypto_df and print its column names.
crypto_df = pd.read_csv('./cryptolocker.csv')
print(f'Columns name: {crypto_df.columns.values}')

In [None]:
# Print the sum of the 'btc' column (labelled "total amount deposited") and
# the number of rows (labelled "total number of deposits").
print(f'입금된 총 금액: {sum(crypto_df.btc)}')
print(f'입금된 총 횟수: {len(crypto_df)}')

In [None]:
import datetime

# Zero-offset (UTC) timezone used for every timestamp conversion below.
tz_utc = datetime.timezone.utc


def get_time(timestamp):
    """Convert a Unix timestamp in seconds to a timezone-aware UTC datetime."""
    return datetime.datetime.fromtimestamp(timestamp, tz=tz_utc)


def get_year(row):
    """Return the UTC calendar year of ``row.unixtime``."""
    return get_time(row.unixtime).year


def get_month(row):
    """Return the UTC calendar month (1-12) of ``row.unixtime``."""
    return get_time(row.unixtime).month


def get_day(row):
    """Return the UTC day of the month of ``row.unixtime``."""
    return get_time(row.unixtime).day

In [None]:
# Add the UTC calendar year / month / day of each row's 'unixtime' (seconds
# since the epoch). The conversion is done once for the whole column instead
# of running three row-by-row apply() passes that each re-parse the timestamp.
crypto_utc = pd.to_datetime(crypto_df['unixtime'], unit='s', utc=True)
crypto_df['year'] = crypto_utc.dt.year
crypto_df['month'] = crypto_utc.dt.month
crypto_df['day'] = crypto_utc.dt.day

In [None]:
# Monthly totals: group by (year, month), then show the largest monthly 'btc'
# sum followed by the full monthly series.
crypto_grp = crypto_df.groupby(by=['year', 'month'])
monthly_btc = crypto_grp.sum().btc
print(max(monthly_btc))
print(monthly_btc)

In [None]:
# Rebind crypto_grp to a per-day grouping keyed by (year, month, day); this
# replaces the monthly grouping that used the same name.
crypto_grp = crypto_df.groupby(by=['year', 'month', 'day'])

In [None]:
# Build one point per calendar day from 2013-09-01 through 2014-02-28
# (inclusive): x holds the day, y the number of 'txid' entries and z the summed
# 'btc' for that day. Days without any rows get 0 for both.
#
# The per-day aggregates are computed once, and only for the two columns that
# are read. Calling crypto_grp.sum() / crypto_grp.count() inside the loop
# re-aggregates every column of the frame on each iteration.
# Assumes crypto_grp is the grouping keyed by (year, month, day).
daily_txcnt = crypto_grp['txid'].count().to_dict()
daily_btcsum = crypto_grp['btc'].sum().to_dict()

first_day = datetime.datetime(year=2013, month=9, day=1)
last_day = datetime.datetime(year=2014, month=2, day=28)

x = list() # datetime
y = list() # txcount
z = list() # btcsum
date = first_day
while date <= last_day:
    dt = (date.year, date.month, date.day)
    x.append(date)
    y.append(daily_txcnt.get(dt, 0))
    z.append(daily_btcsum.get(dt, 0))
    date = date + datetime.timedelta(days=1)

In [None]:
# Bubble chart of daily activity: position is (day, transaction count) and the
# marker area follows the day's 'btc' sum. Each day is drawn twice -- a hollow
# ring sized by the sum and a '+' sized by half of it (floor division).
fig, ax = plt.subplots(figsize=(8, 6))
fig.set_facecolor('white')
ring_sizes = list(z)
cross_sizes = [btc_sum // 2 for btc_sum in z]
ax.scatter(x, y, s=ring_sizes, facecolors='none', edgecolors='salmon', linewidths=3)
ax.scatter(x, y, s=cross_sizes, c='salmon', marker='+')
for axis_name, extra in (('x', {'rotation': 15}), ('y', {})):
    ax.tick_params(axis=axis_name, labelsize='large', **extra)
ax.set_xlabel('Date', fontsize='x-large')
ax.set_ylabel('The number of transactions', fontsize='x-large')
fig.savefig('cryptolocker.png', bbox_inches='tight')

### Korbit intra-cluster

In [None]:
# Load ./korbit_degree.csv into korbit_df and print its column names.
korbit_df = pd.read_csv('./korbit_degree.csv')
print(f'Columns name: {korbit_df.columns.values}')

In [None]:
# Total degree = in-degree + out-degree; total value = income + outcome.
korbit_df['degree'] = korbit_df['indegree'] + korbit_df['outdegree']
korbit_df['value'] = korbit_df['income'] + korbit_df['outcome']

In [None]:
# Print, as CSV, the 13 rows with the highest 'degree' (ties ordered by 'value').
print(korbit_df.sort_values(by=['degree', 'value'], ascending=False)[:13].to_csv())

In [None]:
# Empirical CDF of the 'degree' column on a log-scaled x-axis, using as many
# histogram bins as the largest degree value; saved as korbit_degree.png.
fig, ax = plt.subplots(figsize=(8, 6))
fig.set_facecolor('white')
degrees = korbit_df.degree
ax.hist(degrees, max(degrees), density=True, histtype='step',
        cumulative=True, color='orange')
ax.set_xscale('log')
for axis_name in ('x', 'y'):
    ax.tick_params(axis=axis_name, labelsize='large')
ax.set_xlabel('Degree', fontsize='x-large')
ax.set_ylabel('CDF', fontsize='x-large')
fig.savefig('korbit_degree.png', bbox_inches='tight')

In [None]:
# Total number of rows, then how many of them have a degree of at least 1000.
print(len(korbit_df))
high_degree_rows = korbit_df[korbit_df.degree >= 1000]
print(len(high_degree_rows))

#### Silkroad pagerank

In [None]:
# Load ./silkroad_graph.csv into silkroad_df and print its column names.
silkroad_df = pd.read_csv('./silkroad_graph.csv')
print(f'Columns name: {silkroad_df.columns.values}')

In [5]:
import igraph

In [None]:
# Derive the graph inputs from silkroad_df: one edge per row from str(src) to
# str(dst), weighted by the row's 'btc', plus the list of distinct node labels.
#
# The columns are converted directly instead of going through iterrows().
# iterrows() hands each row back as a single-dtype Series, so integer ids that
# share a row with a float column (such as 'btc') would be upcast to float and
# stringified as e.g. '12.0'.
src_labels = silkroad_df['src'].astype(str)
dst_labels = silkroad_df['dst'].astype(str)

edges = list(zip(src_labels, dst_labels))
weights = silkroad_df['btc'].tolist()
# First-seen order keeps the vertex list (and so the vertex ids in the graph)
# reproducible between runs, unlike converting a set to a list.
vertices = pd.unique(pd.concat([src_labels, dst_labels], ignore_index=True)).tolist()
print(f'노드: {len(vertices)}')
print(f'에지: {len(edges)}')

In [None]:
# Assemble the directed graph from the prepared vertex / edge / weight lists
# and store it on disk as silkroad.igraph.
g = igraph.Graph(directed=True)
g.add_vertices(vertices)
g.add_edges(edges)
g.es['weight'] = weights
g.write_pickle('silkroad.igraph')

In [None]:
# PageRank score per vertex of g, using `weights` as the edge weights.
pagerank = g.pagerank(weights=weights)
# Bare expression: the cell displays the highest score.
max(pagerank)

In [None]:
# Pair each vertex label with its PageRank score (both lists are in vertex order).
silkroad_pr = pd.DataFrame({'node': vertices, 'pagerank': pagerank})

In [None]:
# Print, as CSV, the ten nodes with the highest PageRank (ties ordered by node label).
print(silkroad_pr.sort_values(by=['pagerank', 'node'], ascending=False)[:10].to_csv())

In [None]:
def _read_ledger(income_path, outcome_path):
    """Read an income/outcome CSV pair.

    Returns ``(income, outcome, combined)`` where ``combined`` stacks both
    frames and orders the rows by 'unixtime', then 'value'.
    """
    income = pd.read_csv(income_path)
    outcome = pd.read_csv(outcome_path)
    combined = pd.concat([income, outcome]).sort_values(by=['unixtime', 'value'])
    return income, outcome, combined


silkt1_in, silkt1_out, silkt1_df = _read_ledger('./silk_top1_income.csv',
                                                './silk_top1_outcome.csv')
silkt2_in, silkt2_out, silkt2_df = _read_ledger('./silk_top2_income.csv',
                                                './silk_top2_outcome.csv')
silkt3_in, silkt3_out, silkt3_df = _read_ledger('./silk_top3_income.csv',
                                                './silk_top3_outcome.csv')

In [None]:
# Tag every ledger with the UTC year / month / day of its 'unixtime' column,
# printing the resulting column names after each frame is done.
date_fields = (('year', get_year), ('month', get_month), ('day', get_day))
for ledger in (silkt1_df, silkt2_df, silkt3_df):
    for field, extractor in date_fields:
        ledger[field] = ledger.apply(extractor, axis=1)
    print(f'Columns name: {ledger.columns.values}')

In [None]:
# Per-day groupings of the three ledgers, keyed by (year, month, day).
day_keys = ['year', 'month', 'day']
silkt1_grp, silkt2_grp, silkt3_grp = (
    ledger.groupby(by=day_keys)
    for ledger in (silkt1_df, silkt2_df, silkt3_df)
)

In [None]:
# Bare expression: displays the latest 'unixtime' found across the three ledgers.
max(max(silkt1_df.unixtime),
    max(silkt2_df.unixtime),
    max(silkt3_df.unixtime))

In [None]:
def _daily_activity(grp, days):
    """Return per-day entry counts and the running total of 'value'.

    grp is expected to be grouped by (year, month, day) and to expose a
    'value' column; days is an iterable of datetimes in chronological order.

    Returns ``(counts, balances, final_balance)``: counts[i] is the number of
    'value' entries on days[i] (0 when the day has no rows) and balances[i] is
    the cumulative sum of 'value' up to and including days[i].
    """
    # Aggregate once per ledger. Calling grp.sum() / grp.count() for every day
    # would redo the whole aggregation on each iteration.
    day_counts = grp['value'].count().to_dict()
    day_sums = grp['value'].sum().to_dict()
    counts = list()
    balances = list()
    balance = 0
    for day in days:
        key = (day.year, day.month, day.day)
        counts.append(day_counts.get(key, 0))
        balance = balance + day_sums.get(key, 0)
        balances.append(balance)
    return counts, balances, balance


# One entry per calendar day from 2012-01-01 through 2020-08-20 (inclusive).
first_day = datetime.datetime(year=2012, month=1, day=1)
last_day = datetime.datetime(year=2020, month=8, day=20)

x = list() # datetime
date = first_day
while date <= last_day:
    x.append(date)
    date = date + datetime.timedelta(days=1)

# y* = daily txcount, z* = running balance, t*_bal = balance on the last day.
y1, z1, t1_bal = _daily_activity(silkt1_grp, x)
y2, z2, t2_bal = _daily_activity(silkt2_grp, x)
y3, z3, t3_bal = _daily_activity(silkt3_grp, x)

In [None]:
# Spot check: displays the summed 'value' of the third ledger for 2012-10-05.
silkt3_grp.sum().loc[2012, 10, 5].value

In [None]:
# Displays the total of all negative 'value' entries in the first ledger.
sum(silkt1_df.value[silkt1_df.value < 0])

In [None]:
# Displays the total of all positive 'value' entries in the first ledger.
sum(silkt1_df.value[silkt1_df.value > 0])

In [None]:
# Running balance of the three ledgers over time on a log-scaled y-axis,
# saved as silkroad.png.
fig, ax = plt.subplots(figsize=(8, 8))
fig.set_facecolor('white')
balance_curves = (
    (z1, 'Top 1) 1F1tAaz5x1HUXrCNLbtMDqcw6o5GNn4xqX'),
    (z2, 'Top 2) 1DkyBEKt5S2GDtv7aQw6rQepAvnsRyHoYM'),
    (z3, 'Top 3) 1NnqM24fFeAGf7NWxmhhFkQAciPqeWo3L'),
)
for balances, curve_label in balance_curves:
    ax.plot(x, balances, label=curve_label)
ax.set_yscale('log')
left_limit = datetime.datetime(year=2011, month=12, day=1)
right_limit = datetime.datetime(year=2020, month=8, day=20)
ax.set_xlim((left_limit, right_limit))
ax.tick_params(axis='x', labelsize='large', rotation=15)
ax.tick_params(axis='y', labelsize='large')
ax.set_xlabel('Date', fontsize='x-large')
ax.set_ylabel('Balance (BTC)', fontsize='x-large')
ax.legend()
fig.savefig('silkroad.png', bbox_inches='tight')

### UPbit

In [2]:
# Load ./upbit.csv into upbit_df and print its column names.
upbit_df = pd.read_csv('./upbit.csv')
print(f'Columns name: {upbit_df.columns.values}')

Columns name: ['saddr' 'daddr' 'btc' 'saddr_id' 'daddr_id' 'cnt']


In [None]:
# Derive the graph inputs from upbit_df: one edge per row from 'saddr' to
# 'daddr', weighted by the row's 'cnt', plus the list of distinct addresses.
#
# Built column-wise rather than with iterrows(): the row loop is slow on large
# frames, and its "not in vertices" checks add nothing because set.add() already
# ignores duplicates.
edges = list(zip(upbit_df['saddr'], upbit_df['daddr']))
weights = upbit_df['cnt'].tolist()
# First-seen order keeps the vertex list (and so the vertex ids in the graph)
# reproducible between runs, unlike converting a set to a list.
vertices = pd.unique(
    pd.concat([upbit_df['saddr'], upbit_df['daddr']], ignore_index=True)
).tolist()
print(f'노드: {len(vertices)}')
print(f'에지: {len(edges)}')

In [None]:
# Assemble the graph (default, i.e. no directed flag) from the prepared
# vertex / edge / weight lists and store it on disk as upbit.igraph.
g = igraph.Graph()
g.add_vertices(vertices)
g.add_edges(edges)
g.es['weight'] = weights
g.write_pickle('upbit.igraph')

In [6]:
# Reload the graph from upbit.igraph, the pickle written when the graph is built.
# NOTE(review): Read_Pickle unpickles the file, so only load files produced locally.
g = igraph.Graph.Read_Pickle('upbit.igraph')

In [7]:
# Leiden community detection on g with modularity as the objective.
# No edge weights are passed to the call.
# NOTE(review): no random seed is set here, so the partition presumably differs
# between runs -- confirm whether reproducibility matters for the figures.
partition = g.community_leiden(objective_function='modularity')

In [None]:
import collections

In [None]:
# Number of vertices assigned to each community id.
counter = collections.Counter(partition.membership)

In [None]:
# Bare expression: displays the ten largest communities as (community id, size) pairs.
counter.most_common(10)

In [None]:
# Empirical CDF of community sizes on a log-scaled x-axis, using as many
# histogram bins as the largest community size; saved as upbit_community.png.
fig, ax = plt.subplots(figsize=(8, 6))
fig.set_facecolor('white')
community_sizes = counter.values()
ax.hist(community_sizes, max(community_sizes), density=True, histtype='step',
        cumulative=True, color='indigo')
ax.set_xscale('log')
for axis_name in ('x', 'y'):
    ax.tick_params(axis=axis_name, labelsize='large')
ax.set_xlabel('Community size', fontsize='x-large')
ax.set_ylabel('CDF', fontsize='x-large')
fig.savefig('upbit_community.png', bbox_inches='tight')

In [None]:
# Render the partition to t.png with the community groups marked.
igraph.plot(partition, "t.png", mark_groups=True)

In [None]:
import random

N = 10

clusters = partition
member = clusters.membership

# One random '#rrggbb' colour per community (six hex digits drawn per colour).
new_cmap = []
for cluster_number in range(len(clusters)):
    hex_digits = [random.choice('0123456789abcdef') for digit in range(6)]
    new_cmap.append('#' + ''.join(hex_digits))

# Colour every vertex with the colour of the community that contains it.
vcolors = {}
for cluster_index, cluster_vertices in enumerate(clusters):
    for vertex in cluster_vertices:
        vcolors[vertex] = new_cmap[cluster_index]
g.vs["color"] = [vcolors[vertex] for vertex in g.vs.indices]

# Edges inside one community take that community's colour and a large layout
# weight (3 * vertex count); edges between communities are light grey with a
# weight of 0.1.
ecolors = {}
eweights = {}
intra_weight = 3 * g.vcount()
for edge in g.es:
    source, target = edge.tuple
    if member[source] == member[target]:
        ecolors[edge.index] = new_cmap[member[source]]
        eweights[edge.index] = intra_weight
    else:
        ecolors[edge.index] = "#e0e0e0"
        eweights[edge.index] = 0.1
g.es["weight"] = [eweights[edge_index] for edge_index in g.es.indices]
g.es["color"] = [ecolors[edge_index] for edge_index in g.es.indices]

# Lay the graph out with the weights just assigned and write it to graph.pdf.
visual_style = {"layout": g.layout_fruchterman_reingold(weights=g.es["weight"])}
igraph.plot(g, "graph.pdf", **visual_style)

In [None]:
# A matplotlib Colormap is callable but not iterable, so looping over
# plt.cm.rainbow itself raises TypeError. Sample it at ten evenly spaced
# positions in [0, 1] (the palette size used by the neighbouring cells) and
# print each resulting RGBA row.
for color in plt.cm.rainbow(np.linspace(0, 1, 10)):
    print(color)

In [None]:
# Bare expression: displays ten random '#rrggbb' colour strings.
['#'+''.join([random.choice('0123456789abcdef') for x in range(6)]) for z in range(10)]