# Applicability of general types of heuristics

In [1]:
import blocksci

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [3]:
import seaborn as sns

In [4]:
from deco import *

In [5]:
import utils

In [6]:
# Open the blockchain using the configuration supplied by the local `utils` module.
chain = blocksci.Blockchain(utils.blocksci_config())

In [7]:
len(chain)

689257

In [8]:
# Build a ClusterManager for `chain` from whatever `utils.latest_clustering()` returns
# (presumably the location of a precomputed clustering -- confirm in utils).
cm = blocksci.cluster.ClusterManager(utils.latest_clustering(), chain)

In [9]:
cm.clusters().size

658522992

In [10]:
# Change heuristic from `blocksci.heuristics.change`; it is applied per transaction
# further down as `address_reuse(tx)` and its result is inspected via `.size`.
address_reuse = blocksci.heuristics.change.address_reuse

In [11]:
# Change heuristic parameterised with the cluster manager `cm`; it is applied per
# transaction further down as `cluster_member(tx)`.
cluster_member = blocksci.heuristics.change.cluster_member(cm)

In [12]:
%time all_tx_count = chain.blocks.txes.size

CPU times: user 19.9 s, sys: 31.5 s, total: 51.3 s
Wall time: 4min 40s


In [13]:
all_tx_count

652618175

### Filter based on number of outputs (all output types, as given by `tx.output_count`)

In [14]:
def one_out(it):
    """Return the transactions of ``it`` whose ``output_count`` equals 1.

    ``it`` must expose a ``txes`` collection that supports ``where``.
    """
    transactions = it.txes
    return transactions.where(lambda t: t.output_count == 1)

In [15]:
def two_out(it):
    """Return the transactions of ``it`` whose ``output_count`` equals 2.

    ``it`` must expose a ``txes`` collection that supports ``where``.
    """
    transactions = it.txes
    return transactions.where(lambda t: t.output_count == 2)

In [16]:
def more_out(it):
    """Return the transactions of ``it`` whose ``output_count`` is at least 3.

    ``it`` must expose a ``txes`` collection that supports ``where``.
    """
    transactions = it.txes
    return transactions.where(lambda t: t.output_count >= 3)

In [17]:
def map_txes(map_func):
    """Run ``map_func`` over block ranges of ``chain`` and add up the partial results.

    The work is delegated to ``blocksci.mapreduce_block_ranges``; partial
    results are combined pairwise with ``+``.
    """
    return blocksci.mapreduce_block_ranges(
        chain,
        map_func,
        lambda left, right: left + right,
    )

### Number of transactions with one/two/three+ outputs

In [18]:
def count_func(it):
    """Return the element count of ``it`` as reported by its ``size`` attribute."""
    n_items = it.size
    return n_items

In [19]:
%time one_spendable_out = map_txes(lambda x: count_func(one_out(x)))

CPU times: user 5.99 s, sys: 3.85 s, total: 9.83 s
Wall time: 2min 22s


In [20]:
one_spendable_out

91108655

In [21]:
one_spendable_out / all_tx_count

0.13960483861792541

In [22]:
%time two_spendable_out = map_txes(lambda x: count_func(two_out(x)))

CPU times: user 6.77 s, sys: 517 ms, total: 7.29 s
Wall time: 2min 53s


In [23]:
two_spendable_out

494555377

In [24]:
two_spendable_out / all_tx_count

0.7578020287283602

In [25]:
%time more_spendable_out = map_txes(lambda x: count_func(more_out(x)))

CPU times: user 5.81 s, sys: 4.84 s, total: 10.6 s
Wall time: 3min 35s


In [26]:
more_spendable_out

66954143

In [27]:
more_spendable_out / all_tx_count

0.1025931326537144

In [28]:
# Sanity check: the one-, two- and three-or-more-output counts must add up
# exactly to the total number of transactions.
assert 0 == all_tx_count - one_spendable_out - two_spendable_out - more_spendable_out

### Remove OP_RETURN: count transactions that contain an OP_RETURN (nulldata) output

In [29]:
def op_return(it):
    """Count the transactions in ``it`` that have at least one nulldata output."""
    with_nulldata = it.where(
        lambda t: t.outputs.any(
            lambda out: out.address_type == blocksci.address_type.nulldata
        )
    )
    return with_nulldata.size

In [30]:
def no_op_return(it):
    """Count the transactions in ``it`` that have no nulldata output."""
    without_nulldata = it.where(
        # ``~`` negates the result of ``any``; kept as an operator so it works
        # on whatever object ``any`` returns here.
        lambda t: ~(
            t.outputs.any(
                lambda out: out.address_type == blocksci.address_type.nulldata
            )
        )
    )
    return without_nulldata.size

In [31]:
%time one_op_return = map_txes(lambda x: op_return(one_out(x)))

CPU times: user 9.64 s, sys: 4.54 s, total: 14.2 s
Wall time: 4min 8s


In [32]:
one_op_return

393492

In [33]:
one_op_return / one_spendable_out

0.004318931060940368

In [34]:
%time two_op_return = map_txes(lambda x: op_return(two_out(x)))

CPU times: user 41.5 s, sys: 4.32 s, total: 45.8 s
Wall time: 4min 37s


In [35]:
two_op_return

28174926

In [36]:
two_op_return / two_spendable_out

0.05697021468234891

In [37]:
%time more_op_return = map_txes(lambda x: op_return(more_out(x)))

CPU times: user 13.5 s, sys: 4.89 s, total: 18.4 s
Wall time: 4min 13s


In [38]:
more_op_return

19633918

In [39]:
more_op_return / more_spendable_out

0.29324425823806005

### Redefine the filters to exclude transactions that contain an OP_RETURN output

In [40]:
def one_out_no_nulldata(it):
    """Return the one-output transactions of ``it`` that have no nulldata output."""
    by_count = it.txes.where(lambda t: t.output_count == 1)
    return by_count.where(
        lambda t: ~(
            t.outputs.any(
                lambda out: out.address_type == blocksci.address_type.nulldata
            )
        )
    )

In [41]:
def two_out_no_nulldata(it):
    """Return the two-output transactions of ``it`` that have no nulldata output."""
    by_count = it.txes.where(lambda t: t.output_count == 2)
    return by_count.where(
        lambda t: ~(
            t.outputs.any(
                lambda out: out.address_type == blocksci.address_type.nulldata
            )
        )
    )

In [42]:
def more_out_no_nulldata(it):
    """Return transactions of ``it`` with three or more outputs and no nulldata output."""
    by_count = it.txes.where(lambda t: t.output_count >= 3)
    return by_count.where(
        lambda t: ~(
            t.outputs.any(
                lambda out: out.address_type == blocksci.address_type.nulldata
            )
        )
    )

### Number of transactions where address reuse returns at least one change address

In [43]:
def address_reuse_func(t):
    """Count the transactions in ``t`` for which ``address_reuse`` returns a non-empty result."""
    flagged = t.where(lambda txn: address_reuse(txn).size > 0)
    return flagged.size

In [44]:
%time count_address_reuse_one = map_txes(lambda x: address_reuse_func(one_out_no_nulldata(x)))

CPU times: user 16.4 s, sys: 3.76 s, total: 20.2 s
Wall time: 4min 6s


In [45]:
%time count_address_reuse_two = map_txes(lambda x: address_reuse_func(two_out_no_nulldata(x)))

CPU times: user 1min 35s, sys: 1.67 s, total: 1min 37s
Wall time: 4min 48s


In [46]:
%time count_address_reuse_more = map_txes(lambda x: address_reuse_func(more_out_no_nulldata(x)))

CPU times: user 26.3 s, sys: 3.66 s, total: 30 s
Wall time: 3min 31s


In [47]:
count_address_reuse_one

600657

In [48]:
count_address_reuse_one / one_spendable_out

0.006592754552243143

In [49]:
count_address_reuse_two

103302662

In [50]:
count_address_reuse_two / two_spendable_out

0.20887986827004007

In [51]:
count_address_reuse_more

9069691

In [52]:
count_address_reuse_more / more_spendable_out

0.1354612365063055

### Number of transactions where no address reuse occurs, but cluster membership returns at least one change address

In [53]:
def cluster_member_func(t):
    """Count transactions in ``t`` where ``address_reuse`` finds nothing but ``cluster_member`` does."""

    def only_cluster_member(txn):
        no_reuse = address_reuse(txn).size == 0
        has_member = cluster_member(txn).size > 0
        # Combined with ``&`` exactly as in the original expression.
        return no_reuse & has_member

    return t.where(only_cluster_member).size

In [54]:
%time count_cluster_member_one = map_txes(lambda x: cluster_member_func(one_out_no_nulldata(x)))

CPU times: user 20.2 s, sys: 5.1 s, total: 25.3 s
Wall time: 5min 7s


In [55]:
count_cluster_member_one

2238598

In [56]:
count_cluster_member_one / one_spendable_out

0.02457064040732464

In [57]:
%time count_cluster_member_two = map_txes(lambda x: cluster_member_func(two_out_no_nulldata(x)))

CPU times: user 1min 55s, sys: 4.58 s, total: 2min
Wall time: 5min 28s


In [58]:
count_cluster_member_two

53412629

In [59]:
count_cluster_member_two / two_spendable_out

0.1080013108420819

In [60]:
%time count_cluster_member_more = map_txes(lambda x: cluster_member_func(more_out_no_nulldata(x)))

CPU times: user 34.6 s, sys: 4.36 s, total: 39 s
Wall time: 4min 27s


In [61]:
count_cluster_member_more

9291391

In [62]:
count_cluster_member_more / more_spendable_out

0.1387724580389297

### Number of transactions (excluding above) where at least one output is unspent

In [63]:
def unspent_func(t):
    """Count transactions in ``t`` with an unspent output that neither heuristic flags.

    A transaction is counted when at least one of its outputs is not spent and
    both ``address_reuse`` and ``cluster_member`` return an empty result for it.
    """

    def unspent_and_unflagged(txn):
        any_unspent = txn.outputs.any(lambda out: ~out.is_spent)
        no_reuse = address_reuse(txn).size == 0
        no_member = cluster_member(txn).size == 0
        # Same left-to-right ``&`` chain as the original one-liner.
        return any_unspent & no_reuse & no_member

    return t.where(unspent_and_unflagged).size

In [64]:
%time unspent_one = map_txes(lambda x: unspent_func(one_out_no_nulldata(x)))

CPU times: user 13.1 s, sys: 4.39 s, total: 17.5 s
Wall time: 4min 35s


In [65]:
unspent_one

3075914

In [66]:
unspent_one / one_spendable_out

0.03376094181173018

In [67]:
%time unspent_two = map_txes(lambda x: unspent_func(two_out_no_nulldata(x)))

CPU times: user 1min 21s, sys: 4.4 s, total: 1min 25s
Wall time: 5min 30s


In [68]:
unspent_two

19299045

In [69]:
unspent_two / two_spendable_out

0.03902302127836333

In [70]:
%time unspent_more = map_txes(lambda x: unspent_func(more_out_no_nulldata(x)))

CPU times: user 26.2 s, sys: 4.33 s, total: 30.5 s
Wall time: 4min 39s


In [71]:
unspent_more

4447402

In [72]:
unspent_more / more_spendable_out

0.06642459750399612

In [73]:
unspent_one + unspent_two + unspent_more

26822361