In [1]:
import blocksci

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

In [3]:
import collections
import random

In [4]:
import utils

In [5]:
import analysis

In [6]:
# Open the BlockSci chain handle using whatever utils.blocksci_config() returns
# (presumably the location of the BlockSci config — confirm in utils).
# `chain` is passed to the ClusterManager, Fingerprinter and GroundTruth constructors below.
chain = blocksci.Blockchain(utils.blocksci_config())

In [7]:
# Cluster manager for `chain`, built from the value returned by utils.latest_clustering().
# NOTE(review): `my_cm` is not referenced by any other cell visible in this notebook —
# confirm it is still needed before keeping this (potentially expensive) load.
my_cm = blocksci.cluster.ClusterManager(utils.latest_clustering(), chain)

In [8]:
# Fingerprinter for `chain`, built from the value returned by utils.latest_fingerprints().
# `fp` is the first argument of every smart_fingerprint(...) heuristic defined further down.
fp = blocksci.Fingerprinter(utils.latest_fingerprints(), chain)

### Load stored transactions

In [9]:
# Transaction set the export below runs over; later cells only use its .transactions() accessor.
# NOTE(review): what utils.remaining_txes() selects is not visible here — confirm in utils.
stored_txes = blocksci.GroundTruth(utils.remaining_txes(), chain)

### Export fingerprint heuristics

In [10]:
def _fp_heuristic(features):
    """Return the smart-fingerprint change heuristic for the given feature selection.

    `features` is handed unchanged to `utils.get_fp_mask`, so it can be a single
    `utils.Fingerprint` member or a list of members, matching both call shapes
    used in the mapping below.
    """
    mask = utils.get_fp_mask(features)
    return blocksci.heuristics.change.smart_fingerprint(fp, mask)


# Column name -> change heuristic. Insertion order is the column order of the
# exported DataFrame, so entries must not be reordered.
fingerprint_heuristics = {
    "fp_inout_count": _fp_heuristic(
        [utils.Fingerprint.INPUT_COUNT, utils.Fingerprint.OUTPUT_COUNT]
    ),
    "fp_output_count": _fp_heuristic(utils.Fingerprint.OUTPUT_COUNT),
    "fp_version": _fp_heuristic(utils.Fingerprint.VERSION_2),
    "fp_locktime": _fp_heuristic(
        [utils.Fingerprint.LOCKTIME_HEIGHT, utils.Fingerprint.LOCKTIME_TIME]
    ),
    "fp_rbf": _fp_heuristic(utils.Fingerprint.RBF),
    "fp_segwit": _fp_heuristic(utils.Fingerprint.SEGWIT),
    "fp_possible_segwit": _fp_heuristic(utils.Fingerprint.SEGWIT_COMPATIBLE),
    "fp_ordered_inouts": _fp_heuristic(utils.Fingerprint.SORTED_INOUTS),
    "fp_zeroconf": _fp_heuristic(utils.Fingerprint.ZEROCONF),
    "fp_multisig": _fp_heuristic(utils.Fingerprint.MULTISIG),
    "fp_p2pkh": _fp_heuristic(utils.Fingerprint.P2PKH),
    "fp_p2sh": _fp_heuristic(utils.Fingerprint.P2SH),
    "fp_p2wpkh": _fp_heuristic(utils.Fingerprint.P2WPKH),
    "fp_p2wsh": _fp_heuristic(utils.Fingerprint.P2WSH),
    "fp_address_type": _fp_heuristic(
        [
            utils.Fingerprint.MULTISIG,
            utils.Fingerprint.P2PKH,
            utils.Fingerprint.P2SH,
            utils.Fingerprint.P2WPKH,
            utils.Fingerprint.P2WSH,
            utils.Fingerprint.ADDRESS_OTHER,
        ]
    ),
    # The two fee heuristics are used as provided by BlockSci, without a fingerprint mask.
    "fp_absolute_fee": blocksci.heuristics.change.fixed_fee,
    "fp_relative_fee": blocksci.heuristics.change.fixed_fee_per_byte,
}

In [11]:
# One flag per stored transaction: does `is_spent` hold for all of its outputs?
# The export loop uses this to blank out transactions that are not fully spent.
all_outputs_spent = stored_txes.transactions().map(
    lambda tx: tx.outputs.all(lambda output: output.is_spent)
)

In [12]:
# Empty frame; the export loop in the next cell adds one categorical column per heuristic.
df_fingerprint_heuristics = pd.DataFrame()

In [None]:
def _per_output_votes(unique_idx, spent_mask):
    """Expand one unique-index value per transaction into two per-output flags.

    Parameters
    ----------
    unique_idx : iterable of int
        The value of ``heuristic.unique_index(tx)`` for every transaction, in
        transaction order. ``-1`` is treated as "no decision".
    spent_mask : numpy.ndarray of bool
        True where all outputs of the corresponding transaction are spent.

    Returns
    -------
    numpy.ndarray of int8, length ``2 * len(spent_mask)``
        Entries ``2*i`` and ``2*i + 1`` belong to transaction ``i`` and hold
        ``int(idx == 0)`` and ``int(idx == 1)`` respectively. Both are ``-1``
        when the transaction is not fully spent or the index is ``-1``.
    """
    idx = np.fromiter(unique_idx, dtype=np.int64, count=len(spent_mask))
    decided = spent_mask & (idx != -1)
    # Start from "no decision" everywhere and only fill in decided transactions.
    votes = np.full((len(idx), 2), -1, dtype=np.int8)
    votes[decided, 0] = idx[decided] == 0
    votes[decided, 1] = idx[decided] == 1
    # Row-major flattening interleaves the two flags per transaction, i.e. the
    # same order as appending them one transaction at a time.
    return votes.ravel()


# The fully-spent mask does not depend on the heuristic, so convert it once.
spent_mask = np.fromiter(all_outputs_spent, dtype=bool, count=len(all_outputs_spent))

for key, heuristic in fingerprint_heuristics.items():
    print(key)
    result = stored_txes.transactions().map(lambda tx: heuristic.unique_index(tx))
    assert len(result) == len(all_outputs_spent)
    # Insert the column directly as a categorical backed by an int8 array. This
    # avoids building a Python list with two entries per transaction and the
    # temporary int64 column that a later .astype("category") would require.
    df_fingerprint_heuristics[key] = pd.Categorical(_per_output_votes(result, spent_mask))
    del result  # release the per-heuristic result before the next iteration
    print(df_fingerprint_heuristics.memory_usage())
    print()

fp_inout_count
Index                   128
fp_inout_count    619306212
dtype: int64

fp_output_count
Index                    128
fp_inout_count     619306212
fp_output_count    619306212
dtype: int64

fp_version
Index                    128
fp_inout_count     619306212
fp_output_count    619306212
fp_version         619306212
dtype: int64

fp_locktime
Index                    128
fp_inout_count     619306212
fp_output_count    619306212
fp_version         619306212
fp_locktime        619306212
dtype: int64

fp_rbf
Index                    128
fp_inout_count     619306212
fp_output_count    619306212
fp_version         619306212
fp_locktime        619306212
fp_rbf             619306212
dtype: int64

fp_segwit
Index                    128
fp_inout_count     619306212
fp_output_count    619306212
fp_version         619306212
fp_locktime        619306212
fp_rbf             619306212
fp_segwit          619306212
dtype: int64

fp_possible_segwit


In [16]:
# Total of DataFrame.memory_usage() (called with its defaults) over the index and all
# columns, converted from bytes to decimal gigabytes (divided by 1000 three times).
print(df_fingerprint_heuristics.memory_usage().sum() / 1000 / 1000 / 1000) 

10.528205732


In [18]:
# Write the per-output heuristic flags to CSV. `index` is not passed, so pandas' default
# applies and the row index is written as the first, unnamed column.
# NOTE(review): the destination is a hard-coded, dated absolute path on one machine;
# consider moving it to a configurable data directory so the notebook re-runs elsewhere.
df_fingerprint_heuristics.to_csv("/home/ubuntu/Data/export/20210720-fingerprint-heuristics.csv")