In [None]:
import blocksci
import matplotlib.pyplot as plt
import matplotlib.ticker
import collections
import pandas as pd
import numpy as np
from pathlib import Path
%matplotlib inline

# NOTE: despite the "_directory" suffix, this is the path of the config.json file
# that is handed to blocksci.Blockchain below.
parser_data_directory = Path("/mnt/anal/config.json")
# Additional data locations; neither is referenced by the cells visible here.
cluster_directory = Path("/mnt/anal/cluster/")
dumplings_directory = Path("/mnt/dumplings/")

# Blockchain handle used by the date helpers and the coinjoin filters further down.
chain = blocksci.Blockchain(str(parser_data_directory))

from typing import Tuple

def get_block_height_for_date(date: str) -> int:
    """Return the height of the first block in ``chain.range(date)``.

    Args:
        date: Date string forwarded unchanged to ``chain.range``.

    Returns:
        The ``height`` attribute of the first block of that range.
    """
    first_block = chain.range(date)[0]
    return first_block.height

def get_block_height_range(start: str, end: str) -> Tuple[int, int]:
    """Translate a pair of date strings into a ``(start_height, end_height)`` tuple.

    Each date is resolved independently via ``get_block_height_for_date``.
    """
    start_height = get_block_height_for_date(start)
    end_height = get_block_height_for_date(end)
    return start_height, end_height

In [2]:
import json

# Transaction hashes that were manually identified as false positives; they are
# subtracted from the detected Wasabi coinjoin hash sets in later cells.
# Context managers make sure both file handles are closed after parsing.
with open("/mnt/anal/false_cjtxs_ww2.json") as ww2_false_file:
    ww2_false = set(json.load(ww2_false_file))

with open("/mnt/anal/false_cjtxs_ww1.json") as ww1_false_file:
    ww1_false = set(json.load(ww1_false_file))

In [4]:
"""
Table structure
cj pool | operating period | total cjtxs | fresh inputs | remix rate | unmoved UTXOs | num of inputs (min, avg, max)
ww1
ww2 (zkSNACKs)
ww2 
wp all
wp 5m
wp 1m
wp 50m
wp 100k
"""
# Upper height bound used for the Whirlpool ("wp") coinjoin filter.
april_24_block_height = get_block_height_for_date('2024-04-24')
# Boundary height: upper bound for the ww1 and first ww2 filters,
# lower bound for the second ("other") ww2 filter.
june_1_block_height = get_block_height_for_date('2024-06-01')
# Upper height bound for the second ("other") ww2 filter; note the date is 2024-08-30.
last_august_block_height = get_block_height_for_date('2024-08-30')

In [None]:
%time ww2_cjs = chain.filter_coinjoin_txes(0, june_1_block_height, "ww2")
# %time ww2_cjs_hashes = set(str(tx.hash) for tx in ww2_cjs) - ww2_false

In [None]:
# Wasabi 2.0 ("ww2") coinjoins between the 2024-06-01 boundary height and the
# 2024-08-30 height, i.e. the period reported as "Wasabi 2.0 (other)" in the table.
# NOTE(review): whether the height bounds are inclusive is not visible here - confirm
# the boundary block is not counted in both ww2 ranges.
%time ww2_cjs_other = chain.filter_coinjoin_txes(june_1_block_height, last_august_block_height, "ww2")
# Hash strings of those transactions with the known ww2 false positives removed.
%time ww2_cjs_other_hashes = set(str(tx.hash) for tx in ww2_cjs_other) - ww2_false

In [None]:
# Whirlpool ("wp") coinjoins between height 0 and the 2024-04-24 height.
%time wp_cjs = chain.filter_coinjoin_txes(0, april_24_block_height, "wp")
# Hash strings of those transactions; unlike the Wasabi sets, no false-positive
# list is subtracted here.
%time wp_cjs_hashes = set(str(tx.hash) for tx in wp_cjs)

In [None]:
# Wasabi 1.x ("ww1") coinjoins between height 0 and the 2024-06-01 boundary height.
%time ww1_cjs = chain.filter_coinjoin_txes(0, june_1_block_height, "ww1")
# Hash strings of those transactions with the known ww1 false positives removed.
%time ww1_cjs_hashes = set(str(tx.hash) for tx in ww1_cjs) - ww1_false

In [None]:
# Every coinjoin hash detected for any pool; used to spot inputs that arrive
# from a coinjoin belonging to a different pool.
god_set_of_all_coinjoins = set().union(
    ww1_cjs_hashes,
    wp_cjs_hashes,
    ww2_cjs_hashes,
    ww2_cjs_other_hashes,
)

In [None]:
import copy
import json

# Zero-initialised counter template; every pool gets its own independent copy.
wp_stats = dict.fromkeys(
    (
        "all_txes",
        "all_inputs",
        "all_inputs_value",
        "fresh_inputs",
        "fresh_inputs_value",
        "all_outputs",
        "remixed_outputs",
        "unmoved_outputs",
        "unmoved_outputs_value",
        "traverse_inputs",
        "traverse_inputs_value",
    ),
    0,
)

# One counter dict per Wasabi data set.
ww2_stats, ww2_other_stats, ww1_stats = (copy.deepcopy(wp_stats) for _ in range(3))

# Whirlpool is tracked both in aggregate ("all") and per pool denomination.
whirlpool_stats = {
    pool_name: copy.deepcopy(wp_stats)
    for pool_name in ("all", "5m", "1m", "50m", "100k")
}

def get_pool(ws, tx):
    """Pick the per-pool stats dict that matches a Whirlpool transaction.

    The pool is identified by the largest output value of ``tx``.

    Args:
        ws: Mapping with the keys "5m", "1m", "50m" and "100k".
        tx: Transaction-like object whose ``outputs`` each expose ``value``.

    Returns:
        The entry of ``ws`` for the matching pool.

    Raises:
        ValueError: If the largest output value is not one of the four
            known pool values.
    """
    key_by_value = {
        5000000: "5m",
        1000000: "1m",
        50000000: "50m",
        100000: "100k",
    }
    max_pool_value = max(out.value for out in tx.outputs)
    pool_key = key_by_value.get(max_pool_value)
    if pool_key is None:
        raise ValueError(f"wtf {max_pool_value}")
    return ws[pool_key]

def check_fdnp(tx: blocksci.Tx) -> int:
    """Sum the value of the inputs of ``tx`` that spend Wasabi 2.0 coinjoin outputs.

    An input counts when the hash of the transaction it spends from is in
    ``ww2_cjs_hashes`` or ``ww2_cjs_other_hashes``.

    Returns:
        Total value of the matching inputs (0 when there are none).
    """
    return sum(
        tx_input.value
        for tx_input in tx.inputs
        if str(tx_input.spent_tx.hash) in ww2_cjs_hashes
        or str(tx_input.spent_tx.hash) in ww2_cjs_other_hashes
    )


def should_ignore_input(tx):
    """Report whether ``tx`` is funded by any Wasabi (1.x or 2.0) coinjoin.

    Args:
        tx: Transaction whose inputs are inspected.

    Returns:
        A ``(value, flag)`` tuple: ``value`` is the summed value of the inputs
        whose parent transaction hash is in ``ww1_cjs_hashes``,
        ``ww2_cjs_hashes`` or ``ww2_cjs_other_hashes``; ``flag`` is True when
        at least one such input exists.
    """
    matched_value = 0
    found_match = False
    for tx_input in tx.inputs:
        parent_hash = str(tx_input.spent_tx.hash)
        from_wasabi = (
            parent_hash in ww1_cjs_hashes
            or parent_hash in ww2_cjs_hashes
            or parent_hash in ww2_cjs_other_hashes
        )
        if from_wasabi:
            matched_value += tx_input.value
            found_match = True
    return matched_value, found_match

from collections import defaultdict
from typing import Dict, Set, List


def get_stats_for_cj(
    the_stats: Dict[str, int], 
    tx_objects: List[blocksci.Tx], 
    given_cjs: Set[str], 
    is_whirlpool: bool = False, 
    is_ww2: bool = False, 
    is_ww1: bool = False
):
    """Accumulate coinjoin statistics for one pool into ``the_stats`` (in place).

    Args:
        the_stats: Counter dict to update. With ``is_whirlpool=True`` this must
            instead be the mapping of counter dicts keyed by "all" and the
            per-pool names understood by ``get_pool``.
        tx_objects: Candidate coinjoin transactions.
        given_cjs: Hash strings of the transactions accepted as coinjoins of
            this pool; candidates whose hash is missing are skipped.
        is_whirlpool: Also maintain the per-denomination Whirlpool counters.
        is_ww2: Pool is Wasabi 2.0.
        is_ww1: Pool is Wasabi 1.x.

    Returns:
        None. Counters are only ever incremented, so calling this twice on the
        same dict double-counts.

    Notes:
        * An input whose parent transaction is in ``given_cjs`` is counted
          under "remixed_outputs"; every other input is "fresh".
        * For Wasabi pools a fresh input is left out of the main
          "fresh_inputs" counters when ``should_ignore_input`` flags its
          parent transaction.
        * A fresh input whose parent is in ``god_set_of_all_coinjoins`` is
          additionally counted as a "traverse" input.
    """
    # Throw-away sink so non-Whirlpool runs can share the same update code path.
    discarded_pool_stats = defaultdict(int)
    check_wasabi_origin = is_ww1 or is_ww2

    for tx in tx_objects:
        # filter out false positives
        if str(tx.hash) not in given_cjs:
            continue

        if is_whirlpool:
            tx_pool = get_pool(the_stats, tx)
            current_stats = the_stats["all"]
        else:
            tx_pool = discarded_pool_stats
            current_stats = the_stats

        current_stats["all_txes"] += 1
        tx_pool["all_txes"] += 1

        # Input totals are mirrored into the per-pool dict like every other
        # counter (previously the per-pool values stayed at zero).
        current_stats["all_inputs"] += tx.input_count
        tx_pool["all_inputs"] += tx.input_count

        for tx_input in tx.inputs:
            input_value = tx_input.value
            current_stats["all_inputs_value"] += input_value
            tx_pool["all_inputs_value"] += input_value

            # Resolve the parent transaction and its hash once per input.
            parent_tx = tx_input.spent_tx
            parent_hash = str(parent_tx.hash)

            if parent_hash in given_cjs:
                # Input spends an output of a coinjoin from the same pool.
                current_stats["remixed_outputs"] += 1
                tx_pool["remixed_outputs"] += 1
                continue

            ignore = False
            if check_wasabi_origin:
                _, ignore = should_ignore_input(parent_tx)

            if not ignore:
                current_stats["fresh_inputs"] += 1
                current_stats["fresh_inputs_value"] += input_value
            # The per-pool counters never apply the ignore rule.
            tx_pool["fresh_inputs"] += 1
            tx_pool["fresh_inputs_value"] += input_value

            if parent_hash in god_set_of_all_coinjoins:
                # Parent is a coinjoin, but of a different pool.
                current_stats["traverse_inputs"] += 1
                tx_pool["traverse_inputs"] += 1
                current_stats["traverse_inputs_value"] += input_value
                tx_pool["traverse_inputs_value"] += input_value

        current_stats["all_outputs"] += tx.output_count
        tx_pool["all_outputs"] += tx.output_count
        for tx_output in tx.outputs:
            if not tx_output.is_spent:
                current_stats["unmoved_outputs"] += 1
                tx_pool["unmoved_outputs"] += 1
                current_stats["unmoved_outputs_value"] += tx_output.value
                tx_pool["unmoved_outputs_value"] += tx_output.value
                


In [None]:
# Fill the per-pool counters. get_stats_for_cj only ever increments the dicts it is
# given, so re-running this cell without re-creating the stats dicts first
# double-counts every transaction.
%time get_stats_for_cj(whirlpool_stats, wp_cjs, wp_cjs_hashes, is_whirlpool=True)
%time get_stats_for_cj(ww2_stats, ww2_cjs, ww2_cjs_hashes, is_ww2=True)
%time get_stats_for_cj(ww2_other_stats, ww2_cjs_other, ww2_cjs_other_hashes, is_ww2=True)
%time get_stats_for_cj(ww1_stats, ww1_cjs, ww1_cjs_hashes, is_ww1=True)


In [None]:
from tabulate import tabulate

In [None]:
# Stored values are divided by this before being printed with the "BTC" label.
SATS_PER_BTC = 100000000


def _pool_row(label, period, total_cjtxs, stats, fresh_btc_digits):
    """Build one summary-table row from a pool's counter dict.

    Args:
        label: Pool name shown in the first column.
        period: Operating period shown in the second column.
        total_cjtxs: Number of coinjoin transactions to report.
        stats: Counter dict filled by ``get_stats_for_cj``.
        fresh_btc_digits: Decimal places used for the fresh-input BTC amount.

    Returns:
        List of the six cell values, in header order.
    """
    # max(..., 1) keeps a pool with no outputs (or only remixed outputs) from
    # raising ZeroDivisionError; every row is now guarded the same way.
    remix_rate = round((stats['remixed_outputs'] / max(stats['all_outputs'], 1)) * 100, 1)
    non_remixed_outputs = stats['all_outputs'] - stats['remixed_outputs']
    unmoved_rate = round(stats['unmoved_outputs'] / max(non_remixed_outputs, 1) * 100, 1)

    fresh_btc = round(stats['fresh_inputs_value'] / SATS_PER_BTC, fresh_btc_digits)
    unmoved_btc = round(stats['unmoved_outputs_value'] / SATS_PER_BTC, 0)

    return [
        label,
        period,
        total_cjtxs,
        f"{stats['fresh_inputs'] // 1000}k/{fresh_btc} BTC",
        f"{remix_rate}%",
        f"{unmoved_rate}%, {unmoved_btc}",
    ]


print(tabulate(
    [
        _pool_row("Wasabi 1.0 (zkSNACKs)", "2018-07-19 – 2024-06-01", len(ww1_cjs_hashes), ww1_stats, 1),
        _pool_row("Wasabi 2.0 (zkSNACKs)", "2022-06-18 – 2024-06-01", len(ww2_cjs_hashes), ww2_stats, 0),
        _pool_row("Wasabi 2.0 (other)", "2024-06-01 – current", len(ww2_cjs_other_hashes), ww2_other_stats, 0),
        *(
            _pool_row(f"Whirlpool {key}", "2019-04-17 – 2024-04-24", pool_stats["all_txes"], pool_stats, 1)
            for key, pool_stats in whirlpool_stats.items()
        ),
    ],
    headers=['Pool', 'Operating period', "Total cjtxs", "Fresh inputs", "Remix rate", "Unmoved UTXOs"], 
    tablefmt='orgtbl'
    )
)


In [58]:
# Sanity check: number of known false positives still present in each Wasabi hash set.
ww1_remaining_false = len(ww1_cjs_hashes & ww1_false)
ww2_remaining_false = len(ww2_cjs_hashes & ww2_false)
print(ww1_remaining_false, ww2_remaining_false)

0 0
